def generate_advanced_dataset(n_transactions: int = 5000, seed: int = 42) -> "pd.DataFrame":
    """Build a reproducible synthetic table of retail transactions.

    Each record draws a random day from a 730-day window starting on
    2022-01-01, a category, a product within that category, a unit value
    from the category's range, an amount, customer attributes, a discount
    percentage and a satisfaction score clipped to [1, 5].

    Args:
        n_transactions: Number of records to generate. Must be at least 1.
        seed: Value passed to ``np.random.seed``. Note that this reseeds
            NumPy's *global* random state as a side effect.

    Returns:
        A DataFrame with one row per transaction, holding the per-record
        fields plus two derived columns: ``Profit_Margin`` and
        ``Days_Since_Start`` (days since the earliest sampled date).

    Raises:
        ValueError: If ``n_transactions`` is less than 1.
    """
    if n_transactions < 1:
        # An empty frame would have no columns, so the derived-column
        # steps below could not run.
        raise ValueError("n_transactions must be at least 1")

    np.random.seed(seed)

    start_date = datetime(2022, 1, 1)
    # A concrete list is required: np.random.choice cannot sample a generator.
    dates = [start_date + timedelta(days=offset) for offset in range(730)]

    categories = ['Electronics', 'Clothes', 'House & Backyard', 'Sports activities', 'Books']
    products_by_category = {
        'Electronics': ['Laptop computer', 'Smartphone', 'Headphones', 'Pill', 'Smartwatch'],
        'Clothes': ['T-Shirt', 'Denims', 'Costume', 'Jacket', 'Sneakers'],
        'House & Backyard': ['Furnishings', 'Lamp', 'Rug', 'Plant', 'Cookware'],
        'Sports activities': ['Yoga Mat', 'Dumbbell', 'Working Footwear', 'Bicycle', 'Tennis Racket'],
        'Books': ['Fiction', 'Non-Fiction', 'Biography', 'Science', 'Historical past'],
    }
    # (low, high) bounds of the uniform unit-value draw per category.
    # Loop-invariant, so it is built once rather than per record.
    base_prices = {
        'Electronics': (200, 1500),
        'Clothes': (20, 150),
        'House & Backyard': (30, 500),
        'Sports activities': (25, 300),
        'Books': (10, 50),
    }

    records = []
    # The order of the random draws below is significant: changing it
    # changes the dataset produced for a given seed.
    for _ in range(n_transactions):
        date = np.random.choice(dates)
        category = np.random.choice(categories)
        product = np.random.choice(products_by_category[category])
        value = np.random.uniform(*base_prices[category])
        # Repeated entries share probability mass: P(1)=0.85, P(2)=0.13, P(3)=0.02.
        amount = np.random.choice([1, 1, 1, 2, 2, 3], p=[0.5, 0.2, 0.15, 0.1, 0.03, 0.02])
        customer_segment = np.random.choice(['Premium', 'Commonplace', 'Funds'], p=[0.2, 0.5, 0.3])
        age_group = np.random.choice(['18-25', '26-35', '36-45', '46-55', '56+'])
        area = np.random.choice(['North', 'South', 'East', 'West', 'Central'])

        # Uplift for November/December and a smaller one for June/July.
        month = date.month
        if month in (11, 12):
            seasonal_factor = 1.5
        elif month in (6, 7):
            seasonal_factor = 1.2
        else:
            seasonal_factor = 1.0
        income = value * amount * seasonal_factor

        discount = np.random.choice([0, 5, 10, 15, 20, 25], p=[0.4, 0.2, 0.15, 0.15, 0.07, 0.03])
        marketing_channel = np.random.choice(['Natural', 'Social Media', 'Electronic mail', 'Paid Adverts'])

        # Satisfaction: baseline 4.0, bumped for Premium customers and for
        # discounts above 15%, plus Gaussian noise, clipped to [1, 5].
        base_satisfaction = 4.0
        if customer_segment == 'Premium':
            base_satisfaction += 0.5
        if discount > 15:
            base_satisfaction += 0.3
        satisfaction = np.clip(base_satisfaction + np.random.normal(0, 0.5), 1, 5)

        # NOTE(review): column labels are kept exactly as found in the
        # source (e.g. '12 months' holds the calendar year) because code
        # elsewhere may reference them -- confirm before renaming.
        records.append({
            'Date': date, 'Class': category, 'Product': product, 'Worth': round(value, 2),
            'Amount': amount, 'Income': round(income, 2), 'Customer_Segment': customer_segment,
            'Age_Group': age_group, 'Area': area, 'Discount_%': discount,
            'Marketing_Channel': marketing_channel, 'Customer_Satisfaction': round(satisfaction, 2),
            'Month': date.strftime('%B'), '12 months': date.year, 'Quarter': f'Q{(date.month - 1) // 3 + 1}',
        })

    df = pd.DataFrame(records)
    # 30% of the discounted income, rounded to two decimals.
    df['Profit_Margin'] = (df['Income'] * (1 - df['Discount_%'] / 100) * 0.3).round(2)
    df['Days_Since_Start'] = (df['Date'] - df['Date'].min()).dt.days
    return df

Von admin

Schreibe einen Kommentar

Deine E-Mail-Adresse wird nicht veröffentlicht. Erforderliche Felder sind mit * markiert