ShopTRAINING/server/tools/0.生成随机测试数据.py

138 lines
5.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
# 设置随机种子,确保结果可重现
np.random.seed(42)
# 生成日期范围2年的数据
start_date = datetime.datetime(2022, 1, 1)
end_date = datetime.datetime(2023, 12, 31)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
# 定义药品信息
products = [
{'id': 'P001', 'name': '感冒灵颗粒', 'base_price': 35.5, 'base_sales': 15,
'seasonality': 'winter', 'category': 'OTC'},
{'id': 'P002', 'name': '布洛芬片', 'base_price': 25.8, 'base_sales': 20,
'seasonality': 'all', 'category': 'OTC'},
{'id': 'P003', 'name': '阿莫西林胶囊', 'base_price': 32.6, 'base_sales': 18,
'seasonality': 'all', 'category': 'RX'},
{'id': 'P004', 'name': '板蓝根冲剂', 'base_price': 28.9, 'base_sales': 12,
'seasonality': 'winter', 'category': 'OTC'},
{'id': 'P005', 'name': '维生素C片', 'base_price': 45.0, 'base_sales': 25,
'seasonality': 'all', 'category': 'OTC'}
]
# 定义中国主要节假日(简化版)
holidays = [
# 2022年节假日
'2022-01-01', '2022-01-31', '2022-02-01', '2022-02-02', '2022-02-03', '2022-02-04', '2022-02-05', '2022-02-06',
'2022-04-03', '2022-04-04', '2022-04-05', '2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03', '2022-05-04',
'2022-06-03', '2022-06-04', '2022-06-05', '2022-09-10', '2022-09-11', '2022-09-12', '2022-10-01', '2022-10-02',
'2022-10-03', '2022-10-04', '2022-10-05', '2022-10-06', '2022-10-07',
# 2023年节假日
'2023-01-01', '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26', '2023-01-27',
'2023-04-05', '2023-04-29', '2023-04-30', '2023-05-01', '2023-05-02', '2023-05-03', '2023-06-22', '2023-06-23',
'2023-06-24', '2023-09-29', '2023-09-30', '2023-10-01', '2023-10-02', '2023-10-03', '2023-10-04', '2023-10-05',
'2023-10-06'
]
holidays = pd.to_datetime(holidays)
# 生成每个产品每天的销售数据
data = []
for date in date_range:
# 基础时间特征
weekday = date.weekday() # 0-60是周一
month = date.month # 1-12
is_holiday = 1 if date in holidays else 0
is_weekend = 1 if weekday >= 5 else 0
# 模拟温度(季节性)- 简单正弦波加噪声
day_of_year = date.dayofyear
base_temp = 20 - 15 * np.cos(2 * np.pi * day_of_year / 365) # 范围约5-35度
temperature = base_temp + np.random.normal(0, 2) # 添加随机波动
# 每三个月一次促销活动
is_promotion_period = 1 if date.day <= 7 and date.month % 3 == 0 else 0
for product in products:
# 基础销量
base_sales = product['base_sales']
# 添加星期几的影响(周末销量更高)
weekday_effect = 1.2 if is_weekend else 1.0
# 添加季节性影响
seasonality_effect = 1.0
if product['seasonality'] == 'winter' and (month in [12, 1, 2]):
seasonality_effect = 1.8 # 冬季感冒药销量提升
elif product['seasonality'] == 'summer' and (month in [6, 7, 8]):
seasonality_effect = 1.5 # 夏季药品销量提升
# 添加促销影响
promotion_effect = 1.4 if is_promotion_period else 1.0
# 添加节假日影响
holiday_effect = 1.3 if is_holiday else 1.0
# 添加温度影响(某些药品与温度相关)
temp_effect = 1.0
if product['seasonality'] == 'winter':
# 温度越低,感冒药销量越高
temp_effect = 1.0 + max(0, (15 - temperature) / 20)
# 计算当天的价格(促销期间有折扣)
price = product['base_price'] * (0.85 if is_promotion_period else 1.0)
# 计算最终销量并添加随机波动
sales = base_sales * weekday_effect * seasonality_effect * promotion_effect * holiday_effect * temp_effect
sales = int(np.round(sales * np.random.normal(1, 0.15))) # 添加随机波动
sales = max(0, sales) # 确保销量不为负
# 添加到数据列表
data.append({
'date': date,
'product_id': product['id'],
'product_name': product['name'],
'category': product['category'],
'sales': sales,
'price': price,
'weekday': weekday,
'month': month,
'is_holiday': is_holiday,
'is_weekend': is_weekend,
'is_promotion': is_promotion_period,
'temperature': temperature
})
# 创建DataFrame
df = pd.DataFrame(data)
# 保存为Excel文件
df.to_excel('pharmacy_sales.xlsx', index=False)
# 创建每个产品的销售趋势图
plt.figure(figsize=(15, 10))
for product in products:
product_data = df[df['product_id'] == product['id']]
plt.plot(product_data['date'], product_data['sales'], label=product['name'])
plt.title('药店产品销售趋势')
plt.xlabel('日期')
plt.ylabel('销售数量')
plt.legend()
plt.grid(True)
plt.savefig('sales_trends.png')
plt.close()
print(f"已生成{len(df)}条模拟药店销售数据并保存至pharmacy_sales.xlsx")
print("数据概览:")
print(df.head())
print("\n每个产品的平均日销量:")
print(df.groupby('product_name')['sales'].mean())