ShopTRAINING/server/generate_pharmacy_data.py

161 lines
6.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import os
def generate_pharmacy_data(save_path=None):
"""
生成药店销售模拟数据并保存为Excel文件
参数:
save_path: 保存文件的路径如果为None则保存在当前目录
返回:
DataFrame: 生成的销售数据
"""
# 设置随机种子,确保结果可重现
np.random.seed(42)
# 生成日期范围2年的数据
start_date = datetime.datetime(2022, 1, 1)
end_date = datetime.datetime(2023, 12, 31)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
# 定义药品信息
products = [
{'id': 'P001', 'name': '感冒灵颗粒', 'base_price': 35.5, 'base_sales': 15,
'seasonality': 'winter', 'category': 'OTC'},
{'id': 'P002', 'name': '布洛芬片', 'base_price': 25.8, 'base_sales': 20,
'seasonality': 'all', 'category': 'OTC'},
{'id': 'P003', 'name': '阿莫西林胶囊', 'base_price': 32.6, 'base_sales': 18,
'seasonality': 'all', 'category': 'RX'},
{'id': 'P004', 'name': '板蓝根冲剂', 'base_price': 28.9, 'base_sales': 12,
'seasonality': 'winter', 'category': 'OTC'},
{'id': 'P005', 'name': '维生素C片', 'base_price': 45.0, 'base_sales': 25,
'seasonality': 'all', 'category': 'OTC'}
]
# 定义中国主要节假日(简化版)
holidays = [
# 2022年节假日
'2022-01-01', '2022-01-31', '2022-02-01', '2022-02-02', '2022-02-03', '2022-02-04', '2022-02-05', '2022-02-06',
'2022-04-03', '2022-04-04', '2022-04-05', '2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03', '2022-05-04',
'2022-06-03', '2022-06-04', '2022-06-05', '2022-09-10', '2022-09-11', '2022-09-12', '2022-10-01', '2022-10-02',
'2022-10-03', '2022-10-04', '2022-10-05', '2022-10-06', '2022-10-07',
# 2023年节假日
'2023-01-01', '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26', '2023-01-27',
'2023-04-05', '2023-04-29', '2023-04-30', '2023-05-01', '2023-05-02', '2023-05-03', '2023-06-22', '2023-06-23',
'2023-06-24', '2023-09-29', '2023-09-30', '2023-10-01', '2023-10-02', '2023-10-03', '2023-10-04', '2023-10-05',
'2023-10-06'
]
holidays = pd.to_datetime(holidays)
# 生成每个产品每天的销售数据
data = []
for date in date_range:
# 基础时间特征
weekday = date.weekday() # 0-60是周一
month = date.month # 1-12
is_holiday = 1 if date in holidays else 0
is_weekend = 1 if weekday >= 5 else 0
# 模拟温度(季节性)- 简单正弦波加噪声
day_of_year = date.dayofyear
base_temp = 20 - 15 * np.cos(2 * np.pi * day_of_year / 365) # 范围约5-35度
temperature = base_temp + np.random.normal(0, 2) # 添加随机波动
# 每三个月一次促销活动
is_promotion_period = 1 if date.day <= 7 and date.month % 3 == 0 else 0
for product in products:
# 基础销量
base_sales = product['base_sales']
# 添加星期几的影响(周末销量更高)
weekday_effect = 1.2 if is_weekend else 1.0
# 添加季节性影响
seasonality_effect = 1.0
if product['seasonality'] == 'winter' and (month in [12, 1, 2]):
seasonality_effect = 1.8 # 冬季感冒药销量提升
elif product['seasonality'] == 'summer' and (month in [6, 7, 8]):
seasonality_effect = 1.5 # 夏季药品销量提升
# 添加促销影响
promotion_effect = 1.4 if is_promotion_period else 1.0
# 添加节假日影响
holiday_effect = 1.3 if is_holiday else 1.0
# 添加温度影响(某些药品与温度相关)
temp_effect = 1.0
if product['seasonality'] == 'winter':
# 温度越低,感冒药销量越高
temp_effect = 1.0 + max(0, (15 - temperature) / 20)
# 计算当天的价格(促销期间有折扣)
price = product['base_price'] * (0.85 if is_promotion_period else 1.0)
# 计算最终销量并添加随机波动
sales = base_sales * weekday_effect * seasonality_effect * promotion_effect * holiday_effect * temp_effect
sales = int(np.round(sales * np.random.normal(1, 0.15))) # 添加随机波动
sales = max(0, sales) # 确保销量不为负
# 添加到数据列表
data.append({
'date': date,
'product_id': product['id'],
'product_name': product['name'],
'category': product['category'],
'sales': sales,
'price': price,
'weekday': weekday,
'month': month,
'is_holiday': is_holiday,
'is_weekend': is_weekend,
'is_promotion': is_promotion_period,
'temperature': temperature
})
# 创建DataFrame
df = pd.DataFrame(data)
# 确定保存路径
output_path = 'pharmacy_sales.xlsx' if save_path is None else save_path
# 保存为Excel文件
df.to_excel(output_path, index=False)
# 创建每个产品的销售趋势图
plt.figure(figsize=(15, 10))
for product in products:
product_data = df[df['product_id'] == product['id']]
plt.plot(product_data['date'], product_data['sales'], label=product['name'])
plt.title('药店产品销售趋势')
plt.xlabel('日期')
plt.ylabel('销售数量')
plt.legend()
plt.grid(True)
# 保存图表
image_path = os.path.join(os.path.dirname(output_path), 'sales_trends.png')
plt.savefig(image_path)
plt.close()
print(f"已生成{len(df)}条模拟药店销售数据,并保存至{output_path}")
print("数据概览:")
print(df.head())
print("\n每个产品的平均日销量:")
print(df.groupby('product_name')['sales'].mean())
return df
# 如果直接运行此文件,则生成数据
if __name__ == "__main__":
generate_pharmacy_data()