161 lines
6.3 KiB
Python
161 lines
6.3 KiB
Python
import pandas as pd
|
||
import numpy as np
|
||
import datetime
|
||
from sklearn.preprocessing import MinMaxScaler
|
||
import matplotlib.pyplot as plt
|
||
import os
|
||
|
||
def generate_pharmacy_data(save_path=None):
|
||
"""
|
||
生成药店销售模拟数据并保存为Excel文件
|
||
|
||
参数:
|
||
save_path: 保存文件的路径,如果为None则保存在当前目录
|
||
|
||
返回:
|
||
DataFrame: 生成的销售数据
|
||
"""
|
||
# 设置随机种子,确保结果可重现
|
||
np.random.seed(42)
|
||
|
||
# 生成日期范围(2年的数据)
|
||
start_date = datetime.datetime(2022, 1, 1)
|
||
end_date = datetime.datetime(2023, 12, 31)
|
||
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
|
||
|
||
# 定义药品信息
|
||
products = [
|
||
{'id': 'P001', 'name': '感冒灵颗粒', 'base_price': 35.5, 'base_sales': 15,
|
||
'seasonality': 'winter', 'category': 'OTC'},
|
||
{'id': 'P002', 'name': '布洛芬片', 'base_price': 25.8, 'base_sales': 20,
|
||
'seasonality': 'all', 'category': 'OTC'},
|
||
{'id': 'P003', 'name': '阿莫西林胶囊', 'base_price': 32.6, 'base_sales': 18,
|
||
'seasonality': 'all', 'category': 'RX'},
|
||
{'id': 'P004', 'name': '板蓝根冲剂', 'base_price': 28.9, 'base_sales': 12,
|
||
'seasonality': 'winter', 'category': 'OTC'},
|
||
{'id': 'P005', 'name': '维生素C片', 'base_price': 45.0, 'base_sales': 25,
|
||
'seasonality': 'all', 'category': 'OTC'}
|
||
]
|
||
|
||
# 定义中国主要节假日(简化版)
|
||
holidays = [
|
||
# 2022年节假日
|
||
'2022-01-01', '2022-01-31', '2022-02-01', '2022-02-02', '2022-02-03', '2022-02-04', '2022-02-05', '2022-02-06',
|
||
'2022-04-03', '2022-04-04', '2022-04-05', '2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03', '2022-05-04',
|
||
'2022-06-03', '2022-06-04', '2022-06-05', '2022-09-10', '2022-09-11', '2022-09-12', '2022-10-01', '2022-10-02',
|
||
'2022-10-03', '2022-10-04', '2022-10-05', '2022-10-06', '2022-10-07',
|
||
|
||
# 2023年节假日
|
||
'2023-01-01', '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26', '2023-01-27',
|
||
'2023-04-05', '2023-04-29', '2023-04-30', '2023-05-01', '2023-05-02', '2023-05-03', '2023-06-22', '2023-06-23',
|
||
'2023-06-24', '2023-09-29', '2023-09-30', '2023-10-01', '2023-10-02', '2023-10-03', '2023-10-04', '2023-10-05',
|
||
'2023-10-06'
|
||
]
|
||
|
||
holidays = pd.to_datetime(holidays)
|
||
|
||
# 生成每个产品每天的销售数据
|
||
data = []
|
||
|
||
for date in date_range:
|
||
# 基础时间特征
|
||
weekday = date.weekday() # 0-6,0是周一
|
||
month = date.month # 1-12
|
||
is_holiday = 1 if date in holidays else 0
|
||
is_weekend = 1 if weekday >= 5 else 0
|
||
|
||
# 模拟温度(季节性)- 简单正弦波加噪声
|
||
day_of_year = date.dayofyear
|
||
base_temp = 20 - 15 * np.cos(2 * np.pi * day_of_year / 365) # 范围约5-35度
|
||
temperature = base_temp + np.random.normal(0, 2) # 添加随机波动
|
||
|
||
# 每三个月一次促销活动
|
||
is_promotion_period = 1 if date.day <= 7 and date.month % 3 == 0 else 0
|
||
|
||
for product in products:
|
||
# 基础销量
|
||
base_sales = product['base_sales']
|
||
|
||
# 添加星期几的影响(周末销量更高)
|
||
weekday_effect = 1.2 if is_weekend else 1.0
|
||
|
||
# 添加季节性影响
|
||
seasonality_effect = 1.0
|
||
if product['seasonality'] == 'winter' and (month in [12, 1, 2]):
|
||
seasonality_effect = 1.8 # 冬季感冒药销量提升
|
||
elif product['seasonality'] == 'summer' and (month in [6, 7, 8]):
|
||
seasonality_effect = 1.5 # 夏季药品销量提升
|
||
|
||
# 添加促销影响
|
||
promotion_effect = 1.4 if is_promotion_period else 1.0
|
||
|
||
# 添加节假日影响
|
||
holiday_effect = 1.3 if is_holiday else 1.0
|
||
|
||
# 添加温度影响(某些药品与温度相关)
|
||
temp_effect = 1.0
|
||
if product['seasonality'] == 'winter':
|
||
# 温度越低,感冒药销量越高
|
||
temp_effect = 1.0 + max(0, (15 - temperature) / 20)
|
||
|
||
# 计算当天的价格(促销期间有折扣)
|
||
price = product['base_price'] * (0.85 if is_promotion_period else 1.0)
|
||
|
||
# 计算最终销量并添加随机波动
|
||
sales = base_sales * weekday_effect * seasonality_effect * promotion_effect * holiday_effect * temp_effect
|
||
sales = int(np.round(sales * np.random.normal(1, 0.15))) # 添加随机波动
|
||
sales = max(0, sales) # 确保销量不为负
|
||
|
||
# 添加到数据列表
|
||
data.append({
|
||
'date': date,
|
||
'product_id': product['id'],
|
||
'product_name': product['name'],
|
||
'category': product['category'],
|
||
'sales': sales,
|
||
'price': price,
|
||
'weekday': weekday,
|
||
'month': month,
|
||
'is_holiday': is_holiday,
|
||
'is_weekend': is_weekend,
|
||
'is_promotion': is_promotion_period,
|
||
'temperature': temperature
|
||
})
|
||
|
||
# 创建DataFrame
|
||
df = pd.DataFrame(data)
|
||
|
||
# 确定保存路径
|
||
output_path = 'pharmacy_sales.xlsx' if save_path is None else save_path
|
||
|
||
# 保存为Excel文件
|
||
df.to_excel(output_path, index=False)
|
||
|
||
# 创建每个产品的销售趋势图
|
||
plt.figure(figsize=(15, 10))
|
||
for product in products:
|
||
product_data = df[df['product_id'] == product['id']]
|
||
plt.plot(product_data['date'], product_data['sales'], label=product['name'])
|
||
|
||
plt.title('药店产品销售趋势')
|
||
plt.xlabel('日期')
|
||
plt.ylabel('销售数量')
|
||
plt.legend()
|
||
plt.grid(True)
|
||
|
||
# 保存图表
|
||
image_path = os.path.join(os.path.dirname(output_path), 'sales_trends.png')
|
||
plt.savefig(image_path)
|
||
plt.close()
|
||
|
||
print(f"已生成{len(df)}条模拟药店销售数据,并保存至{output_path}")
|
||
print("数据概览:")
|
||
print(df.head())
|
||
print("\n每个产品的平均日销量:")
|
||
print(df.groupby('product_name')['sales'].mean())
|
||
|
||
return df
|
||
|
||
# 如果直接运行此文件,则生成数据
|
||
if __name__ == "__main__":
|
||
generate_pharmacy_data() |