161 lines
6.3 KiB
Python
161 lines
6.3 KiB
Python
![]() |
import pandas as pd
|
|||
|
import numpy as np
|
|||
|
import datetime
|
|||
|
from sklearn.preprocessing import MinMaxScaler
|
|||
|
import matplotlib.pyplot as plt
|
|||
|
import os
|
|||
|
|
|||
|
def generate_pharmacy_data(save_path=None):
|
|||
|
"""
|
|||
|
生成药店销售模拟数据并保存为Excel文件
|
|||
|
|
|||
|
参数:
|
|||
|
save_path: 保存文件的路径,如果为None则保存在当前目录
|
|||
|
|
|||
|
返回:
|
|||
|
DataFrame: 生成的销售数据
|
|||
|
"""
|
|||
|
# 设置随机种子,确保结果可重现
|
|||
|
np.random.seed(42)
|
|||
|
|
|||
|
# 生成日期范围(2年的数据)
|
|||
|
start_date = datetime.datetime(2022, 1, 1)
|
|||
|
end_date = datetime.datetime(2023, 12, 31)
|
|||
|
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
|
|||
|
|
|||
|
# 定义药品信息
|
|||
|
products = [
|
|||
|
{'id': 'P001', 'name': '感冒灵颗粒', 'base_price': 35.5, 'base_sales': 15,
|
|||
|
'seasonality': 'winter', 'category': 'OTC'},
|
|||
|
{'id': 'P002', 'name': '布洛芬片', 'base_price': 25.8, 'base_sales': 20,
|
|||
|
'seasonality': 'all', 'category': 'OTC'},
|
|||
|
{'id': 'P003', 'name': '阿莫西林胶囊', 'base_price': 32.6, 'base_sales': 18,
|
|||
|
'seasonality': 'all', 'category': 'RX'},
|
|||
|
{'id': 'P004', 'name': '板蓝根冲剂', 'base_price': 28.9, 'base_sales': 12,
|
|||
|
'seasonality': 'winter', 'category': 'OTC'},
|
|||
|
{'id': 'P005', 'name': '维生素C片', 'base_price': 45.0, 'base_sales': 25,
|
|||
|
'seasonality': 'all', 'category': 'OTC'}
|
|||
|
]
|
|||
|
|
|||
|
# 定义中国主要节假日(简化版)
|
|||
|
holidays = [
|
|||
|
# 2022年节假日
|
|||
|
'2022-01-01', '2022-01-31', '2022-02-01', '2022-02-02', '2022-02-03', '2022-02-04', '2022-02-05', '2022-02-06',
|
|||
|
'2022-04-03', '2022-04-04', '2022-04-05', '2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03', '2022-05-04',
|
|||
|
'2022-06-03', '2022-06-04', '2022-06-05', '2022-09-10', '2022-09-11', '2022-09-12', '2022-10-01', '2022-10-02',
|
|||
|
'2022-10-03', '2022-10-04', '2022-10-05', '2022-10-06', '2022-10-07',
|
|||
|
|
|||
|
# 2023年节假日
|
|||
|
'2023-01-01', '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26', '2023-01-27',
|
|||
|
'2023-04-05', '2023-04-29', '2023-04-30', '2023-05-01', '2023-05-02', '2023-05-03', '2023-06-22', '2023-06-23',
|
|||
|
'2023-06-24', '2023-09-29', '2023-09-30', '2023-10-01', '2023-10-02', '2023-10-03', '2023-10-04', '2023-10-05',
|
|||
|
'2023-10-06'
|
|||
|
]
|
|||
|
|
|||
|
holidays = pd.to_datetime(holidays)
|
|||
|
|
|||
|
# 生成每个产品每天的销售数据
|
|||
|
data = []
|
|||
|
|
|||
|
for date in date_range:
|
|||
|
# 基础时间特征
|
|||
|
weekday = date.weekday() # 0-6,0是周一
|
|||
|
month = date.month # 1-12
|
|||
|
is_holiday = 1 if date in holidays else 0
|
|||
|
is_weekend = 1 if weekday >= 5 else 0
|
|||
|
|
|||
|
# 模拟温度(季节性)- 简单正弦波加噪声
|
|||
|
day_of_year = date.dayofyear
|
|||
|
base_temp = 20 - 15 * np.cos(2 * np.pi * day_of_year / 365) # 范围约5-35度
|
|||
|
temperature = base_temp + np.random.normal(0, 2) # 添加随机波动
|
|||
|
|
|||
|
# 每三个月一次促销活动
|
|||
|
is_promotion_period = 1 if date.day <= 7 and date.month % 3 == 0 else 0
|
|||
|
|
|||
|
for product in products:
|
|||
|
# 基础销量
|
|||
|
base_sales = product['base_sales']
|
|||
|
|
|||
|
# 添加星期几的影响(周末销量更高)
|
|||
|
weekday_effect = 1.2 if is_weekend else 1.0
|
|||
|
|
|||
|
# 添加季节性影响
|
|||
|
seasonality_effect = 1.0
|
|||
|
if product['seasonality'] == 'winter' and (month in [12, 1, 2]):
|
|||
|
seasonality_effect = 1.8 # 冬季感冒药销量提升
|
|||
|
elif product['seasonality'] == 'summer' and (month in [6, 7, 8]):
|
|||
|
seasonality_effect = 1.5 # 夏季药品销量提升
|
|||
|
|
|||
|
# 添加促销影响
|
|||
|
promotion_effect = 1.4 if is_promotion_period else 1.0
|
|||
|
|
|||
|
# 添加节假日影响
|
|||
|
holiday_effect = 1.3 if is_holiday else 1.0
|
|||
|
|
|||
|
# 添加温度影响(某些药品与温度相关)
|
|||
|
temp_effect = 1.0
|
|||
|
if product['seasonality'] == 'winter':
|
|||
|
# 温度越低,感冒药销量越高
|
|||
|
temp_effect = 1.0 + max(0, (15 - temperature) / 20)
|
|||
|
|
|||
|
# 计算当天的价格(促销期间有折扣)
|
|||
|
price = product['base_price'] * (0.85 if is_promotion_period else 1.0)
|
|||
|
|
|||
|
# 计算最终销量并添加随机波动
|
|||
|
sales = base_sales * weekday_effect * seasonality_effect * promotion_effect * holiday_effect * temp_effect
|
|||
|
sales = int(np.round(sales * np.random.normal(1, 0.15))) # 添加随机波动
|
|||
|
sales = max(0, sales) # 确保销量不为负
|
|||
|
|
|||
|
# 添加到数据列表
|
|||
|
data.append({
|
|||
|
'date': date,
|
|||
|
'product_id': product['id'],
|
|||
|
'product_name': product['name'],
|
|||
|
'category': product['category'],
|
|||
|
'sales': sales,
|
|||
|
'price': price,
|
|||
|
'weekday': weekday,
|
|||
|
'month': month,
|
|||
|
'is_holiday': is_holiday,
|
|||
|
'is_weekend': is_weekend,
|
|||
|
'is_promotion': is_promotion_period,
|
|||
|
'temperature': temperature
|
|||
|
})
|
|||
|
|
|||
|
# 创建DataFrame
|
|||
|
df = pd.DataFrame(data)
|
|||
|
|
|||
|
# 确定保存路径
|
|||
|
output_path = 'pharmacy_sales.xlsx' if save_path is None else save_path
|
|||
|
|
|||
|
# 保存为Excel文件
|
|||
|
df.to_excel(output_path, index=False)
|
|||
|
|
|||
|
# 创建每个产品的销售趋势图
|
|||
|
plt.figure(figsize=(15, 10))
|
|||
|
for product in products:
|
|||
|
product_data = df[df['product_id'] == product['id']]
|
|||
|
plt.plot(product_data['date'], product_data['sales'], label=product['name'])
|
|||
|
|
|||
|
plt.title('药店产品销售趋势')
|
|||
|
plt.xlabel('日期')
|
|||
|
plt.ylabel('销售数量')
|
|||
|
plt.legend()
|
|||
|
plt.grid(True)
|
|||
|
|
|||
|
# 保存图表
|
|||
|
image_path = os.path.join(os.path.dirname(output_path), 'sales_trends.png')
|
|||
|
plt.savefig(image_path)
|
|||
|
plt.close()
|
|||
|
|
|||
|
print(f"已生成{len(df)}条模拟药店销售数据,并保存至{output_path}")
|
|||
|
print("数据概览:")
|
|||
|
print(df.head())
|
|||
|
print("\n每个产品的平均日销量:")
|
|||
|
print(df.groupby('product_name')['sales'].mean())
|
|||
|
|
|||
|
return df
|
|||
|
|
|||
|
# 如果直接运行此文件,则生成数据
|
|||
|
if __name__ == "__main__":
|
|||
|
generate_pharmacy_data()
|