import pandas as pd import numpy as np import datetime from sklearn.preprocessing import MinMaxScaler import matplotlib.pyplot as plt import os def generate_pharmacy_data(save_path=None): """ 生成药店销售模拟数据并保存为Excel文件 参数: save_path: 保存文件的路径,如果为None则保存在当前目录 返回: DataFrame: 生成的销售数据 """ # 设置随机种子,确保结果可重现 np.random.seed(42) # 生成日期范围(2年的数据) start_date = datetime.datetime(2022, 1, 1) end_date = datetime.datetime(2023, 12, 31) date_range = pd.date_range(start=start_date, end=end_date, freq='D') # 定义药品信息 products = [ {'id': 'P001', 'name': '感冒灵颗粒', 'base_price': 35.5, 'base_sales': 15, 'seasonality': 'winter', 'category': 'OTC'}, {'id': 'P002', 'name': '布洛芬片', 'base_price': 25.8, 'base_sales': 20, 'seasonality': 'all', 'category': 'OTC'}, {'id': 'P003', 'name': '阿莫西林胶囊', 'base_price': 32.6, 'base_sales': 18, 'seasonality': 'all', 'category': 'RX'}, {'id': 'P004', 'name': '板蓝根冲剂', 'base_price': 28.9, 'base_sales': 12, 'seasonality': 'winter', 'category': 'OTC'}, {'id': 'P005', 'name': '维生素C片', 'base_price': 45.0, 'base_sales': 25, 'seasonality': 'all', 'category': 'OTC'} ] # 定义中国主要节假日(简化版) holidays = [ # 2022年节假日 '2022-01-01', '2022-01-31', '2022-02-01', '2022-02-02', '2022-02-03', '2022-02-04', '2022-02-05', '2022-02-06', '2022-04-03', '2022-04-04', '2022-04-05', '2022-04-30', '2022-05-01', '2022-05-02', '2022-05-03', '2022-05-04', '2022-06-03', '2022-06-04', '2022-06-05', '2022-09-10', '2022-09-11', '2022-09-12', '2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04', '2022-10-05', '2022-10-06', '2022-10-07', # 2023年节假日 '2023-01-01', '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26', '2023-01-27', '2023-04-05', '2023-04-29', '2023-04-30', '2023-05-01', '2023-05-02', '2023-05-03', '2023-06-22', '2023-06-23', '2023-06-24', '2023-09-29', '2023-09-30', '2023-10-01', '2023-10-02', '2023-10-03', '2023-10-04', '2023-10-05', '2023-10-06' ] holidays = pd.to_datetime(holidays) # 生成每个产品每天的销售数据 data = [] for date in date_range: # 基础时间特征 weekday = date.weekday() # 0-6,0是周一 month = date.month # 1-12 is_holiday = 1 if date in holidays else 0 is_weekend = 1 if weekday >= 5 else 0 # 模拟温度(季节性)- 简单正弦波加噪声 day_of_year = date.dayofyear base_temp = 20 - 15 * np.cos(2 * np.pi * day_of_year / 365) # 范围约5-35度 temperature = base_temp + np.random.normal(0, 2) # 添加随机波动 # 每三个月一次促销活动 is_promotion_period = 1 if date.day <= 7 and date.month % 3 == 0 else 0 for product in products: # 基础销量 base_sales = product['base_sales'] # 添加星期几的影响(周末销量更高) weekday_effect = 1.2 if is_weekend else 1.0 # 添加季节性影响 seasonality_effect = 1.0 if product['seasonality'] == 'winter' and (month in [12, 1, 2]): seasonality_effect = 1.8 # 冬季感冒药销量提升 elif product['seasonality'] == 'summer' and (month in [6, 7, 8]): seasonality_effect = 1.5 # 夏季药品销量提升 # 添加促销影响 promotion_effect = 1.4 if is_promotion_period else 1.0 # 添加节假日影响 holiday_effect = 1.3 if is_holiday else 1.0 # 添加温度影响(某些药品与温度相关) temp_effect = 1.0 if product['seasonality'] == 'winter': # 温度越低,感冒药销量越高 temp_effect = 1.0 + max(0, (15 - temperature) / 20) # 计算当天的价格(促销期间有折扣) price = product['base_price'] * (0.85 if is_promotion_period else 1.0) # 计算最终销量并添加随机波动 sales = base_sales * weekday_effect * seasonality_effect * promotion_effect * holiday_effect * temp_effect sales = int(np.round(sales * np.random.normal(1, 0.15))) # 添加随机波动 sales = max(0, sales) # 确保销量不为负 # 添加到数据列表 data.append({ 'date': date, 'product_id': product['id'], 'product_name': product['name'], 'category': product['category'], 'sales': sales, 'price': price, 'weekday': weekday, 'month': month, 'is_holiday': is_holiday, 'is_weekend': is_weekend, 'is_promotion': is_promotion_period, 'temperature': temperature }) # 创建DataFrame df = pd.DataFrame(data) # 确定保存路径 output_path = 'pharmacy_sales.xlsx' if save_path is None else save_path # 保存为Excel文件 df.to_excel(output_path, index=False) # 创建每个产品的销售趋势图 plt.figure(figsize=(15, 10)) for product in products: product_data = df[df['product_id'] == product['id']] plt.plot(product_data['date'], product_data['sales'], label=product['name']) plt.title('药店产品销售趋势') plt.xlabel('日期') plt.ylabel('销售数量') plt.legend() plt.grid(True) # 保存图表 image_path = os.path.join(os.path.dirname(output_path), 'sales_trends.png') plt.savefig(image_path) plt.close() print(f"已生成{len(df)}条模拟药店销售数据,并保存至{output_path}") print("数据概览:") print(df.head()) print("\n每个产品的平均日销量:") print(df.groupby('product_name')['sales'].mean()) return df # 如果直接运行此文件,则生成数据 if __name__ == "__main__": generate_pharmacy_data()