ShopTRAINING/generate_multi_store_data.py

133 lines
5.4 KiB
Python
Raw Normal View History

2025-07-02 11:05:23 +08:00
#!/usr/bin/env python3
"""
生成多店铺销售数据的脚本
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
def _seasonal_factor(product_id, month):
    """Return the seasonal demand multiplier for one product in one month.

    Only products P001 and P005 are seasonal: their demand is multiplied by
    1.5 from December through March and by 0.7 from June through August.
    Every other product/month combination returns 1.0.
    """
    if product_id in ('P001', 'P005'):
        if month in (12, 1, 2, 3):
            return 1.5
        if month in (6, 7, 8):
            return 0.7
    return 1.0


def generate_multi_store_sales_data(
    output_path='pharmacy_sales_multi_store.csv',
    *,
    start_date=datetime(2022, 1, 1),
    end_date=datetime(2023, 12, 31),
):
    """Generate synthetic daily sales for every store/product pair and save as CSV.

    One row is produced per (store, product, day). The quantity sold is
    ``base_sales * store_multiplier * seasonal_factor * weekend_factor *
    noise`` truncated to a non-negative integer, where ``noise`` is drawn
    from a normal distribution with mean 1.0 and standard deviation 0.3.
    Both NumPy's and the standard library's global random generators are
    seeded with 42, so repeated calls with the same arguments produce the
    same data.

    Args:
        output_path: Destination of the CSV file (written as UTF-8, no index).
        start_date: First day of the generated range (inclusive); anything
            accepted by ``pandas.date_range``.
        end_date: Last day of the generated range (inclusive).

    Returns:
        The generated ``pandas.DataFrame`` (the same data written to CSV).

    Raises:
        ValueError: If the date range is empty (``end_date`` precedes
            ``start_date``).
    """
    # Fixed seeds keep the generated dataset reproducible.
    np.random.seed(42)
    random.seed(42)

    # Store master data.
    stores = [
        {'store_id': 'S001', 'store_name': '市中心旗舰店', 'store_location': '市中心商业区', 'store_type': 'flagship'},
        {'store_id': 'S002', 'store_name': '东区标准店', 'store_location': '东区居民区', 'store_type': 'standard'},
        {'store_id': 'S003', 'store_name': '西区便民店', 'store_location': '西区小区内', 'store_type': 'convenience'},
        {'store_id': 'S004', 'store_name': '南区社区店', 'store_location': '南区社区中心', 'store_type': 'community'},
        {'store_id': 'S005', 'store_name': '北区标准店', 'store_location': '北区商业街', 'store_type': 'standard'},
    ]

    # Product master data.
    products = [
        {'product_id': 'P001', 'product_name': '感冒灵颗粒', 'product_category': '感冒药', 'unit_price': 15.8},
        {'product_id': 'P002', 'product_name': '布洛芬片', 'product_category': '止痛药', 'unit_price': 12.5},
        {'product_id': 'P003', 'product_name': '维生素C', 'product_category': '维生素', 'unit_price': 8.9},
        {'product_id': 'P004', 'product_name': '阿莫西林', 'product_category': '抗生素', 'unit_price': 18.6},
        {'product_id': 'P005', 'product_name': '板蓝根颗粒', 'product_category': '中成药', 'unit_price': 11.2},
    ]

    # Per-store volume multiplier (built once instead of once per loop pass).
    store_multipliers = {
        'S001': 1.5,  # flagship store: high volume
        'S002': 1.0,  # standard store: baseline
        'S003': 0.7,  # convenience store: low volume
        'S004': 0.8,  # community store: medium volume
        'S005': 1.1,  # northern standard store: slightly above baseline
    }

    # Per-product baseline daily quantity.
    base_sales_by_product = {
        'P001': 25,  # cold medicine: high demand
        'P002': 20,  # painkiller: medium demand
        'P003': 30,  # vitamins: high demand
        'P004': 15,  # antibiotics: low demand
        'P005': 18,  # traditional Chinese medicine: medium demand
    }

    # Daily range; the default covers two full years so there is enough
    # history for training.
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    if len(date_range) == 0:
        raise ValueError(
            f"empty date range: start_date={start_date!r}, end_date={end_date!r}"
        )

    first_day = date_range[0].strftime('%Y-%m-%d')
    last_day = date_range[-1].strftime('%Y-%m-%d')
    # The separator keeps the two dates from running together in the output.
    print(f"生成日期范围: {first_day} 至 {last_day}")
    print(f"总天数: {len(date_range)}")

    # Everything that depends only on the date is computed once here rather
    # than once per store/product combination.
    calendar = [
        {
            'date': day.strftime('%Y-%m-%d'),
            'day_of_week': day.strftime('%A'),
            'month': day.month,
            'quarter': (day.month - 1) // 3 + 1,
            'year': day.year,
            # Saturday/Sunday get a 20% uplift.
            'weekend_factor': 1.2 if day.weekday() >= 5 else 1.0,
        }
        for day in date_range
    ]

    # Generate the sales rows. The loop order (store, product, day) fixes
    # the order in which random draws are consumed, so it must not change.
    sales_data = []
    for store in stores:
        store_multiplier = store_multipliers[store['store_id']]

        for product in products:
            product_id = product['product_id']
            unit_price = product['unit_price']
            base_sales = base_sales_by_product[product_id]

            for day in calendar:
                seasonal_factor = _seasonal_factor(product_id, day['month'])

                # Random day-to-day fluctuation.
                random_factor = np.random.normal(1.0, 0.3)

                # Clamp at zero: the noise term can be negative.
                daily_sales = int(max(
                    0,
                    base_sales * store_multiplier * seasonal_factor
                    * day['weekend_factor'] * random_factor,
                ))
                sales_amount = daily_sales * unit_price

                sales_data.append({
                    'date': day['date'],
                    'store_id': store['store_id'],
                    'store_name': store['store_name'],
                    'store_location': store['store_location'],
                    'store_type': store['store_type'],
                    'product_id': product_id,
                    'product_name': product['product_name'],
                    'product_category': product['product_category'],
                    'unit_price': unit_price,
                    'quantity_sold': daily_sales,
                    'sales_amount': round(sales_amount, 2),
                    'day_of_week': day['day_of_week'],
                    'month': day['month'],
                    'quarter': day['quarter'],
                    'year': day['year'],
                })

    df = pd.DataFrame(sales_data)

    # Persist to CSV.
    df.to_csv(output_path, index=False, encoding='utf-8')

    print("多店铺销售数据生成完成!")
    print(f"数据记录数: {len(df)}")
    print(f"日期范围: {df['date'].min()} 至 {df['date'].max()}")
    print(f"店铺数量: {df['store_id'].nunique()}")
    print(f"产品数量: {df['product_id'].nunique()}")
    print(f"文件保存为: {output_path}")

    # Show a sample of the data.
    print("\n数据样本:")
    print(df.head(10))

    # Show per-store totals.
    print("\n各店铺销售统计:")
    store_stats = df.groupby(['store_id', 'store_name']).agg({
        'quantity_sold': 'sum',
        'sales_amount': 'sum'
    }).round(2)
    print(store_stats)

    return df
# Script entry point: build and save the dataset only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    generate_multi_store_sales_data()