ShopTRAINING/generate_multi_store_data.py
2025-07-02 11:05:23 +08:00

133 lines
5.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
生成多店铺销售数据的脚本
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
# Store master data; the four fields are copied verbatim into every row.
_STORES = [
    {'store_id': 'S001', 'store_name': '市中心旗舰店', 'store_location': '市中心商业区', 'store_type': 'flagship'},
    {'store_id': 'S002', 'store_name': '东区标准店', 'store_location': '东区居民区', 'store_type': 'standard'},
    {'store_id': 'S003', 'store_name': '西区便民店', 'store_location': '西区小区内', 'store_type': 'convenience'},
    {'store_id': 'S004', 'store_name': '南区社区店', 'store_location': '南区社区中心', 'store_type': 'community'},
    {'store_id': 'S005', 'store_name': '北区标准店', 'store_location': '北区商业街', 'store_type': 'standard'},
]

# Product master data; the four fields are copied verbatim into every row.
_PRODUCTS = [
    {'product_id': 'P001', 'product_name': '感冒灵颗粒', 'product_category': '感冒药', 'unit_price': 15.8},
    {'product_id': 'P002', 'product_name': '布洛芬片', 'product_category': '止痛药', 'unit_price': 12.5},
    {'product_id': 'P003', 'product_name': '维生素C', 'product_category': '维生素', 'unit_price': 8.9},
    {'product_id': 'P004', 'product_name': '阿莫西林', 'product_category': '抗生素', 'unit_price': 18.6},
    {'product_id': 'P005', 'product_name': '板蓝根颗粒', 'product_category': '中成药', 'unit_price': 11.2},
]

# Per-store sales multiplier applied on top of each product's base volume.
_STORE_MULTIPLIERS = {
    'S001': 1.5,  # flagship store: high volume
    'S002': 1.0,  # standard store: baseline
    'S003': 0.7,  # convenience store: low volume
    'S004': 0.8,  # community store: medium volume
    'S005': 1.1,  # northern standard store: slightly above baseline
}

# Base daily unit sales per product, before any multiplier is applied.
_BASE_DAILY_SALES = {
    'P001': 25,  # cold medicine: high demand
    'P002': 20,  # painkiller: medium demand
    'P003': 30,  # vitamins: high demand
    'P004': 15,  # antibiotic: low demand
    'P005': 18,  # traditional Chinese medicine: medium demand
}

# Products whose demand follows the winter-high / summer-low pattern.
_SEASONAL_PRODUCT_IDS = ('P001', 'P005')


def _seasonal_factor(product_id, month):
    """Return the seasonal demand multiplier for a product in a given month."""
    if product_id not in _SEASONAL_PRODUCT_IDS:
        return 1.0
    if month in (12, 1, 2, 3):
        return 1.5
    if month in (6, 7, 8):
        return 0.7
    return 1.0


def generate_multi_store_sales_data(
    output_path='pharmacy_sales_multi_store.csv',
    start_date=None,
    end_date=None,
    seed=42,
):
    """Generate synthetic daily sales for every store/product pair and save it as CSV.

    One row is produced per (store, product, day). The quantity is the
    product's base volume scaled by a store multiplier, a seasonal factor,
    a weekend factor (1.2 on Saturday/Sunday) and Gaussian noise
    (mean 1.0, std 0.3), truncated to a non-negative integer.

    Called with no arguments, this generates 2022-01-01 through 2023-12-31
    with seed 42 and writes ``pharmacy_sales_multi_store.csv`` to the
    current working directory.

    Args:
        output_path: Destination of the CSV file (UTF-8, no index column).
        start_date: First day to generate, anything ``pd.Timestamp`` accepts.
            Defaults to 2022-01-01.
        end_date: Last day to generate (inclusive), anything ``pd.Timestamp``
            accepts. Defaults to 2023-12-31.
        seed: Seed applied to both NumPy's global RNG and ``random``.

    Returns:
        The generated ``pandas.DataFrame`` (the same data written to disk).

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    # Seed both generators so repeated runs produce identical data.
    np.random.seed(seed)
    random.seed(seed)

    start_date = pd.Timestamp(datetime(2022, 1, 1) if start_date is None else start_date)
    end_date = pd.Timestamp(datetime(2023, 12, 31) if end_date is None else end_date)
    if end_date < start_date:
        # An empty range would yield a column-less frame and fail later
        # with an unhelpful KeyError in the summary section.
        raise ValueError(
            f"end_date ({end_date}) must not be earlier than start_date ({start_date})"
        )

    date_range = pd.date_range(start=start_date, end=end_date, freq='D')
    print(f"生成日期范围: {start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}")
    print(f"总天数: {len(date_range)}")

    # Date-derived values do not depend on store or product, so compute
    # them once instead of once per store/product pair.
    calendar = [
        (
            day.strftime('%Y-%m-%d'),
            day.strftime('%A'),
            day.month,
            (day.month - 1) // 3 + 1,
            day.year,
            1.2 if day.weekday() >= 5 else 1.0,
        )
        for day in date_range
    ]

    sales_data = []
    # The store -> product -> date nesting fixes the order in which random
    # draws are consumed; changing it would change the generated numbers.
    for store in _STORES:
        store_multiplier = _STORE_MULTIPLIERS[store['store_id']]
        for product in _PRODUCTS:
            product_id = product['product_id']
            unit_price = product['unit_price']
            scaled_base = _BASE_DAILY_SALES[product_id] * store_multiplier
            for date_str, day_name, month, quarter, year, weekend_factor in calendar:
                seasonal_factor = _seasonal_factor(product_id, month)
                random_factor = np.random.normal(1.0, 0.3)
                # Noise can push the product below zero; clamp, then truncate.
                daily_sales = int(max(0, scaled_base * seasonal_factor * weekend_factor * random_factor))
                sales_amount = daily_sales * unit_price
                sales_data.append({
                    'date': date_str,
                    'store_id': store['store_id'],
                    'store_name': store['store_name'],
                    'store_location': store['store_location'],
                    'store_type': store['store_type'],
                    'product_id': product_id,
                    'product_name': product['product_name'],
                    'product_category': product['product_category'],
                    'unit_price': unit_price,
                    'quantity_sold': daily_sales,
                    'sales_amount': round(sales_amount, 2),
                    'day_of_week': day_name,
                    'month': month,
                    'quarter': quarter,
                    'year': year,
                })

    df = pd.DataFrame(sales_data)
    df.to_csv(output_path, index=False, encoding='utf-8')

    print("多店铺销售数据生成完成!")
    print(f"数据记录数: {len(df)}")
    print(f"日期范围: {df['date'].min()} 至 {df['date'].max()}")
    print(f"店铺数量: {df['store_id'].nunique()}")
    print(f"产品数量: {df['product_id'].nunique()}")
    print(f"文件保存为: {output_path}")

    # Show a sample of the generated rows.
    print("\n数据样本:")
    print(df.head(10))

    # Show total units and revenue per store.
    print("\n各店铺销售统计:")
    store_stats = df.groupby(['store_id', 'store_name']).agg({
        'quantity_sold': 'sum',
        'sales_amount': 'sum'
    }).round(2)
    print(store_stats)

    return df
# Script entry point: generate the dataset only when this file is executed
# directly, so that importing the module does not trigger generation.
if __name__ == "__main__":
    generate_multi_store_sales_data()