#!/usr/bin/env python3
|
||
"""
|
||
生成多店铺销售数据的脚本
|
||
"""
|
||
|
||
import pandas as pd
|
||
import numpy as np
|
||
from datetime import datetime, timedelta
|
||
import random
|
||
|
||
def _seasonal_factor(product_id, month):
    """Return the seasonal demand multiplier for a product in a given month.

    Only the cold-related products (P001, P005) are seasonal: their demand is
    multiplied by 1.5 in December-March and by 0.7 in June-August. Every other
    product/month combination yields 1.0.
    """
    if product_id in ('P001', 'P005'):
        if month in (12, 1, 2, 3):
            return 1.5
        if month in (6, 7, 8):
            return 0.7
    return 1.0


def generate_multi_store_sales_data(
    output_path='pharmacy_sales_multi_store.csv',
    start_date=datetime(2022, 1, 1),
    end_date=datetime(2023, 12, 31),
    seed=42,
):
    """Generate synthetic daily sales for every store/product pair and save them as CSV.

    One record is produced per store, per product, per calendar day in the
    inclusive range ``start_date``..``end_date``. The quantity sold is
    ``base_sales * store_multiplier * seasonal_factor * weekend_factor *
    random_factor`` truncated to a non-negative integer, where
    ``random_factor`` is drawn from a normal distribution (mean 1.0, std 0.3).

    Called with no arguments, the function behaves exactly as it always has:
    two years of data (2022-01-01 to 2023-12-31), seed 42, written to
    ``pharmacy_sales_multi_store.csv`` in the current working directory.

    Args:
        output_path: Destination of the CSV file (UTF-8, no index column).
        start_date: First day to generate; anything ``pandas.Timestamp`` accepts.
        end_date: Last day to generate (inclusive); same accepted types.
        seed: Seed applied to the global ``numpy.random`` and ``random``
            generators so that runs are reproducible.

    Returns:
        pandas.DataFrame: The generated records, in the same column order as
        the CSV file.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    # Seed the global generators so repeated runs yield identical data.
    np.random.seed(seed)
    random.seed(seed)

    # Store master data.
    stores = [
        {'store_id': 'S001', 'store_name': '市中心旗舰店', 'store_location': '市中心商业区', 'store_type': 'flagship'},
        {'store_id': 'S002', 'store_name': '东区标准店', 'store_location': '东区居民区', 'store_type': 'standard'},
        {'store_id': 'S003', 'store_name': '西区便民店', 'store_location': '西区小区内', 'store_type': 'convenience'},
        {'store_id': 'S004', 'store_name': '南区社区店', 'store_location': '南区社区中心', 'store_type': 'community'},
        {'store_id': 'S005', 'store_name': '北区标准店', 'store_location': '北区商业街', 'store_type': 'standard'},
    ]

    # Product master data.
    products = [
        {'product_id': 'P001', 'product_name': '感冒灵颗粒', 'product_category': '感冒药', 'unit_price': 15.8},
        {'product_id': 'P002', 'product_name': '布洛芬片', 'product_category': '止痛药', 'unit_price': 12.5},
        {'product_id': 'P003', 'product_name': '维生素C', 'product_category': '维生素', 'unit_price': 8.9},
        {'product_id': 'P004', 'product_name': '阿莫西林', 'product_category': '抗生素', 'unit_price': 18.6},
        {'product_id': 'P005', 'product_name': '板蓝根颗粒', 'product_category': '中成药', 'unit_price': 11.2},
    ]

    # Per-store volume multipliers (built once instead of once per store iteration).
    store_multipliers = {
        'S001': 1.5,  # flagship store: high volume
        'S002': 1.0,  # standard store: baseline
        'S003': 0.7,  # convenience store: low volume
        'S004': 0.8,  # community store: medium volume
        'S005': 1.1,  # northern standard store: slightly above baseline
    }

    # Per-product baseline daily quantity (built once instead of once per product iteration).
    base_sales_by_product = {
        'P001': 25,  # cold medicine: high demand
        'P002': 20,  # painkiller: medium demand
        'P003': 30,  # vitamins: high demand
        'P004': 15,  # antibiotic: low demand
        'P005': 18,  # traditional Chinese medicine: medium demand
    }

    first_day = pd.Timestamp(start_date)
    last_day = pd.Timestamp(end_date)
    if last_day < first_day:
        raise ValueError("end_date must not be earlier than start_date")

    date_range = pd.date_range(start=first_day, end=last_day, freq='D')
    print(f"生成日期范围: {first_day.strftime('%Y-%m-%d')} 到 {last_day.strftime('%Y-%m-%d')}")
    print(f"总天数: {len(date_range)} 天")

    # Everything that depends only on the date is computed once per day rather
    # than once per (store, product, day) combination.
    calendar = [
        {
            'date': day.strftime('%Y-%m-%d'),
            'day_of_week': day.strftime('%A'),
            'month': day.month,
            'quarter': (day.month - 1) // 3 + 1,
            'year': day.year,
            # Saturdays and Sundays get a 20% uplift.
            'weekend_factor': 1.2 if day.weekday() >= 5 else 1.0,
        }
        for day in date_range
    ]

    # NOTE: the loop nesting (store -> product -> day) fixes the order in which
    # random numbers are consumed; changing it would change the generated data.
    sales_data = []
    for store in stores:
        store_multiplier = store_multipliers[store['store_id']]

        for product in products:
            product_id = product['product_id']
            unit_price = product['unit_price']
            base_sales = base_sales_by_product[product_id]

            for day in calendar:
                seasonal_factor = _seasonal_factor(product_id, day['month'])

                # Random day-to-day fluctuation.
                random_factor = np.random.normal(1.0, 0.3)

                # The factor order is kept as-is so floating-point results do
                # not drift; a negative random draw is clamped to zero.
                daily_sales = int(max(
                    0,
                    base_sales * store_multiplier * seasonal_factor
                    * day['weekend_factor'] * random_factor,
                ))
                sales_amount = daily_sales * unit_price

                # Key order defines the CSV column order.
                sales_data.append({
                    'date': day['date'],
                    'store_id': store['store_id'],
                    'store_name': store['store_name'],
                    'store_location': store['store_location'],
                    'store_type': store['store_type'],
                    'product_id': product_id,
                    'product_name': product['product_name'],
                    'product_category': product['product_category'],
                    'unit_price': unit_price,
                    'quantity_sold': daily_sales,
                    'sales_amount': round(sales_amount, 2),
                    'day_of_week': day['day_of_week'],
                    'month': day['month'],
                    'quarter': day['quarter'],
                    'year': day['year'],
                })

    df = pd.DataFrame(sales_data)

    # Persist the data set.
    df.to_csv(output_path, index=False, encoding='utf-8')

    print("多店铺销售数据生成完成!")
    print(f"数据记录数: {len(df)}")
    print(f"日期范围: {df['date'].min()} 到 {df['date'].max()}")
    print(f"店铺数量: {df['store_id'].nunique()}")
    print(f"产品数量: {df['product_id'].nunique()}")
    print(f"文件保存为: {output_path}")

    # Show a sample of the generated rows.
    print("\n数据样本:")
    print(df.head(10))

    # Show per-store totals.
    print("\n各店铺销售统计:")
    store_stats = df.groupby(['store_id', 'store_name']).agg({
        'quantity_sold': 'sum',
        'sales_amount': 'sum',
    }).round(2)
    print(store_stats)

    return df
|
||
|
||
# Script entry point: generate the data set only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    generate_multi_store_sales_data()
|