ShopTRAINING/pharmacy_predictor.py
gdtiti c0fe213b70 修复图表显示和数据处理问题
1. 修复前端图表日期排序问题:
   - 改进 PredictionView.vue 和 HistoryView.vue 中的图表渲染逻辑
   - 确保历史数据和预测数据按照正确的日期顺序显示

2. 修复后端API处理:
   - 解决 optimized_kan 模型类型的路径映射问题
   - 添加 JSON 序列化器处理 Pandas Timestamp 对象
   - 改进预测数据与历史数据的衔接处理

3. 优化图表样式和用户体验
2025-06-15 00:01:57 +08:00

2353 lines
92 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib
# 设置matplotlib后端为Agg适用于无头服务器环境
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import os
from datetime import datetime
import json
import torch.serialization
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from models.transformer_model import TimeSeriesTransformer
from models.slstm_model import sLSTM as ScalarLSTM
from models.mlstm_model import MLSTMTransformer as MatrixLSTM
from models.kan_model import KANForecaster
from models.optimized_kan_forecaster import OptimizedKANForecaster # 导入优化版KAN模型
from models.data_utils import prepare_data, prepare_sequences, create_dataset, evaluate_model, PharmacyDataset
import torch.nn.functional as F
from models.utils import get_device, to_device, DeviceDataLoader
from tqdm import tqdm
import time
import scipy.stats as stats
# Make Chinese text render correctly in matplotlib figures: use the SimHei
# font for sans-serif text and disable the Unicode minus sign
# (NOTE(review): this presumably requires SimHei to be installed on the host — confirm).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Select the compute device (GPU or CPU) once at import time and report it
device = get_device()
print(f"使用设备: {device}")
# Global hyperparameters shared by the training functions below
look_back = 14 # number of past days fed to the model as input
T = 7 # number of future days of sales to forecast
epochs = 50 # number of training epochs
num_features = 8 # number of input features
embed_dim = 32 # embedding dimension
dense_dim = 32 # number of hidden-layer units
num_heads = 4 # number of attention heads
dropout_rate = 0.1 # dropout rate
num_blocks = 3 # number of encoder/decoder blocks
learn_rate = 0.001 # learning rate
batch_size = 32 # mini-batch size
# Default training entry point - mLSTM is the default model
def train_product_model(product_id, epochs=50):
    """Train the default sales-forecast model for one product.

    This is a thin convenience wrapper: the default model is mLSTM, so the
    call is forwarded unchanged to ``train_product_model_with_mlstm``.

    Args:
        product_id: Product ID.
        epochs: Number of training epochs.

    Returns:
        The value returned by ``train_product_model_with_mlstm``, i.e. the
        trained model and its evaluation metrics.
    """
    trained = train_product_model_with_mlstm(product_id, epochs)
    return trained
# Training function that uses the mLSTM model
def train_product_model_with_mlstm(product_id, epochs=50):
    """Train, evaluate and persist an mLSTM sales-forecast model for one product.

    Reads ``pharmacy_sales.xlsx`` from the working directory, trains on the
    first 80% of the product's (chronologically sorted) rows and evaluates on
    the remaining 20%.

    Side effects (all paths relative to the working directory):
        * ``{product_id}_mlstm_loss_curve.png`` and
          ``{product_id}_mlstm_prediction.png`` charts,
        * ``{product_id}_mlstm_prediction_results.csv``,
        * ``models/mlstm/{product_id}_model.pt`` checkpoint,
        * ``models/mlstm/{product_id}_log.json`` training log.

    Args:
        product_id: Product ID used to filter the sales data.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(model, metrics)`` - the trained model and the dict returned
        by ``evaluate_model`` for the test split.
    """
    # Load the generated pharmacy sales data
    df = pd.read_excel('pharmacy_sales.xlsx')
    # Keep only the requested product, in chronological order
    product_df = df[df['product_id'] == product_id].sort_values('date')
    product_name = product_df['product_name'].iloc[0]
    print(f"使用mLSTM模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
    print(f"使用设备: {device}")

    # Input features; the target column 'sales' is also the first input feature
    features = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
    X = product_df[features].values
    y = product_df[['sales']].values  # kept 2-D because the scaler expects a column

    # Scale inputs and target to [0, 1]
    scaler_X = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # Chronological split: 80% train / 20% test
    train_size = int(len(X_scaled) * 0.8)
    X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
    y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

    # Build sliding-window samples (look_back input days -> T target days)
    trainX, trainY = create_dataset(X_train, y_train, look_back, T)
    testX, testY = create_dataset(X_test, y_test, look_back, T)

    # Convert to PyTorch tensors
    trainX_tensor = torch.Tensor(trainX)
    trainY_tensor = torch.Tensor(trainY)
    testX_tensor = torch.Tensor(testX)
    testY_tensor = torch.Tensor(testY)

    # Data loaders, wrapped so that every batch is moved to the target device
    train_dataset = PharmacyDataset(trainX_tensor, trainY_tensor)
    test_dataset = PharmacyDataset(testX_tensor, testY_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    train_loader = DeviceDataLoader(train_loader, device)
    test_loader = DeviceDataLoader(test_loader, device)

    # Build the mLSTM + Transformer model. The class is imported at the top of
    # this module under the alias ``MatrixLSTM``, so that alias is the name
    # that is actually in scope here.
    model = MatrixLSTM(
        num_features=num_features,
        hidden_size=128,
        mlstm_layers=1,
        embed_dim=embed_dim,
        dense_dim=dense_dim,
        num_heads=num_heads,
        dropout_rate=dropout_rate,
        num_blocks=num_blocks,
        output_sequence_length=T
    )
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)

    # Training loop with a per-epoch evaluation on the test split
    train_losses = []
    test_losses = []
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False):
            # Forward pass (assumes the model emits a trailing size-1 axis - TODO confirm)
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean training loss over the batches of this epoch
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)

        # Evaluate on the test split without tracking gradients
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                loss = criterion(outputs.squeeze(-1), y_batch)
                test_loss += loss.item()
        test_loss = test_loss / len(test_loader)
        test_losses.append(test_loss)
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    # Plot the loss curves
    loss_fig = plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='训练损失')
    plt.plot(test_losses, label='测试损失')
    plt.title(f'{product_name} - mLSTM模型训练和测试损失')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'{product_id}_mlstm_loss_curve.png')
    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(loss_fig)

    # Predict on the whole test split in one batch
    model.eval()
    with torch.no_grad():
        testX_device = to_device(testX_tensor, device)
        y_pred_scaled = model(testX_device).squeeze(-1).cpu().numpy()

    # Undo the target scaling; the scaler works on a single column, hence the reshapes
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
    y_true = scaler_y.inverse_transform(testY.reshape(-1, 1)).reshape(testY.shape)

    # Evaluate the model on the flattened forecasts
    metrics = evaluate_model(y_true.flatten(), y_pred.flatten())
    print(f"\n{product_name} mLSTM模型评估指标:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # Plot actual vs. predicted sales (first forecast step of each test sample)
    pred_fig = plt.figure(figsize=(12, 6))
    first_target_row = train_size + look_back
    test_dates = product_df['date'].iloc[first_target_row:first_target_row + len(y_true)].values
    # Only the last 30 samples are drawn
    days_to_plot = min(30, len(y_true))
    plot_from = max(0, len(y_true) - days_to_plot)
    plt.plot(test_dates[plot_from:], y_true[plot_from:, 0], 'b-', label='实际销量')
    plt.plot(test_dates[plot_from:], y_pred[plot_from:, 0], 'r--', label='mLSTM预测销量')
    plt.title(f'{product_name} - mLSTM销量预测结果')
    plt.xlabel('日期')
    plt.ylabel('销量')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'{product_id}_mlstm_prediction.png')
    plt.close(pred_fig)

    # Save the predictions to CSV: every sample contributes its T target dates
    all_dates = []
    for sample_idx in range(len(y_true)):
        first_row = train_size + sample_idx + look_back
        all_dates.extend(product_df['date'].iloc[first_row:first_row + T])
    # Truncate to a common length in case dates and values do not line up
    flat_y_true = y_true.flatten()
    flat_y_pred = y_pred.flatten()
    min_len = min(len(all_dates), len(flat_y_true))
    results_df = pd.DataFrame({
        'date': all_dates[:min_len],
        'actual_sales': flat_y_true[:min_len],
        'predicted_sales': flat_y_pred[:min_len]
    })
    results_df.to_csv(f'{product_id}_mlstm_prediction_results.csv', index=False)
    print(f"\nmLSTM预测结果已保存到 {product_id}_mlstm_prediction_results.csv")

    # Create the model directory
    model_dir = 'models/mlstm'
    os.makedirs(model_dir, exist_ok=True)

    # Save the model together with everything needed to reuse it for prediction
    model_path = os.path.join(model_dir, f'{product_id}_model.pt')
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_losses,
        'test_loss': test_losses,
        'scaler_X': scaler_X,
        'scaler_y': scaler_y,
        'features': features,
        'look_back': look_back,
        'T': T,
        'model_type': 'mlstm'
    }, model_path)
    print(f"模型已成功保存到 {model_path}")

    # Save the training log
    log_path = os.path.join(model_dir, f'{product_id}_log.json')
    log_data = {
        'product_id': product_id,
        'product_name': product_name,
        'model_type': 'mlstm',
        'training_completed_at': datetime.now().isoformat(),
        'epochs': epochs,
        'metrics': metrics,
        'file_path': model_path
    }
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=4, ensure_ascii=False)
    print(f"训练日志已保存到 {log_path}")
    return model, metrics
# Training function that uses the KAN model
def train_product_model_with_kan(product_id, epochs=50, use_optimized=False):
    """Train, evaluate and persist a KAN sales-forecast model for one product.

    Reads ``pharmacy_sales.xlsx`` from the working directory, trains on the
    first 80% of the product's (chronologically sorted) rows and evaluates on
    the remaining 20%.

    Output file names and the model directory carry the suffix ``_optimized``
    when ``use_optimized`` is true (e.g. ``models/kan_optimized``), and no
    suffix otherwise (``models/kan``).

    Args:
        product_id: Product ID used to filter the sales data.
        epochs: Number of training epochs.
        use_optimized: Train ``OptimizedKANForecaster`` instead of
            ``KANForecaster``.

    Returns:
        tuple: ``(model, metrics)`` - the trained model and the dict returned
        by ``evaluate_model`` for the test split.
    """
    # Load the generated pharmacy sales data
    df = pd.read_excel('pharmacy_sales.xlsx')
    # Keep only the requested product, in chronological order
    product_df = df[df['product_id'] == product_id].sort_values('date')
    product_name = product_df['product_name'].iloc[0]
    model_type = "优化版KAN" if use_optimized else "KAN"
    print(f"使用{model_type}模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
    print(f"使用设备: {device}")

    # Input features; the target column 'sales' is also the first input feature
    features = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
    X = product_df[features].values
    y = product_df[['sales']].values  # kept 2-D because the scaler expects a column

    # Scale inputs and target to [0, 1]
    scaler_X = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # Chronological split: 80% train / 20% test
    train_size = int(len(X_scaled) * 0.8)
    X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
    y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

    # Build sliding-window samples (look_back input days -> T target days)
    trainX, trainY = create_dataset(X_train, y_train, look_back, T)
    testX, testY = create_dataset(X_test, y_test, look_back, T)

    # Convert to PyTorch tensors
    trainX_tensor = torch.Tensor(trainX)
    trainY_tensor = torch.Tensor(trainY)
    testX_tensor = torch.Tensor(testX)
    testY_tensor = torch.Tensor(testY)

    # Data loaders, wrapped so that every batch is moved to the target device
    train_dataset = PharmacyDataset(trainX_tensor, trainY_tensor)
    test_dataset = PharmacyDataset(testX_tensor, testY_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    train_loader = DeviceDataLoader(train_loader, device)
    test_loader = DeviceDataLoader(test_loader, device)

    # Both KAN variants take the same constructor arguments, so only the
    # class differs between the optimized and the plain model
    forecaster_cls = OptimizedKANForecaster if use_optimized else KANForecaster
    model = forecaster_cls(
        input_features=num_features,
        hidden_sizes=[64, 128, 64],
        output_size=1,
        grid_size=5,
        spline_order=3,
        dropout_rate=dropout_rate,
        output_sequence_length=T
    )
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)

    # Training loop with a per-epoch evaluation on the test split
    train_losses = []
    test_losses = []
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False):
            # Forward pass (assumes the model emits a trailing size-1 axis - TODO confirm)
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean training loss over the batches of this epoch
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)

        # Evaluate on the test split without tracking gradients
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                loss = criterion(outputs.squeeze(-1), y_batch)
                test_loss += loss.item()
        test_loss = test_loss / len(test_loader)
        test_losses.append(test_loss)
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    # Suffix that distinguishes the artefacts of the optimized variant
    model_suffix = "_optimized" if use_optimized else ""

    # Plot the loss curves
    loss_fig = plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='训练损失')
    plt.plot(test_losses, label='测试损失')
    plt.title(f'{product_name} - {model_type}模型训练和测试损失')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'{product_id}_kan{model_suffix}_loss_curve.png')
    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(loss_fig)

    # Predict on the whole test split in one batch
    model.eval()
    with torch.no_grad():
        testX_device = to_device(testX_tensor, device)
        y_pred_scaled = model(testX_device).squeeze(-1).cpu().numpy()

    # Undo the target scaling; the scaler works on a single column, hence the reshapes
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
    y_true = scaler_y.inverse_transform(testY.reshape(-1, 1)).reshape(testY.shape)

    # Evaluate the model on the flattened forecasts
    metrics = evaluate_model(y_true.flatten(), y_pred.flatten())
    print(f"\n{product_name} {model_type}模型评估指标:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # Plot actual vs. predicted sales (first forecast step of each test sample)
    pred_fig = plt.figure(figsize=(12, 6))
    first_target_row = train_size + look_back
    test_dates = product_df['date'].iloc[first_target_row:first_target_row + len(y_true)].values
    # Only the last 30 samples are drawn
    days_to_plot = min(30, len(y_true))
    plot_from = max(0, len(y_true) - days_to_plot)
    plt.plot(test_dates[plot_from:], y_true[plot_from:, 0], 'b-', label='实际销量')
    plt.plot(test_dates[plot_from:], y_pred[plot_from:, 0], 'r--', label=f'{model_type}预测销量')
    plt.title(f'{product_name} - {model_type}销量预测结果')
    plt.xlabel('日期')
    plt.ylabel('销量')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'{product_id}_kan{model_suffix}_prediction.png')
    plt.close(pred_fig)

    # Save the predictions to CSV: every sample contributes its T target dates
    all_dates = []
    for sample_idx in range(len(y_true)):
        first_row = train_size + sample_idx + look_back
        all_dates.extend(product_df['date'].iloc[first_row:first_row + T])
    # Truncate to a common length in case dates and values do not line up
    flat_y_true = y_true.flatten()
    flat_y_pred = y_pred.flatten()
    min_len = min(len(all_dates), len(flat_y_true))
    results_df = pd.DataFrame({
        'date': all_dates[:min_len],
        'actual_sales': flat_y_true[:min_len],
        'predicted_sales': flat_y_pred[:min_len]
    })
    results_df.to_csv(f'{product_id}_kan{model_suffix}_prediction_results.csv', index=False)
    print(f"\n{model_type}预测结果已保存到 {product_id}_kan{model_suffix}_prediction_results.csv")

    # Create the model directory
    model_dir = f'models/kan{model_suffix}'
    os.makedirs(model_dir, exist_ok=True)

    # Save the model together with everything needed to reuse it for prediction
    model_path = os.path.join(model_dir, f'{product_id}_model.pt')
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_losses,
        'test_loss': test_losses,
        'scaler_X': scaler_X,
        'scaler_y': scaler_y,
        'features': features,
        'look_back': look_back,
        'T': T,
        'model_type': f'kan{model_suffix}'
    }, model_path)
    print(f"模型已成功保存到 {model_path}")

    # Save the training log
    log_path = os.path.join(model_dir, f'{product_id}_log.json')
    log_data = {
        'product_id': product_id,
        'product_name': product_name,
        'model_type': f'kan{model_suffix}',
        'training_completed_at': datetime.now().isoformat(),
        'epochs': epochs,
        'metrics': metrics,
        'file_path': model_path
    }
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=4, ensure_ascii=False)
    print(f"训练日志已保存到 {log_path}")
    return model, metrics
# Helper: draw the "recent history + forecast" chart for the Transformer trainer
def _transformer_forecast_chart(product_df, product_name, future_dates, future_predictions,
                                start_date_obj, future_days, output_dir,
                                model_type='Transformer', history_days=14):
    """Plot recent sales history next to the forecast and save the chart.

    Args:
        product_df: Sales rows of one product with 'date' and 'sales' columns.
        product_name: Product name used in the chart title.
        future_dates: Dates of the forecast points.
        future_predictions: Forecast values, one per entry of ``future_dates``.
        start_date_obj: First forecast date (timestamp).
        future_days: Number of forecast days (used in title and file name).
        output_dir: Directory the PNG is written to.
        model_type: Model label used in legend and title.
        history_days: How many days of history to show before the forecast.

    Returns:
        DataFrame with the 'date' and 'sales' columns of the history rows
        that were plotted.
    """
    import matplotlib.dates as mdates

    forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
    try:
        # History window: the `history_days` days that end the day before the forecast starts
        history_end_date = start_date_obj - pd.Timedelta(days=1)
        history_start_date = history_end_date - pd.Timedelta(days=history_days)
        in_window = (product_df['date'] >= history_start_date) & (product_df['date'] <= history_end_date)
        history_df = product_df[in_window][['date', 'sales']].copy()
        if history_df.empty:
            print(f"警告: 在日期范围 {history_start_date} ~ {history_end_date} 内没有历史数据")
            # Fall back to the most recent rows when the window holds no data
            history_df = product_df.iloc[-min(history_days, len(product_df)):][['date', 'sales']].copy()
        print(f"历史数据日期范围: {history_df['date'].min()} ~ {history_df['date'].max()}")
        print(f"预测数据日期范围: {future_dates.min()} ~ {future_dates.max()}")
        print(f"预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")

        # Chart style
        plt.style.use('seaborn-v0_8-whitegrid')
        # Historical sales
        forecast_ax.plot(history_df['date'].values, history_df['sales'].values,
                         'b-', linewidth=2, marker='o', markersize=4,
                         label='历史销量')
        # Vertical separator between history and forecast
        forecast_ax.axvline(x=start_date_obj, color='gray', linestyle='--', alpha=0.7)
        # Forecast, plotted against the future dates
        forecast_ax.plot(future_dates, future_predictions,
                         'r-', linewidth=2.5, marker='s', markersize=5,
                         label=f'{model_type}预测销量')

        # Force the X axis to span from `history_days` before the forecast to its end
        date_min = start_date_obj - pd.Timedelta(days=history_days)
        date_max = future_dates.max() + pd.Timedelta(days=1)
        print(f"设置X轴范围: {date_min} ~ {date_max}")
        forecast_ax.set_xlim(date_min, date_max)
        forecast_ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
        forecast_ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))  # one tick every 2 days
        forecast_ax.grid(True, linestyle='--', alpha=0.6)

        forecast_ax.set_title(f'{product_name} - {model_type}销量预测 (从{start_date_obj.strftime("%Y-%m-%d")}开始,预测{future_days}天)',
                              fontsize=14, fontweight='bold')
        forecast_ax.set_xlabel('日期', fontsize=12)
        forecast_ax.set_ylabel('销量', fontsize=12)
        forecast_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)

        # Shade and label the forecast region
        min_y, max_y = forecast_ax.get_ylim()
        forecast_ax.fill_between(
            [start_date_obj, future_dates.max()],
            min_y, max_y,
            color='lightyellow', alpha=0.3, zorder=0
        )
        forecast_ax.text(
            start_date_obj + pd.Timedelta(days=future_days/2),
            min_y + (max_y - min_y) * 0.05,
            '预测区域',
            ha='center', va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3)
        )
        # Y axis starts at 0
        forecast_ax.set_ylim(bottom=0)
        plt.xticks(rotation=45)
        plt.tight_layout()
        forecast_fig.canvas.draw()

        # File name carries the forecast start date and horizon
        start_date_str = start_date_obj.strftime('%Y%m%d')
        forecast_chart = f'{output_dir}/forecast_{start_date_str}_days{future_days}.png'
        plt.savefig(forecast_chart, dpi=120)
        print(f"预测图表已保存为: {forecast_chart}")
        return history_df
    finally:
        # Always release the figure, even when plotting failed half-way
        plt.close(forecast_fig)


# Helper: draw the same-period comparison chart (current / last year / last month)
def _transformer_history_chart(product_df, product_name, start_date_obj, start_date_str,
                               output_dir, days_before=3, days_after=3):
    """Plot sales around the forecast start date against earlier periods.

    Three windows of ``days_before + days_after + 1`` days are compared: the
    one around ``start_date_obj``, the same window one year earlier and the
    same window one month earlier. Any error is printed and swallowed, so a
    failure here never aborts the caller.

    Returns:
        Path of the saved PNG, or ``None`` when the chart could not be made.
    """
    history_fig = None
    try:
        print("\n开始生成历史趋势图...")
        history_fig, history_ax = plt.subplots(figsize=(12, 6))
        plt.style.use('seaborn-v0_8-whitegrid')
        print(f"预测起始日期: {start_date_obj}")

        span = pd.Timedelta(days=days_before + days_after)
        date_range_start = start_date_obj - pd.Timedelta(days=days_before)
        date_range_end = date_range_start + span
        # DateOffset clamps to the last valid day of the target month, so
        # Feb 29 / day 31 / January starts do not raise the way
        # Timestamp.replace(year=..., month=...) does. Each earlier window
        # gets the same length so the day offsets line up.
        last_year_start = date_range_start - pd.DateOffset(years=1)
        last_year_end = last_year_start + span
        last_month_start = date_range_start - pd.DateOffset(months=1)
        last_month_end = last_month_start + span
        print(f"当前日期范围: {date_range_start} ~ {date_range_end}")
        print(f"去年同期范围: {last_year_start} ~ {last_year_end}")
        print(f"上月同期范围: {last_month_start} ~ {last_month_end}")

        # (label, window start, window end, line format, marker, line width, marker size, mark start day)
        periods = [
            ('当前期间', date_range_start, date_range_end, 'r-', 'o', 2.5, 7, True),
            ('去年同期', last_year_start, last_year_end, 'b-', 's', 2, 6, False),
            ('上月同期', last_month_start, last_month_end, 'g-', '^', 2, 6, False),
        ]
        selections = []
        for label, begin, end, *_ in periods:
            rows = product_df[(product_df['date'] >= begin) & (product_df['date'] <= end)]
            print(f"{label}数据点数: {len(rows)}")
            selections.append(rows)

        has_data = False
        for (label, begin, end, line_fmt, marker, width, size, mark_start), rows in zip(periods, selections):
            if rows.empty:
                continue
            has_data = True
            # Sort by date and express each row as a day offset from the window start
            rows = rows.sort_values('date')
            rows = rows.assign(day_offset=(rows['date'] - begin).dt.days)
            print(f"{label}日期: {rows['date'].tolist()}")
            print(f"{label}相对天数: {rows['day_offset'].tolist()}")
            print(f"{label}销量: {rows['sales'].tolist()}")
            history_ax.plot(
                rows['day_offset'],
                rows['sales'],
                line_fmt,
                marker=marker,
                linewidth=width,
                markersize=size,
                label=f"{label} ({begin.strftime('%Y-%m-%d')} ~ {end.strftime('%Y-%m-%d')})"
            )
            if mark_start:
                # Highlight the forecast start day when the data contains it
                center_point = rows[rows['date'] == start_date_obj]
                if not center_point.empty:
                    history_ax.scatter(
                        center_point['day_offset'],
                        center_point['sales'],
                        color='red',
                        s=150,
                        marker='*',
                        zorder=10,
                        label=f"预测起始日 ({start_date_obj.strftime('%Y-%m-%d')})"
                    )

        # X ticks are day offsets, labelled with the dates of the current window
        n_days = days_before + days_after + 1
        offsets = list(range(n_days))
        history_ax.set_xticks(offsets)
        history_ax.set_xticklabels(
            [(date_range_start + pd.Timedelta(days=d)).strftime('%m-%d') for d in offsets]
        )
        # Vertical reference line and shaded band at the forecast start day
        history_ax.axvline(x=days_before, color='red', linestyle='--', alpha=0.5)
        history_ax.set_title(f'{product_name} - 同期销量趋势对比 ({n_days}天)', fontsize=14, fontweight='bold')
        history_ax.set_xlabel('日期', fontsize=12)
        history_ax.set_ylabel('销量', fontsize=12)
        history_ax.grid(True, linestyle='--', alpha=0.7)
        history_ax.set_ylim(bottom=0)
        history_ax.axvspan(days_before-0.2, days_before+0.2, color='lightyellow', alpha=0.3)
        if has_data:
            # A legend is only meaningful when at least one labelled series exists
            history_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)
        else:
            history_ax.text(0.5, 0.5, '没有找到可比较的同期数据',
                            horizontalalignment='center', verticalalignment='center',
                            transform=history_ax.transAxes, fontsize=14)
        plt.tight_layout()
        history_fig.canvas.draw()

        history_chart = f'{output_dir}/history_{start_date_str}.png'
        plt.savefig(history_chart, dpi=120)
        print(f"历史趋势图表已保存为: {history_chart}")
        return history_chart
    except Exception as e:
        # Best-effort chart: report the problem and let the caller continue
        import traceback
        print(f"生成历史趋势图时出错: {e}")
        traceback.print_exc()
        return None
    finally:
        if history_fig is not None:
            plt.close(history_fig)


# Training function that uses the Transformer model
def train_product_model_with_transformer(product_id, epochs=50):
    """Train, evaluate and persist a Transformer sales-forecast model for one product.

    Reads ``pharmacy_sales.xlsx`` from the working directory, trains on the
    first 80% of the product's (chronologically sorted) rows and evaluates on
    the remaining 20%. After training, the forecast of the last test sample
    is reported as the forecast for the ``T`` days following the last date in
    the data.

    Side effects (all paths relative to the working directory):
        * ``{product_id}_transformer_loss_curve.png``,
        * ``{product_id}_transformer_forecast_{start}_days{T}.png``,
        * ``predictions/transformer/{product_id}/`` with the forecast chart,
          the same-period history chart and the forecast CSV,
        * ``models/transformer/{product_id}_model.pt`` checkpoint,
        * ``models/transformer/{product_id}_log.json`` training log.

    Args:
        product_id: Product ID used to filter the sales data.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(model, metrics)`` - the trained model and the dict returned
        by ``evaluate_model`` for the test split.
    """
    # Load the generated pharmacy sales data
    df = pd.read_excel('pharmacy_sales.xlsx')
    # Keep only the requested product, in chronological order
    product_df = df[df['product_id'] == product_id].sort_values('date')
    product_name = product_df['product_name'].iloc[0]
    print(f"使用Transformer模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
    print(f"使用设备: {device}")

    # Input features; the target column 'sales' is also the first input feature
    features = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
    X = product_df[features].values
    y = product_df[['sales']].values  # kept 2-D because the scaler expects a column

    # Scale inputs and target to [0, 1]
    scaler_X = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # Chronological split: 80% train / 20% test
    train_size = int(len(X_scaled) * 0.8)
    X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
    y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

    # Build sliding-window samples (look_back input days -> T target days)
    trainX, trainY = create_dataset(X_train, y_train, look_back, T)
    testX, testY = create_dataset(X_test, y_test, look_back, T)

    # Convert to PyTorch tensors
    trainX_tensor = torch.Tensor(trainX)
    trainY_tensor = torch.Tensor(trainY)
    testX_tensor = torch.Tensor(testX)
    testY_tensor = torch.Tensor(testY)

    # Data loaders, wrapped so that every batch is moved to the target device
    train_dataset = PharmacyDataset(trainX_tensor, trainY_tensor)
    test_dataset = PharmacyDataset(testX_tensor, testY_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    train_loader = DeviceDataLoader(train_loader, device)
    test_loader = DeviceDataLoader(test_loader, device)

    # Build the Transformer model
    model = TimeSeriesTransformer(
        num_features=num_features,
        d_model=embed_dim,
        nhead=num_heads,
        num_encoder_layers=num_blocks,
        dim_feedforward=dense_dim,
        dropout=dropout_rate,
        output_sequence_length=T
    )
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)

    # Training loop with a per-epoch evaluation on the test split
    train_losses = []
    test_losses = []
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False):
            # Forward pass (assumes the model emits a trailing size-1 axis - TODO confirm)
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean training loss over the batches of this epoch
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)

        # Evaluate on the test split without tracking gradients
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                loss = criterion(outputs.squeeze(-1), y_batch)
                test_loss += loss.item()
        test_loss = test_loss / len(test_loader)
        test_losses.append(test_loss)
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    # Plot the loss curves
    loss_fig = plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='训练损失')
    plt.plot(test_losses, label='测试损失')
    plt.title(f'{product_name} - Transformer模型训练和测试损失')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'{product_id}_transformer_loss_curve.png')
    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(loss_fig)

    # Predict on the whole test split in one batch
    model.eval()
    with torch.no_grad():
        testX_device = to_device(testX_tensor, device)
        y_pred_scaled = model(testX_device).squeeze(-1).cpu().numpy()

    # Undo the target scaling; the scaler works on a single column, hence the reshapes
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
    y_true = scaler_y.inverse_transform(testY.reshape(-1, 1)).reshape(testY.shape)

    # Evaluate the model on the flattened forecasts
    metrics = evaluate_model(y_true.flatten(), y_pred.flatten())
    print(f"\n{product_name} Transformer模型评估指标:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # Forecast bookkeeping. The forecast starts the day after the last
    # recorded date and covers the model's output horizon T.
    model_type = 'Transformer'
    future_days = T
    last_date = product_df['date'].iloc[-1]
    start_date_obj = last_date + pd.Timedelta(days=1)
    start_date_str = start_date_obj.strftime('%Y%m%d')
    future_dates = pd.date_range(start=start_date_obj, periods=future_days, freq='D')

    # Plot actual vs. predicted sales (first forecast step of each test sample)
    eval_fig = plt.figure(figsize=(12, 6))
    first_target_row = train_size + look_back
    test_dates = product_df['date'].iloc[first_target_row:first_target_row + len(y_true)].values
    # Only the last 30 samples are drawn
    days_to_plot = min(30, len(y_true))
    plot_from = max(0, len(y_true) - days_to_plot)
    plt.plot(test_dates[plot_from:], y_true[plot_from:, 0], 'b-', label='实际销量')
    plt.plot(test_dates[plot_from:], y_pred[plot_from:, 0], 'r--', label='Transformer预测销量')
    plt.title(f'{product_name} - Transformer销量预测结果')
    plt.xlabel('日期')
    plt.ylabel('销量')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    eval_fig.canvas.draw()
    # File name carries the forecast start date and horizon
    eval_chart = f'{product_id}_transformer_forecast_{start_date_str}_days{T}.png'
    plt.savefig(eval_chart)
    print(f"预测图表已保存为: {eval_chart}")
    plt.close(eval_fig)

    # Use the forecast sequence of the last test sample as the future forecast
    future_predictions = y_pred[-1, :T].flatten()
    if len(future_dates) != len(future_predictions):
        # Keep dates and values the same length
        future_predictions = future_predictions[:len(future_dates)]
    predictions_df = pd.DataFrame({
        'date': future_dates,
        'product_id': product_id,
        'product_name': product_name,
        'predicted_sales': future_predictions
    })
    print(f"\n{product_name} 未来 {T} 天销售预测 (使用Transformer模型):")
    print(predictions_df[['date', 'predicted_sales']])

    # Directory for the forecast artefacts
    output_dir = f'predictions/transformer/{product_id}'
    os.makedirs(output_dir, exist_ok=True)

    # Forecast rows in the layout used for the combined history+forecast CSV.
    # The 1-D forecast is used here; the 2-D ``y_pred`` array cannot be a column.
    forecast_rows = pd.DataFrame({
        'date': future_dates,
        'sales': future_predictions,
        'data_type': '预测销量',
        'product_id': product_id,
        'product_name': product_name
    })

    # Charts are best-effort: a plotting failure must not lose the trained model
    try:
        history_df = _transformer_forecast_chart(
            product_df, product_name, future_dates, future_predictions,
            start_date_obj, future_days, output_dir, model_type=model_type
        )
        _transformer_history_chart(product_df, product_name, start_date_obj,
                                   start_date_str, output_dir)
        # Combine history and forecast for the CSV export
        history_df['data_type'] = '历史销量'
        complete_df = pd.concat([
            history_df[['date', 'sales', 'data_type']].assign(product_id=product_id, product_name=product_name),
            forecast_rows
        ]).sort_values('date')
    except Exception as e:
        import traceback
        print(f"生成预测图表时出错: {e}")
        traceback.print_exc()
        # Still export the forecast itself when the charts failed
        complete_df = forecast_rows

    # Save the forecast to CSV (best-effort as well)
    try:
        forecast_csv = f'{output_dir}/forecast_{start_date_str}_days{future_days}.csv'
        complete_df.to_csv(forecast_csv, index=False)
        print(f"预测结果已保存到: {forecast_csv}")
    except Exception as e:
        print(f"保存CSV文件时出错: {e}")

    # Save the model together with everything needed to reuse it for prediction
    model_dir = f'models/transformer'
    os.makedirs(model_dir, exist_ok=True)
    model_path = f'{model_dir}/{product_id}_model.pt'
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        # Same bookkeeping keys as the mLSTM / KAN checkpoints
        'train_loss': train_losses,
        'test_loss': test_losses,
        'scaler_X': scaler_X,
        'scaler_y': scaler_y,
        'metrics': metrics,
        'epochs': epochs,
        'look_back': look_back,
        'T': T,
        'features': features,
        'model_type': 'transformer'
    }
    torch.save(checkpoint, model_path)
    print(f"模型已保存到 {model_path}")

    # Save the training log
    log_path = f'{model_dir}/{product_id}_log.json'
    log_data = {
        'product_id': product_id,
        'product_name': product_name,
        'model_type': 'transformer',
        'training_completed_at': datetime.now().isoformat(),
        'epochs': epochs,
        'metrics': metrics,
        'file_path': model_path
    }
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=4, ensure_ascii=False)
    print(f"训练日志已保存到 {log_path}")
    return model, metrics
# 加载模型并进行预测的函数
def _build_forecast_model(model_type, num_features, horizon):
    """Instantiate the (untrained) network that matches a saved checkpoint.

    Args:
        model_type: One of 'mlstm', 'kan', 'optimized_kan', 'transformer'.
        num_features: Number of input feature columns.
        horizon: Number of future steps the network emits per forward pass.

    Returns:
        The freshly constructed model, or None for an unsupported type.
    """
    if model_type == 'mlstm':
        # The module imports MLSTMTransformer under the alias MatrixLSTM, so
        # the alias is the name that is actually in scope here.
        # NOTE(review): PharmacyPredictor.train_model builds MatrixLSTM with a
        # different keyword set (input_dim/hidden_dim/...) - confirm which
        # signature models.mlstm_model really exposes.
        return MatrixLSTM(
            num_features=num_features,
            hidden_size=128,
            mlstm_layers=1,
            embed_dim=embed_dim,
            dense_dim=dense_dim,
            num_heads=num_heads,
            dropout_rate=dropout_rate,
            num_blocks=num_blocks,
            output_sequence_length=horizon,
        )
    if model_type in ('kan', 'optimized_kan'):
        kan_class = OptimizedKANForecaster if model_type == 'optimized_kan' else KANForecaster
        return kan_class(
            input_features=num_features,
            hidden_sizes=[64, 128, 64],
            output_size=1,
            grid_size=5,
            spline_order=3,
            dropout_rate=dropout_rate,
            output_sequence_length=horizon,
        )
    if model_type == 'transformer':
        return TimeSeriesTransformer(
            num_features=num_features,
            d_model=embed_dim,
            nhead=num_heads,
            num_encoder_layers=num_blocks,
            dim_feedforward=dense_dim,
            dropout=dropout_rate,
            output_sequence_length=horizon,
        )
    return None


def _select_recent_history(product_df, start_date_obj, history_days):
    """Return the date/sales rows of the window that ends the day before the forecast.

    Falls back to the last `history_days` rows of `product_df` when no row
    falls inside that window (e.g. the forecast starts far after the data).
    """
    history_end_date = start_date_obj - pd.Timedelta(days=1)
    history_start_date = history_end_date - pd.Timedelta(days=history_days)
    in_window = (product_df['date'] >= history_start_date) & (product_df['date'] <= history_end_date)
    history_df = product_df[in_window][['date', 'sales']].copy()
    if history_df.empty:
        print(f"警告: 在日期范围 {history_start_date}{history_end_date} 内没有历史数据")
        history_df = product_df.iloc[-min(history_days, len(product_df)):][['date', 'sales']].copy()
    return history_df


def _plot_forecast_chart(history_df, future_dates, y_pred, start_date_obj, history_days,
                         product_name, model_type, future_days, chart_path):
    """Draw recent history plus the forecast and save the figure to `chart_path`."""
    import matplotlib.dates as mdates

    forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
    try:
        # NOTE(review): plt.style.use changes global rcParams and is applied
        # after the figure exists; kept as in the original - confirm it does
        # not override the CJK font configured at module import.
        plt.style.use('seaborn-v0_8-whitegrid')
        forecast_ax.plot(history_df['date'].values, history_df['sales'].values,
                         'b-', linewidth=2, marker='o', markersize=4,
                         label='历史销量')
        # Vertical separator between observed and predicted values.
        forecast_ax.axvline(x=start_date_obj, color='gray', linestyle='--', alpha=0.7)
        forecast_ax.plot(future_dates, y_pred,
                         'r-', linewidth=2.5, marker='s', markersize=5,
                         label=f'{model_type}预测销量')
        # Pin the x axis so the chart always spans history window + forecast.
        date_min = start_date_obj - pd.Timedelta(days=history_days)
        date_max = future_dates.max() + pd.Timedelta(days=1)
        print(f"设置X轴范围: {date_min}{date_max}")
        forecast_ax.set_xlim(date_min, date_max)
        forecast_ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
        forecast_ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
        forecast_ax.grid(True, linestyle='--', alpha=0.6)
        forecast_ax.set_title(
            f'{product_name} - {model_type}销量预测 (从{start_date_obj.strftime("%Y-%m-%d")}开始,预测{future_days}天)',
            fontsize=14, fontweight='bold')
        forecast_ax.set_xlabel('日期', fontsize=12)
        forecast_ax.set_ylabel('销量', fontsize=12)
        forecast_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)
        # Shade the forecast region and label it.
        min_y, max_y = forecast_ax.get_ylim()
        forecast_ax.fill_between(
            [start_date_obj, future_dates.max()],
            min_y, max_y,
            color='lightyellow', alpha=0.3, zorder=0
        )
        forecast_ax.text(
            start_date_obj + pd.Timedelta(days=future_days/2),
            min_y + (max_y - min_y) * 0.05,
            '预测区域',
            ha='center', va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3)
        )
        forecast_ax.set_ylim(bottom=0)
        plt.xticks(rotation=45)
        forecast_fig.tight_layout()
        forecast_fig.canvas.draw()
        forecast_fig.savefig(chart_path, dpi=120)
        print(f"预测图表已保存为: {chart_path}")
    finally:
        # Always release the figure, even when drawing or saving failed.
        plt.close(forecast_fig)


def _plot_same_period_chart(product_df, product_name, start_date_obj, chart_path):
    """Compare the 7-day window around the forecast start with last year / last month.

    The window covers 3 days before to 3 days after `start_date_obj`; each
    period is plotted against its day offset so the three curves overlap.
    The figure is saved to `chart_path`.
    """
    print("\n开始生成历史趋势图...")
    history_fig, history_ax = plt.subplots(figsize=(12, 6))
    try:
        plt.style.use('seaborn-v0_8-whitegrid')
        print(f"预测起始日期: {start_date_obj}")
        days_before = 3
        days_after = 3
        window_days = days_before + days_after + 1
        date_range_start = start_date_obj - pd.Timedelta(days=days_before)
        date_range_end = start_date_obj + pd.Timedelta(days=days_after)
        # DateOffset does calendar arithmetic with clamping, so windows that
        # straddle a month/year boundary or touch day 29-31 no longer raise
        # (Timestamp.replace produced month=0 or a non-existent day).
        one_year = pd.DateOffset(years=1)
        one_month = pd.DateOffset(months=1)
        last_year_start = date_range_start - one_year
        last_year_end = date_range_end - one_year
        last_month_start = date_range_start - one_month
        last_month_end = date_range_end - one_month
        print(f"当前日期范围: {date_range_start}{date_range_end}")
        print(f"去年同期范围: {last_year_start}{last_year_end}")
        print(f"上月同期范围: {last_month_start}{last_month_end}")

        # (name, start, end, line style, marker, line width, marker size, mark forecast start)
        period_specs = [
            ('当前期间', date_range_start, date_range_end, 'r-', 'o', 2.5, 7, True),
            ('去年同期', last_year_start, last_year_end, 'b-', 's', 2, 6, False),
            ('上月同期', last_month_start, last_month_end, 'g-', '^', 2, 6, False),
        ]
        period_frames = []
        for name, period_start, period_end, *_ in period_specs:
            in_period = (product_df['date'] >= period_start) & (product_df['date'] <= period_end)
            period_data = product_df[in_period]
            print(f"{name}数据点数: {len(period_data)}")
            period_frames.append(period_data)

        has_data = False
        for spec, period_data in zip(period_specs, period_frames):
            name, period_start, period_end, line_style, marker, line_width, marker_size, mark_start = spec
            if period_data.empty:
                continue
            has_data = True
            period_data = period_data.sort_values('date')
            # Offsets are relative to each period's own start so curves align.
            period_data = period_data.assign(
                day_offset=(period_data['date'] - period_start).dt.days)
            print(f"{name}日期: {period_data['date'].tolist()}")
            print(f"{name}相对天数: {period_data['day_offset'].tolist()}")
            print(f"{name}销量: {period_data['sales'].tolist()}")
            history_ax.plot(
                period_data['day_offset'],
                period_data['sales'],
                line_style,
                marker=marker,
                linewidth=line_width,
                markersize=marker_size,
                label=f"{name} ({period_start.strftime('%Y-%m-%d')}{period_end.strftime('%Y-%m-%d')})"
            )
            if mark_start:
                center_point = period_data[period_data['date'] == start_date_obj]
                if not center_point.empty:
                    history_ax.scatter(
                        center_point['day_offset'],
                        center_point['sales'],
                        color='red',
                        s=150,
                        marker='*',
                        zorder=10,
                        label=f"预测起始日 ({start_date_obj.strftime('%Y-%m-%d')})"
                    )

        day_names = [(date_range_start + pd.Timedelta(days=d)).strftime('%m-%d')
                     for d in range(window_days)]
        history_ax.set_xticks(list(range(window_days)))
        history_ax.set_xticklabels(day_names)
        history_ax.axvline(x=days_before, color='red', linestyle='--', alpha=0.5)
        history_ax.set_title(f'{product_name} - 同期销量趋势对比 (7天)', fontsize=14, fontweight='bold')
        history_ax.set_xlabel('日期', fontsize=12)
        history_ax.set_ylabel('销量', fontsize=12)
        history_ax.grid(True, linestyle='--', alpha=0.7)
        history_ax.set_ylim(bottom=0)
        history_ax.axvspan(days_before-0.2, days_before+0.2, color='lightyellow', alpha=0.3)
        if has_data:
            # A legend without labelled artists would only trigger a warning.
            history_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)
        else:
            history_ax.text(0.5, 0.5, '没有找到可比较的同期数据',
                            horizontalalignment='center', verticalalignment='center',
                            transform=history_ax.transAxes, fontsize=14)
        history_fig.tight_layout()
        history_fig.canvas.draw()
        history_fig.savefig(chart_path, dpi=120)
        print(f"历史趋势图表已保存为: {chart_path}")
    finally:
        plt.close(history_fig)


def load_model_and_predict(product_id, model_type, future_days=7, start_date=None, analyze_result=False):
    """Load a saved model for one product and forecast its upcoming sales.

    The checkpoint is read from ``models/<type>/<product_id>_model.pt``
    ('optimized_kan' maps to the ``kan_optimized`` directory) and the most
    recent rows of ``pharmacy_sales.xlsx`` are used as model input. Charts
    and a CSV are written under ``predictions/<model_type>/<product_id>``.

    Args:
        product_id: Product identifier to forecast.
        model_type: 'mlstm', 'kan', 'transformer' or 'optimized_kan'.
        future_days: Requested number of days. The model runs a single
            forward pass, so at most the horizon stored in the checkpoint
            is returned; larger requests are truncated with a warning.
        start_date: First forecast date ('YYYY-MM-DD'). None means the day
            after the last record; an unparsable value falls back to today.
        analyze_result: When True, also run analyze_prediction_result.

    Returns:
        None when the request cannot be served (bad `future_days`, missing
        checkpoint, unknown product or unsupported model type). Otherwise a
        tuple ``(result, analysis)`` where ``result`` has the keys
        'predictions_df', 'chart_path', 'history_chart_path' and 'csv_path'
        (paths are None if the artifact could not be produced) and
        ``analysis`` is None unless `analyze_result` is True.
    """
    import traceback

    print("\n" + "="*80)
    print("加载模型并预测 - 详细调试信息:")
    print(f"产品ID: {product_id}, 模型类型: {model_type}, 预测天数: {future_days}, 预测起始日期: {start_date}")
    print("="*80 + "\n")

    if future_days < 1:
        # An empty forecast would otherwise crash on future_dates[0] below.
        print(f"错误: 预测天数必须为正整数, 收到 {future_days}")
        return None

    # The optimized KAN checkpoints live in a differently named directory.
    actual_model_path = model_type
    if model_type == 'optimized_kan':
        actual_model_path = 'kan_optimized'
        print(f"优化版KAN模型: 使用路径 'models/{actual_model_path}/{product_id}_model.pt'")
    model_path = f'models/{actual_model_path}/{product_id}_model.pt'
    if not os.path.exists(model_path):
        print(f"错误: 未找到产品 {product_id}{model_type} 模型文件")
        return None

    device = get_device()
    print(f"使用设备: {device} 进行预测")
    # SECURITY: weights_only=False unpickles arbitrary objects (needed here
    # because the checkpoint embeds the fitted scalers). Only load checkpoint
    # files produced by this project.
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)

    df = pd.read_excel('pharmacy_sales.xlsx')
    product_df = df[df['product_id'] == product_id].sort_values('date')
    if product_df.empty:
        print(f"错误: 数据集中未找到产品 {product_id} 的销售记录")
        return None
    product_name = product_df['product_name'].iloc[0]

    features = checkpoint['features']
    window = checkpoint['look_back']
    horizon = checkpoint['T']
    scaler_X = checkpoint['scaler_X']
    scaler_y = checkpoint['scaler_y']
    last_data = product_df[features].values[-window:]
    last_data_scaled = scaler_X.transform(last_data)

    model = _build_forecast_model(model_type, len(features), horizon)
    if model is None:
        print(f"错误: 不支持的模型类型 '{model_type}'")
        return None
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    # Add the batch dimension expected by the model.
    X_input = torch.Tensor(last_data_scaled).unsqueeze(0).to(device)
    with torch.no_grad():
        default_pred_length = horizon
        print(f"模型默认预测长度: {default_pred_length}")
        if future_days <= default_pred_length:
            print(f"请求的预测天数 {future_days} 小于等于模型默认值 {default_pred_length},截取需要的部分")
        else:
            print(f"请求的预测天数 {future_days} 大于模型默认值 {default_pred_length},需要多次预测")
        # Slicing past the end is harmless, so one expression covers both cases.
        y_pred_scaled = model(X_input).squeeze(0).cpu().numpy()[:future_days]
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
    if len(y_pred) < future_days:
        print(f"警告: 模型单次最多预测 {default_pred_length} 天, 实际返回 {len(y_pred)} 天的预测结果")

    last_date = product_df['date'].iloc[-1]
    if start_date:
        try:
            start_date_obj = pd.Timestamp(start_date)
            print(f"成功解析用户指定的预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")
        except Exception as e:
            # Best effort: an invalid date falls back to today.
            start_date_obj = pd.Timestamp.now().normalize()
            print(f"日期解析错误: {e}")
            print(f"使用当前日期 {start_date_obj.strftime('%Y-%m-%d')} 作为预测起点")
    else:
        start_date_obj = last_date + pd.Timedelta(days=1)
        print(f"未指定起始日期,使用数据集最后日期 {last_date.strftime('%Y-%m-%d')} 的下一天作为预测起点: {start_date_obj.strftime('%Y-%m-%d')}")
    # One date per produced value; sizing by future_days made the DataFrame
    # constructor fail whenever the model returned fewer points.
    future_dates = pd.date_range(start=start_date_obj, periods=len(y_pred), freq='D')
    print(f"生成预测日期范围: {future_dates[0]}{future_dates[-1]}, 共 {len(future_dates)}")

    output_dir = f'predictions/{model_type}/{product_id}'
    os.makedirs(output_dir, exist_ok=True)
    # Computed before plotting so the CSV name exists even if a chart fails.
    start_date_str = start_date_obj.strftime('%Y%m%d')

    predictions_df = pd.DataFrame({
        'date': future_dates,
        'sales': y_pred,
        'data_type': '预测销量',
        'product_id': product_id,
        'product_name': product_name
    })
    # Fallback payload: forecast only, no charts.
    complete_df = predictions_df
    forecast_chart = None
    history_chart = None
    try:
        history_days = 14
        history_df = _select_recent_history(product_df, start_date_obj, history_days)
        print(f"历史数据日期范围: {history_df['date'].min()}{history_df['date'].max()}")
        print(f"预测数据日期范围: {future_dates.min()}{future_dates.max()}")
        print(f"预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")

        forecast_chart = f'{output_dir}/forecast_{start_date_str}_days{future_days}.png'
        _plot_forecast_chart(history_df, future_dates, y_pred, start_date_obj, history_days,
                             product_name, model_type, future_days, forecast_chart)

        # The comparison chart is optional; its failure must not drop the rest.
        try:
            history_chart = f'{output_dir}/history_{start_date_str}.png'
            _plot_same_period_chart(product_df, product_name, start_date_obj, history_chart)
        except Exception as e:
            print(f"生成历史趋势图时出错: {e}")
            traceback.print_exc()
            history_chart = None

        # Combined history + forecast frame for the CSV export and the API.
        history_df['data_type'] = '历史销量'
        complete_df = pd.concat([
            history_df[['date', 'sales', 'data_type']].assign(product_id=product_id, product_name=product_name),
            predictions_df
        ]).sort_values('date')
    except Exception as e:
        print(f"生成预测图表时出错: {e}")
        traceback.print_exc()
        forecast_chart = None
        history_chart = None
        complete_df = predictions_df

    try:
        forecast_csv = f'{output_dir}/forecast_{start_date_str}_days{future_days}.csv'
        complete_df.to_csv(forecast_csv, index=False)
        print(f"预测结果已保存到: {forecast_csv}")
    except Exception as e:
        print(f"保存CSV文件时出错: {e}")
        forecast_csv = None

    result = {
        'predictions_df': complete_df,
        'chart_path': forecast_chart,
        'history_chart_path': history_chart,
        'csv_path': forecast_csv
    }
    analysis = None
    if analyze_result:
        analysis = analyze_prediction_result(product_id, model_type, y_pred, features=None)
    return result, analysis
# 特定加载KAN模型并预测的函数
def load_kan_model_and_predict(product_id, future_days=7):
    """Forecast sales for a product with its saved 'kan' model.

    Convenience wrapper around load_model_and_predict with the model type
    fixed to 'kan'; the start date and analysis flag keep their defaults.

    Args:
        product_id: Product identifier to forecast.
        future_days: Number of days to forecast (default 7).

    Returns:
        Whatever load_model_and_predict returns for these arguments.
    """
    kan_forecast = load_model_and_predict(product_id, 'kan', future_days)
    return kan_forecast
class PharmacyPredictor:
    """Train, evaluate and compare sales-forecasting models for one product.

    Sales records are read once from an Excel file at construction time;
    trained checkpoints, plots and comparison reports are written to
    `model_dir`.
    """

    def __init__(self, data_path='pharmacy_sales.xlsx', model_dir='saved_models'):
        """Remember paths, pick the compute device and load the sales data.

        Args:
            data_path: Excel file with the sales records.
            model_dir: Directory for checkpoints and plots (created if missing).
        """
        self.data_path = data_path
        self.model_dir = model_dir
        self.device = get_device()
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(model_dir, exist_ok=True)
        print(f"Using device: {self.device}")
        if os.path.exists(data_path):
            self.data = pd.read_excel(data_path)
            print(f"Loaded data from {data_path}")
        else:
            print(f"Data file {data_path} not found. Please generate data first.")
            self.data = None

    # ... existing methods retained ...

    def _build_model(self, model_type, use_optimized, input_dim, output_dim,
                     hidden_size, num_layers, dropout):
        """Construct the requested network and move it to the active device.

        Raises:
            ValueError: If `model_type` is not a supported type.
        """
        # NOTE(review): load_model_and_predict builds TimeSeriesTransformer and
        # the mLSTM class with a different keyword set (num_features/d_model/...)
        # - confirm which signature the model classes really expose.
        if model_type == 'transformer':
            model = TimeSeriesTransformer(
                input_dim=input_dim,
                hidden_dim=hidden_size,
                output_dim=output_dim,
                num_layers=num_layers,
                dropout=dropout
            )
            label = "Transformer"
        elif model_type == 'slstm':
            model = ScalarLSTM(
                input_dim=input_dim,
                hidden_dim=hidden_size,
                output_dim=output_dim,
                num_layers=num_layers,
                dropout=dropout
            )
            label = "Scalar LSTM"
        elif model_type == 'mlstm':
            model = MatrixLSTM(
                input_dim=input_dim,
                hidden_dim=hidden_size,
                matrix_dim=8,
                output_dim=output_dim,
                num_layers=num_layers,
                dropout=dropout
            )
            label = "Matrix LSTM"
        elif model_type == 'kan':
            kan_class = OptimizedKANForecaster if use_optimized else KANForecaster
            model = kan_class(
                input_features=input_dim,
                hidden_sizes=[hidden_size, hidden_size*2, hidden_size],
                output_sequence_length=output_dim,
                grid_size=5,
                spline_order=3,
                dropout_rate=dropout
            )
            label = "Optimized KAN Forecaster" if use_optimized else "KAN Forecaster"
        else:
            raise ValueError(f"Unknown model type: {model_type}")
        model = model.to(self.device)
        print(f"Using {label} model")
        return model

    @staticmethod
    def _pct_improvement(gain, baseline):
        """Express `gain` as a percentage of `baseline`.

        Returns float('inf') for a zero baseline instead of dividing by zero.
        """
        if baseline == 0:
            return float('inf')
        return gain / baseline * 100

    def train_model(self, product_id, model_type='transformer', epochs=100, batch_size=32,
                    learning_rate=0.001, sequence_length=30, forecast_horizon=7,
                    hidden_size=64, num_layers=2, dropout=0.1, use_optimized=False):
        """Train a forecasting model for one product and persist the result.

        Trains with Adam + MSE loss and early stopping (patience 10 on the
        validation loss), restores the best weights, evaluates on the
        validation split and writes a checkpoint plus loss/prediction plots
        to `self.model_dir`.

        Args:
            product_id: Product identifier.
            model_type: 'transformer', 'slstm', 'mlstm' or 'kan'.
            epochs: Maximum number of training epochs.
            batch_size: Mini-batch size.
            learning_rate: Adam learning rate.
            sequence_length: Input window length passed to prepare_data.
            forecast_horizon: Number of days to predict.
            hidden_size: Hidden layer width.
            num_layers: Number of layers (not used by the KAN models).
            dropout: Dropout rate.
            use_optimized: Use the optimized KAN (only for model_type='kan').

        Returns:
            Dict with 'mse', 'rmse', 'mae', 'r2', 'mape' and 'training_time'
            as plain Python floats, or None when no data is available.

        Raises:
            ValueError: If `model_type` is not supported.
        """
        if self.data is None:
            print("No data available. Please load or generate data first.")
            return None
        product_data = self.data[self.data['product_id'] == product_id].copy()
        if product_data.empty:
            print(f"No data found for product {product_id}")
            return None

        X, y, X_train, X_val, y_train, y_val, scaler_X, scaler_y = prepare_data(
            product_data, sequence_length, forecast_horizon
        )
        train_loader = prepare_sequences(X_train, y_train, batch_size)
        val_loader = prepare_sequences(X_val, y_val, batch_size)
        input_dim = X.shape[2]
        output_dim = y.shape[1]

        model = self._build_model(model_type, use_optimized, input_dim, output_dim,
                                  hidden_size, num_layers, dropout)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.MSELoss()

        best_val_loss = float('inf')
        best_state = None
        patience = 10
        stale_epochs = 0
        train_losses = []
        val_losses = []
        start_time = time.time()
        for epoch in range(epochs):
            model.train()
            train_loss = 0
            with tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}") as pbar:
                for X_batch, y_batch in pbar:
                    X_batch = X_batch.to(self.device)
                    y_batch = y_batch.to(self.device)
                    optimizer.zero_grad()
                    outputs = model(X_batch)
                    loss = criterion(outputs, y_batch)
                    if model_type == 'kan':
                        # KAN models add a regularization penalty on top of MSE.
                        loss = loss + model.regularization_loss() * 0.01
                    loss.backward()
                    optimizer.step()
                    train_loss += loss.item()
                    pbar.set_postfix({'loss': loss.item()})
            # max(..., 1) keeps an empty loader from dividing by zero.
            train_loss /= max(len(train_loader), 1)
            train_losses.append(train_loss)

            model.eval()
            val_loss = 0
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    X_batch = X_batch.to(self.device)
                    y_batch = y_batch.to(self.device)
                    outputs = model(X_batch)
                    val_loss += criterion(outputs, y_batch).item()
            val_loss /= max(len(val_loader), 1)
            val_losses.append(val_loss)
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # state_dict() returns references to the live tensors; clone
                # them, otherwise later epochs overwrite the "best" snapshot.
                best_state = {name: tensor.detach().clone()
                              for name, tensor in model.state_dict().items()}
                stale_epochs = 0
            else:
                stale_epochs += 1
                if stale_epochs >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    break
        training_time = time.time() - start_time
        print(f"Training completed in {training_time:.2f} seconds")

        # No snapshot exists when no epoch improved (epochs=0 or NaN losses);
        # keep the current weights in that case instead of crashing.
        if best_state is not None:
            model.load_state_dict(best_state)

        model.eval()
        with torch.no_grad():
            X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
            y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)
            y_pred = model(X_val_tensor).cpu().numpy()
            y_val_np = y_val_tensor.cpu().numpy()
        # Back to the original sales scale before computing metrics.
        y_pred = scaler_y.inverse_transform(y_pred)
        y_val_np = scaler_y.inverse_transform(y_val_np)

        # Plain floats keep the metrics JSON-serializable (numpy float32 is not).
        mse = float(mean_squared_error(y_val_np, y_pred))
        rmse = float(np.sqrt(mse))
        mae = float(mean_absolute_error(y_val_np, y_pred))
        r2 = float(r2_score(y_val_np.flatten(), y_pred.flatten()))
        mask = y_val_np != 0  # MAPE is undefined where the target is zero
        if mask.any():
            mape = float(np.mean(np.abs((y_val_np[mask] - y_pred[mask]) / y_val_np[mask])) * 100)
        else:
            mape = float('nan')
        print(f"MSE: {mse:.4f}")
        print(f"RMSE: {rmse:.4f}")
        print(f"MAE: {mae:.4f}")
        print(f"R²: {r2:.4f}")
        print(f"MAPE: {mape:.2f}%")

        model_suffix = "_optimized" if (model_type == 'kan' and use_optimized) else ""
        model_path = os.path.join(self.model_dir, f"{product_id}_{model_type}{model_suffix}.pt")
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scaler_X': scaler_X,
            'scaler_y': scaler_y,
            'metrics': {
                'mse': mse,
                'rmse': rmse,
                'mae': mae,
                'r2': r2,
                'mape': mape
            },
            'params': {
                'model_type': model_type,
                'use_optimized': use_optimized if model_type == 'kan' else False,
                'sequence_length': sequence_length,
                'forecast_horizon': forecast_horizon,
                'hidden_size': hidden_size,
                'num_layers': num_layers,
                'dropout': dropout
            },
            'training_time': training_time,
            'train_losses': train_losses,
            'val_losses': val_losses
        }, model_path)
        print(f"Model saved to {model_path}")

        # Loss curves.
        plt.figure(figsize=(10, 5))
        plt.plot(train_losses, label='Train Loss')
        plt.plot(val_losses, label='Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title(f'Training and Validation Loss for {product_id} ({model_type}{model_suffix})')
        plt.legend()
        plt.grid(True)
        loss_plot_path = os.path.join(self.model_dir, f"{product_id}_{model_type}{model_suffix}_loss.png")
        plt.savefig(loss_plot_path)
        plt.close()

        # Actual vs. predicted for the last (up to) 30 validation samples,
        # one subplot per forecast day, at most three days.
        plt.figure(figsize=(12, 6))
        n_points = min(30, len(y_val_np))
        x_axis = np.arange(n_points)
        n_panels = min(3, y_val_np.shape[1])
        for i in range(n_panels):
            plt.subplot(n_panels, 1, i+1)
            plt.plot(x_axis, y_val_np[-n_points:, i], 'b-', label=f'Actual Day {i+1}')
            plt.plot(x_axis, y_pred[-n_points:, i], 'r--', label=f'Predicted Day {i+1}')
            plt.legend()
            plt.grid(True)
        plt.tight_layout()
        plt.suptitle(f'Prediction Results for {product_id} ({model_type}{model_suffix})')
        plt.subplots_adjust(top=0.9)
        pred_plot_path = os.path.join(self.model_dir, f"{product_id}_{model_type}{model_suffix}_pred.png")
        plt.savefig(pred_plot_path)
        plt.close()

        return {
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'r2': r2,
            'mape': mape,
            'training_time': training_time
        }

    # ... other methods retained ...

    def train_optimized_kan_model(self, product_id, epochs=100, batch_size=32,
                                  learning_rate=0.001, sequence_length=30, forecast_horizon=7,
                                  hidden_size=64, num_layers=2, dropout=0.1):
        """Train the optimized KAN variant; see train_model for the parameters.

        Returns:
            The metrics dict produced by train_model, or None without data.
        """
        return self.train_model(
            product_id=product_id,
            model_type='kan',
            epochs=epochs,
            batch_size=batch_size,
            learning_rate=learning_rate,
            sequence_length=sequence_length,
            forecast_horizon=forecast_horizon,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            use_optimized=True
        )

    def compare_kan_models(self, product_id, epochs=100, batch_size=32,
                           learning_rate=0.001, sequence_length=30, forecast_horizon=7,
                           hidden_size=64, num_layers=2, dropout=0.1):
        """Train the original and the optimized KAN and report the difference.

        Both variants are trained with identical hyper-parameters (see
        train_model). A comparison table is printed, a JSON report and two
        bar charts are written to `self.model_dir`.

        Returns:
            Dict with 'product_id', 'original_kan', 'optimized_kan',
            'improvement' (percentages, positive = optimized is better) and
            'params'; None when training produced no metrics.
        """
        shared_args = dict(
            product_id=product_id,
            model_type='kan',
            epochs=epochs,
            batch_size=batch_size,
            learning_rate=learning_rate,
            sequence_length=sequence_length,
            forecast_horizon=forecast_horizon,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
        )
        print("Training original KAN model...")
        original_metrics = self.train_model(use_optimized=False, **shared_args)
        if original_metrics is None:
            # train_model already explained why (no data / unknown product).
            print("Comparison aborted: training produced no metrics.")
            return None
        print("\nTraining optimized KAN model...")
        optimized_metrics = self.train_model(use_optimized=True, **shared_args)
        if optimized_metrics is None:
            print("Comparison aborted: training produced no metrics.")
            return None

        # Error metrics and time: lower is better. R²: higher is better.
        improvement = {
            key: self._pct_improvement(original_metrics[key] - optimized_metrics[key],
                                       original_metrics[key])
            for key in ('mse', 'rmse', 'mae')
        }
        improvement['r2'] = self._pct_improvement(
            optimized_metrics['r2'] - original_metrics['r2'], abs(original_metrics['r2']))
        for key in ('mape', 'training_time'):
            improvement[key] = self._pct_improvement(
                original_metrics[key] - optimized_metrics[key], original_metrics[key])

        print("\n===== KAN Models Comparison =====")
        print(f"Product ID: {product_id}")
        print("\nMetrics:")
        print(f"{'Metric':<15} {'Original KAN':<15} {'Optimized KAN':<15} {'Improvement':<15}")
        print("-" * 60)
        print(f"{'MSE':<15} {original_metrics['mse']:<15.4f} {optimized_metrics['mse']:<15.4f} {improvement['mse']:<15.2f}%")
        print(f"{'RMSE':<15} {original_metrics['rmse']:<15.4f} {optimized_metrics['rmse']:<15.4f} {improvement['rmse']:<15.2f}%")
        print(f"{'MAE':<15} {original_metrics['mae']:<15.4f} {optimized_metrics['mae']:<15.4f} {improvement['mae']:<15.2f}%")
        print(f"{'R²':<15} {original_metrics['r2']:<15.4f} {optimized_metrics['r2']:<15.4f} {improvement['r2']:<15.2f}%")
        print(f"{'MAPE':<15} {original_metrics['mape']:<15.2f}% {optimized_metrics['mape']:<15.2f}% {improvement['mape']:<15.2f}%")
        print(f"{'Training Time':<15} {original_metrics['training_time']:<15.2f}s {optimized_metrics['training_time']:<15.2f}s {improvement['training_time']:<15.2f}%")

        comparison_path = os.path.join(self.model_dir, f"{product_id}_kan_comparison.json")
        comparison_results = {
            'product_id': product_id,
            'original_kan': original_metrics,
            'optimized_kan': optimized_metrics,
            'improvement': improvement,
            'params': {
                'epochs': epochs,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'sequence_length': sequence_length,
                'forecast_horizon': forecast_horizon,
                'hidden_size': hidden_size,
                'num_layers': num_layers,
                'dropout': dropout
            }
        }
        # NOTE: inf/nan values are written as Infinity/NaN, which strict JSON
        # parsers reject.
        with open(comparison_path, 'w', encoding='utf-8') as f:
            json.dump(comparison_results, f, indent=4)
        print(f"\nComparison results saved to {comparison_path}")

        self._plot_model_comparison(product_id, original_metrics, optimized_metrics)
        return comparison_results

    def _plot_model_comparison(self, product_id, original_metrics, optimized_metrics):
        """Save bar charts comparing metrics and training time of both KANs."""
        metrics = ['mse', 'rmse', 'mae', 'r2', 'mape']
        metric_names = ['MSE', 'RMSE', 'MAE', 'R²', 'MAPE (%)']
        original_values = [original_metrics[m] for m in metrics]
        optimized_values = [optimized_metrics[m] for m in metrics]

        # Grouped bars: one pair per metric.
        plt.figure(figsize=(14, 8))
        x = np.arange(len(metrics))
        width = 0.35
        plt.bar(x - width/2, original_values, width, label='Original KAN')
        plt.bar(x + width/2, optimized_values, width, label='Optimized KAN')
        plt.xlabel('Metrics')
        plt.ylabel('Values')
        plt.title(f'KAN Models Performance Comparison for {product_id}')
        plt.xticks(x, metric_names)
        plt.legend()
        plt.grid(True, axis='y', linestyle='--', alpha=0.7)
        # Value labels just above each bar.
        for i, v in enumerate(original_values):
            plt.text(i - width/2, v + 0.01, f'{v:.4f}', ha='center', va='bottom', fontsize=9)
        for i, v in enumerate(optimized_values):
            plt.text(i + width/2, v + 0.01, f'{v:.4f}', ha='center', va='bottom', fontsize=9)
        comparison_plot_path = os.path.join(self.model_dir, f"{product_id}_kan_comparison.png")
        plt.savefig(comparison_plot_path)
        plt.close()

        # Training time comparison.
        plt.figure(figsize=(10, 6))
        models = ['Original KAN', 'Optimized KAN']
        times = [original_metrics['training_time'], optimized_metrics['training_time']]
        plt.bar(models, times, color=['blue', 'green'])
        plt.xlabel('Model')
        plt.ylabel('Training Time (seconds)')
        plt.title(f'Training Time Comparison for {product_id}')
        plt.grid(True, axis='y', linestyle='--', alpha=0.7)
        for i, v in enumerate(times):
            plt.text(i, v + 0.1, f'{v:.2f}s', ha='center', va='bottom')
        time_plot_path = os.path.join(self.model_dir, f"{product_id}_kan_training_time.png")
        plt.savefig(time_plot_path)
        plt.close()
        print(f"Comparison plots saved to {comparison_plot_path} and {time_plot_path}")
# 预测结果分析模块
def analyze_prediction_result(product_id, model_type, predictions, features=None):
    """Build an explanatory analysis for a set of predicted sales values.

    Reads the product's sales history from 'pharmacy_sales.xlsx' and
    combines trend, summary statistics, a comparison with history and a
    list of influencing factors into one dictionary.

    Args:
        product_id: Product identifier.
        model_type: Name of the model that produced the predictions.
        predictions: Sequence of predicted sales values.
        features: Optional features forwarded to analyze_influencing_factors.

    Returns:
        Dict with the keys 'trend', 'statistics', 'historical_comparison',
        'factors' and 'explanation'.
    """
    print(f"分析产品 {product_id}{model_type} 模型预测结果...")
    sales_df = pd.read_excel('pharmacy_sales.xlsx')
    is_product = sales_df['product_id'] == product_id
    product_df = sales_df[is_product].sort_values('date')

    analysis = {}
    analysis['trend'] = analyze_trend(predictions)
    analysis['statistics'] = calculate_prediction_statistics(predictions)
    analysis['historical_comparison'] = compare_with_historical(predictions, product_df)
    analysis['factors'] = analyze_influencing_factors(product_id, model_type, predictions, features)
    # The text summary is derived from the four parts computed above.
    analysis['explanation'] = generate_prediction_explanation(analysis, product_id, model_type)
    return analysis
def analyze_trend(predictions):
    """Classify the direction of a forecast.

    Fits a straight line through the values (x = 0, 1, 2, ...). A slope
    above 0.05 is 'increasing', below -0.05 'decreasing'. Otherwise the
    coefficient of variation decides: above 10% is 'fluctuating', else
    'stable'. Fewer than two values yield 'unknown'.
    """
    n_points = len(predictions)
    if n_points < 2:
        return 'unknown'

    day_index = np.arange(n_points)
    slope = stats.linregress(day_index, predictions)[0]
    if slope > 0.05:
        return 'increasing'
    if slope < -0.05:
        return 'decreasing'

    # Flat line: distinguish noisy from steady forecasts.
    spread = np.std(predictions)
    average = np.mean(predictions)
    variation = spread / average if average != 0 else 0
    return 'fluctuating' if variation > 0.1 else 'stable'
def calculate_prediction_statistics(predictions):
    """Summarize a forecast.

    Returns:
        Dict with 'mean', 'median', 'min', 'max' and 'std' as floats plus
        'day_over_day_changes' from calculate_day_over_day_changes.
    """
    reducers = (
        ('mean', np.mean),
        ('median', np.median),
        ('min', np.min),
        ('max', np.max),
        ('std', np.std),
    )
    summary = {label: float(reduce_fn(predictions)) for label, reduce_fn in reducers}
    summary['day_over_day_changes'] = calculate_day_over_day_changes(predictions)
    return summary
def calculate_day_over_day_changes(predictions):
    """Return the percentage change between each pair of consecutive values.

    A change measured from a zero value is reported as 0.0. The result has
    one entry fewer than `predictions` (empty for fewer than two values).
    """
    return [
        float((today - yesterday) / yesterday * 100 if yesterday != 0 else 0)
        for yesterday, today in zip(predictions[:-1], predictions[1:])
    ]
def compare_with_historical(predictions, product_df):
    """Compare a forecast with the most recent sales of equal length.

    Args:
        predictions: Sequence of predicted sales values.
        product_df: DataFrame with a 'sales' column; its last
            len(predictions) rows are used as the comparison sample.

    Returns:
        Dict with 'has_historical_data'. When it is True the dict also
        holds 'historical_mean', 'historical_median', 'historical_min',
        'historical_max' and 'mean_difference_pct' (0 for a zero mean).
    """
    horizon = len(predictions)
    if len(product_df) >= horizon:
        recent_sales = product_df['sales'].values[-horizon:]
    else:
        recent_sales = []

    comparison = {'has_historical_data': len(recent_sales) > 0}
    if not comparison['has_historical_data']:
        return comparison

    baseline = float(np.mean(recent_sales))
    comparison['historical_mean'] = baseline
    comparison['historical_median'] = float(np.median(recent_sales))
    comparison['historical_min'] = float(np.min(recent_sales))
    comparison['historical_max'] = float(np.max(recent_sales))
    if baseline != 0:
        comparison['mean_difference_pct'] = float((np.mean(predictions) - baseline) / baseline * 100)
    else:
        comparison['mean_difference_pct'] = 0
    return comparison
def analyze_influencing_factors(product_id, model_type, predictions, features=None):
    """List the factors assumed to drive a forecast.

    This is a simplified, rule-based implementation: one factor depends on
    the model family, two generic factors are always present, and further
    factors are added from `features` when it is a dict.

    Args:
        product_id: Product identifier (not used by the rules).
        model_type: Model name; any name containing 'kan' counts as KAN.
        predictions: Predicted values (not used by the rules).
        features: Optional dict; truthy 'is_promotion' / 'is_holiday' and
            the presence of 'temperature' each add a factor.

    Returns:
        List of dicts with the keys 'name', 'importance', 'description'.
    """
    def factor(name, importance, description):
        return {'name': name, 'importance': importance, 'description': description}

    # Model-specific factor.
    if model_type == 'transformer':
        model_factor = factor('时间序列模式', 'high',
                              'Transformer模型擅长捕捉时间序列中的长期依赖关系和复杂模式')
    elif model_type == 'mlstm':
        model_factor = factor('序列记忆', 'high',
                              '矩阵LSTM模型具有强大的序列记忆能力能够捕捉时间序列中的长期依赖')
    elif 'kan' in model_type:
        model_factor = factor('非线性关系', 'high',
                              'KAN模型擅长学习复杂的非线性映射关系适合捕捉销售数据中的复杂模式')
    else:
        model_factor = None

    factors = [] if model_factor is None else [model_factor]

    # Factors that apply to every model.
    factors.append(factor('季节性模式', 'medium', '销售数据通常表现出季节性波动,如周末销量高于工作日'))
    factors.append(factor('历史趋势', 'high', '历史销售趋势是预测的重要基础'))

    # Feature-driven factors.
    if isinstance(features, dict):
        if features.get('is_promotion'):
            factors.append(factor('促销活动', 'high', '促销活动通常会显著提升销量'))
        if features.get('is_holiday'):
            factors.append(factor('节假日', 'medium', '节假日期间销售模式可能与平日不同'))
        if 'temperature' in features:
            factors.append(factor('温度因素', 'medium', '天气温度可能影响客流量和特定药品需求'))
    return factors
def generate_prediction_explanation(analysis, product_id, model_type):
    """Compose a human-readable explanation from a prediction analysis.

    Args:
        analysis: Dict read with the keys ``'trend'``, ``'statistics'``
            (itself read for ``'max'``, ``'min'``, ``'mean'`` and, for
            increasing/decreasing trends, ``'day_over_day_changes'``),
            ``'historical_comparison'`` (``'has_historical_data'`` and, when
            that is truthy, ``'mean_difference_pct'``) and ``'factors'``
            (items with ``'name'`` and ``'importance'``).
        product_id: Identifier interpolated into the trend sentence.
        model_type: Model identifier; ``'transformer'``, ``'mlstm'``,
            ``'kan'`` and ``'optimized_kan'`` each add a closing remark.

    Returns:
        The generated sentences joined by single spaces.
    """
    sentences = []
    direction = analysis['trend']
    summary = analysis['statistics']

    # Trend sentence; the average day-over-day change is only computed for
    # a clearly rising or falling trend.
    if direction == 'increasing':
        avg_change = np.mean(summary['day_over_day_changes'])
        sentences.append(
            f"预测显示产品 {product_id} 未来销量呈上升趋势,平均日环比增长 {avg_change:.2f}%。"
        )
    elif direction == 'decreasing':
        avg_drop = abs(np.mean(summary['day_over_day_changes']))
        sentences.append(
            f"预测显示产品 {product_id} 未来销量呈下降趋势,平均日环比下降 {avg_drop:.2f}%。"
        )
    elif direction == 'fluctuating':
        sentences.append(f"预测显示产品 {product_id} 未来销量有波动,但无明显上升或下降趋势。")
    else:
        sentences.append(f"预测显示产品 {product_id} 未来销量相对稳定。")

    # Peak, trough and mean of the forecast window.
    sentences.append(
        f"预测期内最高销量为 {summary['max']:.2f},最低销量为 {summary['min']:.2f},平均销量为 {summary['mean']:.2f}"
    )

    # Comparison with the historical period, using a +/-10% band.
    history = analysis['historical_comparison']
    if history['has_historical_data']:
        diff_pct = history['mean_difference_pct']
        if diff_pct > 10:
            sentences.append(f"预测销量比历史同期高 {diff_pct:.2f}%,可能需要增加库存。")
        elif diff_pct < -10:
            sentences.append(f"预测销量比历史同期低 {abs(diff_pct):.2f}%,建议适当减少库存。")
        else:
            sentences.append("预测销量与历史同期相近,可参考历史库存水平。")

    # Name at most the first two high-importance factors.
    all_factors = analysis['factors']
    if all_factors:
        important = [item for item in all_factors if item['importance'] == 'high']
        if important:
            leading_names = [item['name'] for item in important[:2]]
            sentences.append(f"主要影响因素包括: {', '.join(leading_names)}")

    # Closing remark on the model; an ordered pair table keeps the same
    # equality comparisons as an if/elif chain.
    model_remarks = (
        ('transformer', "Transformer模型善于捕捉时间序列中的长期依赖关系预测结果更注重整体趋势。"),
        ('mlstm', "矩阵LSTM模型结合了序列记忆和注意力机制对时间序列中的突变点有较好的适应性。"),
        ('kan', "KAN模型利用B样条基函数自适应学习非线性关系对复杂非线性模式有较强的表达能力。"),
        ('optimized_kan', "优化版KAN模型在保持预测精度的同时显著降低了内存占用并提高了训练速度。"),
    )
    for known_type, remark in model_remarks:
        if model_type == known_type:
            sentences.append(remark)
            break

    return " ".join(sentences)
if __name__ == "__main__":
    # Script entry point: make sure the sales workbook exists, train one
    # model per product, then print a summary of the evaluation metrics.
    data_file = 'pharmacy_sales.xlsx'

    try:
        print("正在检查是否存在模拟数据...")
        df = pd.read_excel(data_file)
        print("发现现有数据,跳过数据生成步骤。")
    except Exception:
        # Any load failure falls back to data generation, as before, but
        # KeyboardInterrupt / SystemExit are no longer swallowed the way a
        # bare ``except:`` would.
        print("未找到数据,正在生成模拟数据...")
        # Imported for its side effect; presumably the module writes the
        # workbook at import time -- the read below depends on that.
        import generate_pharmacy_data  # noqa: F401
        print("数据生成完成!")
        # Load the freshly generated data.
        df = pd.read_excel(data_file)

    # Collect every product ID present in the data.
    product_ids = df['product_id'].unique()

    # Train one model per product and keep its metrics.
    all_metrics = {}
    for product_id in product_ids:
        print(f"\n{'='*50}")
        print(f"开始训练产品 {product_id} 的模型")
        print(f"{'='*50}")

        # The first element of the returned pair is not needed here.
        _, metrics = train_product_model(product_id, epochs=epochs)
        all_metrics[product_id] = metrics

    # Print the evaluation metrics for every product.
    print("\n所有产品模型评估结果汇总:")
    for product_id, metrics in all_metrics.items():
        product_name = df[df['product_id'] == product_id]['product_name'].iloc[0]
        print(f"\n{product_name} (ID: {product_id}):")
        for metric, value in metrics.items():
            print(f"  {metric}: {value:.4f}")

    print("\n模型训练和评估完成!")