
1. 修复前端图表日期排序问题: - 改进 PredictionView.vue 和 HistoryView.vue 中的图表渲染逻辑 - 确保历史数据和预测数据按照正确的日期顺序显示 2. 修复后端API处理: - 解决 optimized_kan 模型类型的路径映射问题 - 添加 JSON 序列化器处理 Pandas Timestamp 对象 - 改进预测数据与历史数据的衔接处理 3. 优化图表样式和用户体验
2353 lines
92 KiB
Python
2353 lines
92 KiB
Python
import pandas as pd
|
||
import numpy as np
|
||
import torch
|
||
import torch.nn as nn
|
||
import torch.optim as optim
|
||
from torch.utils.data import DataLoader, TensorDataset
|
||
import matplotlib
|
||
# 设置matplotlib后端为Agg,适用于无头服务器环境
|
||
matplotlib.use('Agg')
|
||
import matplotlib.pyplot as plt
|
||
from sklearn.preprocessing import MinMaxScaler
|
||
from sklearn.model_selection import train_test_split
|
||
import os
|
||
from datetime import datetime
|
||
import json
|
||
import torch.serialization
|
||
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
||
from models.transformer_model import TimeSeriesTransformer
|
||
from models.slstm_model import sLSTM as ScalarLSTM
|
||
from models.mlstm_model import MLSTMTransformer as MatrixLSTM
|
||
from models.kan_model import KANForecaster
|
||
from models.optimized_kan_forecaster import OptimizedKANForecaster # 导入优化版KAN模型
|
||
from models.data_utils import prepare_data, prepare_sequences, create_dataset, evaluate_model, PharmacyDataset
|
||
import torch.nn.functional as F
|
||
from models.utils import get_device, to_device, DeviceDataLoader
|
||
from tqdm import tqdm
|
||
import time
|
||
import scipy.stats as stats
|
||
|
||
# Make matplotlib render Chinese text: use the SimHei font and keep the
# minus sign displayable when that font is active.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Resolve the compute device once at import time (GPU or CPU, as decided by
# models.utils.get_device) and report it.
device = get_device()
print(f"使用设备: {device}")

# ---- Global hyper-parameters shared by every training function ----
# Windowing
look_back = 14        # number of past days fed to the model
T = 7                 # number of future days to predict

# Model shape
num_features = 8      # number of input features
embed_dim = 32        # embedding dimension
dense_dim = 32        # hidden (feed-forward) layer width
num_heads = 4         # attention heads
num_blocks = 3        # number of encoder/decoder blocks
dropout_rate = 0.1    # dropout probability

# Optimisation
epochs = 50           # default number of training epochs
learn_rate = 0.001    # learning rate
batch_size = 32       # mini-batch size
|
||
|
||
# 默认训练函数 - 使用mLSTM作为默认模型
|
||
def train_product_model(product_id, epochs=50):
    """Train the default sales-forecasting model for one product.

    The default model is mLSTM; all work is delegated to
    ``train_product_model_with_mlstm``.

    Args:
        product_id: Product ID.
        epochs: Number of training epochs.

    Returns:
        The value returned by ``train_product_model_with_mlstm``: the
        trained model and its evaluation metrics.
    """
    default_trainer = train_product_model_with_mlstm
    return default_trainer(product_id, epochs)
|
||
|
||
# 使用mLSTM模型训练的函数
|
||
def train_product_model_with_mlstm(product_id, epochs=50):
    """Train, evaluate and persist an mLSTM sales forecaster for one product.

    Reads ``pharmacy_sales.xlsx`` (relative path), keeps the rows of
    ``product_id`` sorted by date, min-max scales the features and target,
    splits 80/20 chronologically, builds sliding windows with the
    module-level ``look_back`` / ``T`` settings and trains the model with
    Adam and MSE loss on the module-level ``device``.

    Side effects (all paths are relative):
        * ``{product_id}_mlstm_loss_curve.png`` - train/test loss curves
        * ``{product_id}_mlstm_prediction.png`` - actual vs. predicted sales
        * ``{product_id}_mlstm_prediction_results.csv`` - per-date results
        * ``models/mlstm/{product_id}_model.pt`` - checkpoint (weights,
          optimizer state, loss history, scalers, feature list, window sizes)
        * ``models/mlstm/{product_id}_log.json`` - training log

    Args:
        product_id: Product ID; compared against the ``product_id`` column.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(model, metrics)`` where ``model`` is the trained network
        and ``metrics`` is the mapping returned by ``evaluate_model``.

    Raises:
        IndexError: If the sheet contains no rows for ``product_id`` (raised
            by the ``.iloc[0]`` lookup of the product name).
    """
    # Load the generated pharmacy sales data.
    df = pd.read_excel('pharmacy_sales.xlsx')

    # Keep only this product's rows, in chronological order.
    product_df = df[df['product_id'] == product_id].sort_values('date')
    product_name = product_df['product_name'].iloc[0]

    print(f"使用mLSTM模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
    print(f"使用设备: {device}")

    # Input feature columns; the target is the 'sales' column.
    features = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']

    X = product_df[features].values
    y = product_df[['sales']].values  # keep 2-D so the scaler accepts it

    # Scale features and target to [0, 1].
    # NOTE(review): both scalers are fitted on the full series, i.e. before
    # the train/test split, so test-set statistics influence the scaling.
    # Left unchanged because the fitted scalers are stored in the checkpoint.
    scaler_X = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))

    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # Chronological split: first 80% for training, last 20% for testing.
    train_size = int(len(X_scaled) * 0.8)
    X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
    y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

    # Build the sliding-window samples.
    trainX, trainY = create_dataset(X_train, y_train, look_back, T)
    testX, testY = create_dataset(X_test, y_test, look_back, T)

    # Convert to PyTorch tensors.
    trainX_tensor = torch.Tensor(trainX)
    trainY_tensor = torch.Tensor(trainY)
    testX_tensor = torch.Tensor(testX)
    testY_tensor = torch.Tensor(testY)

    # Wrap in datasets / loaders; only the training loader is shuffled.
    train_dataset = PharmacyDataset(trainX_tensor, trainY_tensor)
    test_dataset = PharmacyDataset(testX_tensor, testY_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Wrap the loaders in the project's device-aware loader.
    train_loader = DeviceDataLoader(train_loader, device)
    test_loader = DeviceDataLoader(test_loader, device)

    # Build the mLSTM + Transformer model.
    # The class is imported as ``MLSTMTransformer as MatrixLSTM``, so the
    # alias is the only name bound in this module; calling
    # ``MLSTMTransformer`` directly raised NameError.
    model = MatrixLSTM(
        num_features=num_features,
        hidden_size=128,
        mlstm_layers=1,
        embed_dim=embed_dim,
        dense_dim=dense_dim,
        num_heads=num_heads,
        dropout_rate=dropout_rate,
        num_blocks=num_blocks,
        output_sequence_length=T
    )

    # Move the model to the selected device.
    model = model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)

    # Per-epoch average losses, kept for plotting and for the checkpoint.
    train_losses = []
    test_losses = []

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False):
            # Forward pass.
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Average training loss over the batches of this epoch.
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)

        # Evaluate on the test split without tracking gradients.
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                loss = criterion(outputs.squeeze(-1), y_batch)
                test_loss += loss.item()

        test_loss = test_loss / len(test_loader)
        test_losses.append(test_loss)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    # Plot the loss curves.
    loss_fig = plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='训练损失')
    plt.plot(test_losses, label='测试损失')
    plt.title(f'{product_name} - mLSTM模型训练和测试损失')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'{product_id}_mlstm_loss_curve.png')
    # Close the figure; otherwise pyplot keeps it alive and figures pile up
    # across repeated training calls.
    plt.close(loss_fig)

    # Predict on the whole test split in a single pass.
    model.eval()
    with torch.no_grad():
        # Move the test inputs to the device first.
        testX_device = to_device(testX_tensor, device)
        y_pred_scaled = model(testX_device).squeeze(-1).cpu().numpy()

    # Undo the target scaling; the scaler expects a single column, so
    # flatten to (-1, 1) and restore the original shape afterwards.
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
    y_true = scaler_y.inverse_transform(testY.reshape(-1, 1)).reshape(testY.shape)

    # Evaluate on the flattened predictions.
    metrics = evaluate_model(y_true.flatten(), y_pred.flatten())
    print(f"\n{product_name} mLSTM模型评估指标:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # Plot actual vs. predicted sales.
    pred_fig = plt.figure(figsize=(12, 6))

    # Dates of the test samples: the first target follows `look_back` input
    # rows of the test split.
    test_dates = product_df['date'].iloc[train_size + look_back:train_size + look_back + len(y_true)].values

    # Plot at most the last 30 samples; column 0 is the first forecast step.
    days_to_plot = min(30, len(y_true))
    start_idx = max(0, len(y_true) - days_to_plot)

    plt.plot(test_dates[start_idx:], y_true[start_idx:, 0], 'b-', label='实际销量')
    plt.plot(test_dates[start_idx:], y_pred[start_idx:, 0], 'r--', label='mLSTM预测销量')

    plt.title(f'{product_name} - mLSTM销量预测结果')
    plt.xlabel('日期')
    plt.ylabel('销量')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'{product_id}_mlstm_prediction.png')
    plt.close(pred_fig)  # release the figure, as above

    # Collect, for every test sample, the T dates its forecast covers.
    all_dates = []
    num_samples = len(y_true)
    for i in range(num_samples):
        start_idx = train_size + i + look_back
        dates = product_df['date'].iloc[start_idx : start_idx + T]
        all_dates.extend(dates)

    # Truncate to a common length in case dates and values disagree.
    flat_y_true = y_true.flatten()
    flat_y_pred = y_pred.flatten()
    min_len = min(len(all_dates), len(flat_y_true))

    results_df = pd.DataFrame({
        'date': all_dates[:min_len],
        'actual_sales': flat_y_true[:min_len],
        'predicted_sales': flat_y_pred[:min_len]
    })
    results_df.to_csv(f'{product_id}_mlstm_prediction_results.csv', index=False)

    print(f"\nmLSTM预测结果已保存到 {product_id}_mlstm_prediction_results.csv")

    # Make sure the output directory for this model type exists.
    model_dir = 'models/mlstm'
    os.makedirs(model_dir, exist_ok=True)

    # Save the checkpoint together with everything needed at inference time.
    model_path = os.path.join(model_dir, f'{product_id}_model.pt')
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_losses,
        'test_loss': test_losses,
        'scaler_X': scaler_X,
        'scaler_y': scaler_y,
        'features': features,
        'look_back': look_back,
        'T': T,
        'model_type': 'mlstm'
    }, model_path)
    print(f"模型已成功保存到 {model_path}")

    # Write the JSON training log next to the checkpoint.
    log_path = os.path.join(model_dir, f'{product_id}_log.json')
    log_data = {
        'product_id': product_id,
        'product_name': product_name,
        'model_type': 'mlstm',
        'training_completed_at': datetime.now().isoformat(),
        'epochs': epochs,
        'metrics': metrics,
        'file_path': model_path
    }
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=4, ensure_ascii=False)
    print(f"训练日志已保存到 {log_path}")

    return model, metrics
|
||
|
||
# 使用KAN模型训练的函数
|
||
def train_product_model_with_kan(product_id, epochs=50, use_optimized=False):
    """Train, evaluate and persist a KAN sales forecaster for one product.

    Reads ``pharmacy_sales.xlsx`` (relative path), keeps the rows of
    ``product_id`` sorted by date, min-max scales the features and target,
    splits 80/20 chronologically, builds sliding windows with the
    module-level ``look_back`` / ``T`` settings and trains the model with
    Adam and MSE loss on the module-level ``device``.

    Output names carry the suffix ``_optimized`` when ``use_optimized`` is
    true and no suffix otherwise (all paths are relative):
        * ``{product_id}_kan{suffix}_loss_curve.png``
        * ``{product_id}_kan{suffix}_prediction.png``
        * ``{product_id}_kan{suffix}_prediction_results.csv``
        * ``models/kan{suffix}/{product_id}_model.pt``
        * ``models/kan{suffix}/{product_id}_log.json``

    Args:
        product_id: Product ID; compared against the ``product_id`` column.
        epochs: Number of training epochs.
        use_optimized: Build ``OptimizedKANForecaster`` instead of
            ``KANForecaster``.

    Returns:
        tuple: ``(model, metrics)`` where ``model`` is the trained network
        and ``metrics`` is the mapping returned by ``evaluate_model``.

    Raises:
        IndexError: If the sheet contains no rows for ``product_id`` (raised
            by the ``.iloc[0]`` lookup of the product name).
    """
    # Load the generated pharmacy sales data.
    df = pd.read_excel('pharmacy_sales.xlsx')

    # Keep only this product's rows, in chronological order.
    product_df = df[df['product_id'] == product_id].sort_values('date')
    product_name = product_df['product_name'].iloc[0]

    # Human-readable model label used in console output and chart titles.
    model_type = "优化版KAN" if use_optimized else "KAN"
    print(f"使用{model_type}模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
    print(f"使用设备: {device}")

    # Input feature columns; the target is the 'sales' column.
    features = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']

    X = product_df[features].values
    y = product_df[['sales']].values  # keep 2-D so the scaler accepts it

    # Scale features and target to [0, 1].
    # NOTE(review): both scalers are fitted on the full series, i.e. before
    # the train/test split, so test-set statistics influence the scaling.
    # Left unchanged because the fitted scalers are stored in the checkpoint.
    scaler_X = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))

    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # Chronological split: first 80% for training, last 20% for testing.
    train_size = int(len(X_scaled) * 0.8)
    X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
    y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

    # Build the sliding-window samples.
    trainX, trainY = create_dataset(X_train, y_train, look_back, T)
    testX, testY = create_dataset(X_test, y_test, look_back, T)

    # Convert to PyTorch tensors.
    trainX_tensor = torch.Tensor(trainX)
    trainY_tensor = torch.Tensor(trainY)
    testX_tensor = torch.Tensor(testX)
    testY_tensor = torch.Tensor(testY)

    # Wrap in datasets / loaders; only the training loader is shuffled.
    train_dataset = PharmacyDataset(trainX_tensor, trainY_tensor)
    test_dataset = PharmacyDataset(testX_tensor, testY_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Wrap the loaders in the project's device-aware loader.
    train_loader = DeviceDataLoader(train_loader, device)
    test_loader = DeviceDataLoader(test_loader, device)

    # Both KAN variants are constructed with exactly the same arguments, so
    # pick the class first and build it once.
    forecaster_cls = OptimizedKANForecaster if use_optimized else KANForecaster
    model = forecaster_cls(
        input_features=num_features,
        hidden_sizes=[64, 128, 64],
        output_size=1,
        grid_size=5,
        spline_order=3,
        dropout_rate=dropout_rate,
        output_sequence_length=T
    )

    # Move the model to the selected device.
    model = model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)

    # Per-epoch average losses, kept for plotting and for the checkpoint.
    train_losses = []
    test_losses = []

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False):
            # Forward pass.
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Average training loss over the batches of this epoch.
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)

        # Evaluate on the test split without tracking gradients.
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                loss = criterion(outputs.squeeze(-1), y_batch)
                test_loss += loss.item()

        test_loss = test_loss / len(test_loader)
        test_losses.append(test_loss)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    # Plot the loss curves.
    loss_fig = plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='训练损失')
    plt.plot(test_losses, label='测试损失')
    plt.title(f'{product_name} - {model_type}模型训练和测试损失')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # File/directory suffix that distinguishes the optimized variant.
    model_suffix = "_optimized" if use_optimized else ""
    plt.savefig(f'{product_id}_kan{model_suffix}_loss_curve.png')
    # Close the figure; otherwise pyplot keeps it alive and figures pile up
    # across repeated training calls.
    plt.close(loss_fig)

    # Predict on the whole test split in a single pass.
    model.eval()
    with torch.no_grad():
        # Move the test inputs to the device first.
        testX_device = to_device(testX_tensor, device)
        y_pred_scaled = model(testX_device).squeeze(-1).cpu().numpy()

    # Undo the target scaling; the scaler expects a single column, so
    # flatten to (-1, 1) and restore the original shape afterwards.
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
    y_true = scaler_y.inverse_transform(testY.reshape(-1, 1)).reshape(testY.shape)

    # Evaluate on the flattened predictions.
    metrics = evaluate_model(y_true.flatten(), y_pred.flatten())
    print(f"\n{product_name} {model_type}模型评估指标:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # Plot actual vs. predicted sales.
    pred_fig = plt.figure(figsize=(12, 6))

    # Dates of the test samples: the first target follows `look_back` input
    # rows of the test split.
    test_dates = product_df['date'].iloc[train_size + look_back:train_size + look_back + len(y_true)].values

    # Plot at most the last 30 samples; column 0 is the first forecast step.
    days_to_plot = min(30, len(y_true))
    start_idx = max(0, len(y_true) - days_to_plot)

    plt.plot(test_dates[start_idx:], y_true[start_idx:, 0], 'b-', label='实际销量')
    plt.plot(test_dates[start_idx:], y_pred[start_idx:, 0], 'r--', label=f'{model_type}预测销量')

    plt.title(f'{product_name} - {model_type}销量预测结果')
    plt.xlabel('日期')
    plt.ylabel('销量')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'{product_id}_kan{model_suffix}_prediction.png')
    plt.close(pred_fig)  # release the figure, as above

    # Collect, for every test sample, the T dates its forecast covers.
    all_dates = []
    num_samples = len(y_true)
    for i in range(num_samples):
        start_idx = train_size + i + look_back
        dates = product_df['date'].iloc[start_idx : start_idx + T]
        all_dates.extend(dates)

    # Truncate to a common length in case dates and values disagree.
    flat_y_true = y_true.flatten()
    flat_y_pred = y_pred.flatten()
    min_len = min(len(all_dates), len(flat_y_true))

    results_df = pd.DataFrame({
        'date': all_dates[:min_len],
        'actual_sales': flat_y_true[:min_len],
        'predicted_sales': flat_y_pred[:min_len]
    })
    results_df.to_csv(f'{product_id}_kan{model_suffix}_prediction_results.csv', index=False)

    print(f"\n{model_type}预测结果已保存到 {product_id}_kan{model_suffix}_prediction_results.csv")

    # Make sure the output directory for this model variant exists.
    model_dir = f'models/kan{model_suffix}'
    os.makedirs(model_dir, exist_ok=True)

    # Save the checkpoint together with everything needed at inference time.
    model_path = os.path.join(model_dir, f'{product_id}_model.pt')
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_losses,
        'test_loss': test_losses,
        'scaler_X': scaler_X,
        'scaler_y': scaler_y,
        'features': features,
        'look_back': look_back,
        'T': T,
        'model_type': f'kan{model_suffix}'
    }, model_path)
    print(f"模型已成功保存到 {model_path}")

    # Write the JSON training log next to the checkpoint.
    log_path = os.path.join(model_dir, f'{product_id}_log.json')
    log_data = {
        'product_id': product_id,
        'product_name': product_name,
        'model_type': f'kan{model_suffix}',
        'training_completed_at': datetime.now().isoformat(),
        'epochs': epochs,
        'metrics': metrics,
        'file_path': model_path
    }
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=4, ensure_ascii=False)
    print(f"训练日志已保存到 {log_path}")

    return model, metrics
|
||
|
||
# 使用Transformer模型训练的函数
|
||
def train_product_model_with_transformer(product_id, epochs=50):
|
||
# 读取生成的药店销售数据
|
||
df = pd.read_excel('pharmacy_sales.xlsx')
|
||
|
||
# 筛选特定产品数据
|
||
product_df = df[df['product_id'] == product_id].sort_values('date')
|
||
product_name = product_df['product_name'].iloc[0]
|
||
|
||
print(f"使用Transformer模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
|
||
print(f"使用设备: {device}")
|
||
|
||
# 创建特征和目标变量
|
||
features = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
|
||
|
||
# 预处理数据
|
||
X = product_df[features].values
|
||
y = product_df[['sales']].values # 保持为二维数组
|
||
|
||
# 归一化数据
|
||
scaler_X = MinMaxScaler(feature_range=(0, 1))
|
||
scaler_y = MinMaxScaler(feature_range=(0, 1))
|
||
|
||
X_scaled = scaler_X.fit_transform(X)
|
||
y_scaled = scaler_y.fit_transform(y)
|
||
|
||
# 划分训练集和测试集(80% 训练,20% 测试)
|
||
train_size = int(len(X_scaled) * 0.8)
|
||
X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
|
||
y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]
|
||
|
||
# 创建时间序列数据
|
||
trainX, trainY = create_dataset(X_train, y_train, look_back, T)
|
||
testX, testY = create_dataset(X_test, y_test, look_back, T)
|
||
|
||
# 转换为PyTorch的Tensor
|
||
trainX_tensor = torch.Tensor(trainX)
|
||
trainY_tensor = torch.Tensor(trainY)
|
||
testX_tensor = torch.Tensor(testX)
|
||
testY_tensor = torch.Tensor(testY)
|
||
|
||
# 创建数据加载器
|
||
train_dataset = PharmacyDataset(trainX_tensor, trainY_tensor)
|
||
test_dataset = PharmacyDataset(testX_tensor, testY_tensor)
|
||
|
||
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
|
||
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
|
||
|
||
# 将数据加载器包装到设备加载器中
|
||
train_loader = DeviceDataLoader(train_loader, device)
|
||
test_loader = DeviceDataLoader(test_loader, device)
|
||
|
||
# 初始化Transformer模型
|
||
model = TimeSeriesTransformer(
|
||
num_features=num_features,
|
||
d_model=embed_dim,
|
||
nhead=num_heads,
|
||
num_encoder_layers=num_blocks,
|
||
dim_feedforward=dense_dim,
|
||
dropout=dropout_rate,
|
||
output_sequence_length=T
|
||
)
|
||
|
||
# 将模型移动到设备上
|
||
model = model.to(device)
|
||
|
||
criterion = nn.MSELoss()
|
||
optimizer = optim.Adam(model.parameters(), lr=learn_rate)
|
||
|
||
# 训练模型
|
||
train_losses = []
|
||
test_losses = []
|
||
|
||
for epoch in range(epochs):
|
||
model.train()
|
||
epoch_loss = 0
|
||
for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False):
|
||
# 前向传播
|
||
outputs = model(X_batch)
|
||
loss = criterion(outputs.squeeze(-1), y_batch)
|
||
|
||
# 反向传播和优化
|
||
optimizer.zero_grad()
|
||
loss.backward()
|
||
optimizer.step()
|
||
|
||
epoch_loss += loss.item()
|
||
|
||
# 计算训练损失
|
||
train_loss = epoch_loss / len(train_loader)
|
||
train_losses.append(train_loss)
|
||
|
||
# 在测试集上评估
|
||
model.eval()
|
||
test_loss = 0
|
||
with torch.no_grad():
|
||
for X_batch, y_batch in test_loader:
|
||
outputs = model(X_batch)
|
||
loss = criterion(outputs.squeeze(-1), y_batch)
|
||
test_loss += loss.item()
|
||
|
||
test_loss = test_loss / len(test_loader)
|
||
test_losses.append(test_loss)
|
||
|
||
if (epoch + 1) % 10 == 0:
|
||
print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")
|
||
|
||
# 绘制损失曲线
|
||
plt.figure(figsize=(10, 5))
|
||
plt.plot(train_losses, label='训练损失')
|
||
plt.plot(test_losses, label='测试损失')
|
||
plt.title(f'{product_name} - Transformer模型训练和测试损失')
|
||
plt.xlabel('Epoch')
|
||
plt.ylabel('Loss')
|
||
plt.legend()
|
||
plt.grid(True)
|
||
plt.savefig(f'{product_id}_transformer_loss_curve.png')
|
||
|
||
# 生成预测
|
||
model.eval()
|
||
with torch.no_grad():
|
||
# 将测试数据移动到设备上
|
||
testX_device = to_device(testX_tensor, device)
|
||
y_pred_scaled = model(testX_device).squeeze(-1).cpu().numpy()
|
||
|
||
# 反归一化预测结果
|
||
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
|
||
y_true = scaler_y.inverse_transform(testY.reshape(-1, 1)).reshape(testY.shape)
|
||
|
||
# 评估模型
|
||
metrics = evaluate_model(y_true.flatten(), y_pred.flatten())
|
||
print(f"\n{product_name} Transformer模型评估指标:")
|
||
for metric, value in metrics.items():
|
||
print(f"{metric}: {value:.4f}")
|
||
|
||
# 绘制预测结果
|
||
plt.figure(figsize=(12, 6))
|
||
|
||
# 获取测试集的实际日期
|
||
test_dates = product_df['date'].iloc[train_size + look_back:train_size + look_back + len(y_true)].values
|
||
|
||
# 只绘制最后30天的预测
|
||
days_to_plot = min(30, len(y_true))
|
||
start_idx = max(0, len(y_true) - days_to_plot)
|
||
|
||
plt.plot(test_dates[start_idx:], y_true[start_idx:, 0], 'b-', label='实际销量')
|
||
plt.plot(test_dates[start_idx:], y_pred[start_idx:, 0], 'r--', label='Transformer预测销量')
|
||
|
||
plt.title(f'{product_name} - Transformer销量预测结果')
|
||
plt.xlabel('日期')
|
||
plt.ylabel('销量')
|
||
plt.legend()
|
||
plt.grid(True)
|
||
plt.xticks(rotation=45)
|
||
plt.tight_layout()
|
||
|
||
# 强制重新绘制图表
|
||
fig.canvas.draw()
|
||
|
||
# 将预测起始日期和预测时长添加到文件名中
|
||
start_date_str = start_date_obj.strftime('%Y%m%d')
|
||
|
||
# 保存和显示图表
|
||
forecast_chart = f'{product_id}_transformer_forecast_{start_date_str}_days{T}.png'
|
||
plt.savefig(forecast_chart)
|
||
print(f"预测图表已保存为: {forecast_chart}")
|
||
|
||
# 关闭图表以释放内存
|
||
plt.close()
|
||
|
||
# 创建预测日期范围
|
||
last_date = product_df['date'].iloc[-1]
|
||
future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=T, freq='D')
|
||
|
||
# 创建预测结果DataFrame - 确保长度一致
|
||
# 使用测试集的最后T个预测作为未来预测
|
||
future_predictions = y_pred[-1, :T].flatten() # 取最后一个样本的预测序列
|
||
|
||
# 确保长度匹配
|
||
if len(future_dates) != len(future_predictions):
|
||
# 如果长度不匹配,调整future_predictions的长度
|
||
future_predictions = future_predictions[:len(future_dates)]
|
||
|
||
predictions_df = pd.DataFrame({
|
||
'date': future_dates,
|
||
'product_id': product_id,
|
||
'product_name': product_name,
|
||
'predicted_sales': future_predictions
|
||
})
|
||
|
||
print(f"\n{product_name} 未来 {T} 天销售预测 (使用Transformer模型):")
|
||
print(predictions_df[['date', 'predicted_sales']])
|
||
|
||
# 创建预测结果目录
|
||
output_dir = f'predictions/transformer/{product_id}'
|
||
os.makedirs(output_dir, exist_ok=True)
|
||
|
||
# 可视化预测结果
|
||
try:
|
||
# 1. 创建预测图表
|
||
forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
|
||
|
||
# 显示历史数据和预测数据
|
||
history_days = 14 # 减少为显示最近14天的历史数据,原来是30天
|
||
|
||
# 只选择预测起始日期之前14天的历史数据,而不是全部历史数据
|
||
history_end_date = start_date_obj - pd.Timedelta(days=1) # 预测起始日期的前一天
|
||
history_start_date = history_end_date - pd.Timedelta(days=history_days) # 向前推14天
|
||
|
||
# 过滤历史数据,只保留这个日期范围内的数据
|
||
history_df = product_df[(product_df['date'] >= history_start_date) &
|
||
(product_df['date'] <= history_end_date)][['date', 'sales']].copy()
|
||
|
||
if history_df.empty:
|
||
print(f"警告: 在日期范围 {history_start_date} 到 {history_end_date} 内没有历史数据")
|
||
# 如果没有符合条件的历史数据,就使用最近的数据
|
||
history_df = product_df.iloc[-min(history_days, len(product_df)):][['date', 'sales']].copy()
|
||
|
||
print(f"历史数据日期范围: {history_df['date'].min()} 到 {history_df['date'].max()}")
|
||
print(f"预测数据日期范围: {future_dates.min()} 到 {future_dates.max()}")
|
||
print(f"预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")
|
||
|
||
# 设置图表样式
|
||
plt.style.use('seaborn-v0_8-whitegrid')
|
||
|
||
# 绘制历史数据
|
||
forecast_ax.plot(history_df['date'].values, history_df['sales'].values,
|
||
'b-', linewidth=2, marker='o', markersize=4,
|
||
label='历史销量')
|
||
|
||
# 添加历史数据和预测数据的分隔线
|
||
forecast_ax.axvline(x=start_date_obj, color='gray', linestyle='--', alpha=0.7)
|
||
|
||
# 绘制预测数据,确保使用future_dates作为x轴
|
||
forecast_ax.plot(future_dates, future_predictions,
|
||
'r-', linewidth=2.5, marker='s', markersize=5,
|
||
label=f'{model_type}预测销量')
|
||
|
||
# 强制X轴从预测起始日期的前14天开始(如果有历史数据)到预测结束日期
|
||
date_min = start_date_obj - pd.Timedelta(days=history_days)
|
||
date_max = future_dates.max() + pd.Timedelta(days=1)
|
||
|
||
print(f"设置X轴范围: {date_min} 到 {date_max}")
|
||
forecast_ax.set_xlim(date_min, date_max)
|
||
|
||
# 设置X轴日期格式和刻度
|
||
import matplotlib.dates as mdates
|
||
forecast_ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
|
||
forecast_ax.xaxis.set_major_locator(mdates.DayLocator(interval=2)) # 每隔2天显示一个刻度
|
||
|
||
# 添加网格线
|
||
forecast_ax.grid(True, linestyle='--', alpha=0.6)
|
||
|
||
# 美化图表
|
||
forecast_ax.set_title(f'{product_name} - {model_type}销量预测 (从{start_date_obj.strftime("%Y-%m-%d")}开始,预测{future_days}天)',
|
||
fontsize=14, fontweight='bold')
|
||
forecast_ax.set_xlabel('日期', fontsize=12)
|
||
forecast_ax.set_ylabel('销量', fontsize=12)
|
||
|
||
# 添加图例并设置位置
|
||
forecast_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)
|
||
|
||
# 添加预测区域的背景色
|
||
min_y, max_y = forecast_ax.get_ylim()
|
||
forecast_ax.fill_between(
|
||
[start_date_obj, future_dates.max()],
|
||
min_y, max_y,
|
||
color='lightyellow', alpha=0.3, zorder=0
|
||
)
|
||
|
||
# 在图表上标注"预测区域"
|
||
forecast_ax.text(
|
||
start_date_obj + pd.Timedelta(days=future_days/2),
|
||
min_y + (max_y - min_y) * 0.05,
|
||
'预测区域',
|
||
ha='center', va='bottom',
|
||
bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3)
|
||
)
|
||
|
||
# 设置Y轴从0开始
|
||
forecast_ax.set_ylim(bottom=0)
|
||
|
||
plt.xticks(rotation=45)
|
||
plt.tight_layout()
|
||
|
||
# 强制重新绘制图表
|
||
forecast_fig.canvas.draw()
|
||
|
||
# 将预测起始日期和预测时长添加到文件名中
|
||
start_date_str = start_date_obj.strftime('%Y%m%d')
|
||
|
||
# 保存预测图表
|
||
forecast_chart = f'{output_dir}/forecast_{start_date_str}_days{future_days}.png'
|
||
plt.savefig(forecast_chart, dpi=120) # 增加DPI提高图像质量
|
||
print(f"预测图表已保存为: {forecast_chart}")
|
||
|
||
# 关闭图表以释放内存
|
||
plt.close(forecast_fig)
|
||
|
||
# 2. 创建历史趋势图表
|
||
try:
|
||
print("\n开始生成历史趋势图...")
|
||
history_fig, history_ax = plt.subplots(figsize=(12, 6))
|
||
|
||
# 设置图表样式
|
||
plt.style.use('seaborn-v0_8-whitegrid')
|
||
|
||
# 获取预测起始日期
|
||
current_year = start_date_obj.year
|
||
current_month = start_date_obj.month
|
||
current_day = start_date_obj.day
|
||
print(f"预测起始日期: {start_date_obj}")
|
||
|
||
# 计算同期日期范围(前3天和后3天,共7天)
|
||
days_before = 3
|
||
days_after = 3
|
||
date_range_start = start_date_obj - pd.Timedelta(days=days_before)
|
||
date_range_end = start_date_obj + pd.Timedelta(days=days_after)
|
||
|
||
# 计算去年同期日期范围
|
||
last_year_start = date_range_start.replace(year=date_range_start.year-1)
|
||
last_year_end = date_range_end.replace(year=date_range_end.year-1)
|
||
|
||
# 计算上月同期日期范围
|
||
if date_range_start.month > 1:
|
||
last_month_start = date_range_start.replace(month=date_range_start.month-1)
|
||
last_month_end = date_range_end.replace(month=date_range_end.month-1)
|
||
else:
|
||
# 如果是1月,则转到上一年的12月
|
||
last_month_start = date_range_start.replace(year=date_range_start.year-1, month=12)
|
||
last_month_end = date_range_end.replace(year=date_range_end.year-1, month=12)
|
||
|
||
print(f"当前日期范围: {date_range_start} 到 {date_range_end}")
|
||
print(f"去年同期范围: {last_year_start} 到 {last_year_end}")
|
||
print(f"上月同期范围: {last_month_start} 到 {last_month_end}")
|
||
|
||
# 查找对应日期范围的数据
|
||
current_period_data = product_df[
|
||
(product_df['date'] >= date_range_start) &
|
||
(product_df['date'] <= date_range_end)
|
||
]
|
||
print(f"当前期间数据点数: {len(current_period_data)}")
|
||
|
||
last_year_period_data = product_df[
|
||
(product_df['date'] >= last_year_start) &
|
||
(product_df['date'] <= last_year_end)
|
||
]
|
||
print(f"去年同期数据点数: {len(last_year_period_data)}")
|
||
|
||
last_month_period_data = product_df[
|
||
(product_df['date'] >= last_month_start) &
|
||
(product_df['date'] <= last_month_end)
|
||
]
|
||
print(f"上月同期数据点数: {len(last_month_period_data)}")
|
||
|
||
# 绘制曲线图
|
||
has_data = False
|
||
|
||
if not current_period_data.empty:
|
||
has_data = True
|
||
# 确保日期升序排序
|
||
current_period_data = current_period_data.sort_values('date')
|
||
# 生成相对天数(以date_range_start为基准)
|
||
current_period_data['day_offset'] = (current_period_data['date'] - date_range_start).dt.days
|
||
print(f"当前期间日期: {current_period_data['date'].tolist()}")
|
||
print(f"当前期间相对天数: {current_period_data['day_offset'].tolist()}")
|
||
print(f"当前期间销量: {current_period_data['sales'].tolist()}")
|
||
|
||
history_ax.plot(
|
||
current_period_data['day_offset'],
|
||
current_period_data['sales'],
|
||
'r-',
|
||
marker='o',
|
||
linewidth=2.5,
|
||
markersize=7,
|
||
label=f"当前期间 ({date_range_start.strftime('%Y-%m-%d')} 到 {date_range_end.strftime('%Y-%m-%d')})"
|
||
)
|
||
|
||
# 标记预测起始日期
|
||
current_center_point = current_period_data[current_period_data['date'] == start_date_obj]
|
||
if not current_center_point.empty:
|
||
history_ax.scatter(
|
||
current_center_point['day_offset'],
|
||
current_center_point['sales'],
|
||
color='red',
|
||
s=150,
|
||
marker='*',
|
||
zorder=10,
|
||
label=f"预测起始日 ({start_date_obj.strftime('%Y-%m-%d')})"
|
||
)
|
||
|
||
if not last_year_period_data.empty:
|
||
has_data = True
|
||
# 确保日期升序排序
|
||
last_year_period_data = last_year_period_data.sort_values('date')
|
||
last_year_period_data['day_offset'] = (last_year_period_data['date'] - last_year_start).dt.days
|
||
print(f"去年同期日期: {last_year_period_data['date'].tolist()}")
|
||
print(f"去年同期相对天数: {last_year_period_data['day_offset'].tolist()}")
|
||
print(f"去年同期销量: {last_year_period_data['sales'].tolist()}")
|
||
|
||
history_ax.plot(
|
||
last_year_period_data['day_offset'],
|
||
last_year_period_data['sales'],
|
||
'b-',
|
||
marker='s',
|
||
linewidth=2,
|
||
markersize=6,
|
||
label=f"去年同期 ({last_year_start.strftime('%Y-%m-%d')} 到 {last_year_end.strftime('%Y-%m-%d')})"
|
||
)
|
||
|
||
if not last_month_period_data.empty:
|
||
has_data = True
|
||
# 确保日期升序排序
|
||
last_month_period_data = last_month_period_data.sort_values('date')
|
||
last_month_period_data['day_offset'] = (last_month_period_data['date'] - last_month_start).dt.days
|
||
print(f"上月同期日期: {last_month_period_data['date'].tolist()}")
|
||
print(f"上月同期相对天数: {last_month_period_data['day_offset'].tolist()}")
|
||
print(f"上月同期销量: {last_month_period_data['sales'].tolist()}")
|
||
|
||
history_ax.plot(
|
||
last_month_period_data['day_offset'],
|
||
last_month_period_data['sales'],
|
||
'g-',
|
||
marker='^',
|
||
linewidth=2,
|
||
markersize=6,
|
||
label=f"上月同期 ({last_month_start.strftime('%Y-%m-%d')} 到 {last_month_end.strftime('%Y-%m-%d')})"
|
||
)
|
||
|
||
# 设置X轴标签为相对天数
|
||
days_labels = list(range(7))
|
||
days_offsets = list(range(7))
|
||
day_names = [(date_range_start + pd.Timedelta(days=d)).strftime('%m-%d') for d in range(7)]
|
||
|
||
history_ax.set_xticks(days_offsets)
|
||
history_ax.set_xticklabels(day_names)
|
||
|
||
# 添加垂直参考线标记预测起始日
|
||
history_ax.axvline(x=days_before, color='red', linestyle='--', alpha=0.5)
|
||
|
||
# 美化图表
|
||
history_ax.set_title(f'{product_name} - 同期销量趋势对比 (7天)', fontsize=14, fontweight='bold')
|
||
history_ax.set_xlabel('日期', fontsize=12)
|
||
history_ax.set_ylabel('销量', fontsize=12)
|
||
history_ax.grid(True, linestyle='--', alpha=0.7)
|
||
|
||
# 设置Y轴从0开始
|
||
history_ax.set_ylim(bottom=0)
|
||
|
||
# 添加预测起始日标记区域
|
||
history_ax.axvspan(days_before-0.2, days_before+0.2, color='lightyellow', alpha=0.3)
|
||
|
||
# 只有在有标签的图表元素存在时才添加图例
|
||
if has_data and (not current_period_data.empty or not last_year_period_data.empty or not last_month_period_data.empty):
|
||
history_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)
|
||
|
||
# 如果所有数据集都为空,显示提示
|
||
if not has_data:
|
||
history_ax.text(0.5, 0.5, '没有找到可比较的同期数据',
|
||
horizontalalignment='center', verticalalignment='center',
|
||
transform=history_ax.transAxes, fontsize=14)
|
||
|
||
plt.tight_layout()
|
||
|
||
# 强制重新绘制图表
|
||
history_fig.canvas.draw()
|
||
|
||
# 保存历史趋势图表
|
||
history_chart = f'{output_dir}/history_{start_date_str}.png'
|
||
plt.savefig(history_chart, dpi=120) # 增加DPI提高图像质量
|
||
print(f"历史趋势图表已保存为: {history_chart}")
|
||
|
||
# 关闭图表以释放内存
|
||
plt.close(history_fig)
|
||
|
||
except Exception as e:
|
||
import traceback
|
||
print(f"生成历史趋势图时出错: {e}")
|
||
traceback.print_exc()
|
||
history_chart = None
|
||
|
||
# 创建一个包含历史和预测数据的完整DataFrame供CSV导出和API返回
|
||
history_df['data_type'] = '历史销量'
|
||
|
||
predictions_df = pd.DataFrame({
|
||
'date': future_dates,
|
||
'sales': y_pred,
|
||
'data_type': '预测销量',
|
||
'product_id': product_id,
|
||
'product_name': product_name
|
||
})
|
||
|
||
# 合并历史和预测数据
|
||
complete_df = pd.concat([
|
||
history_df[['date', 'sales', 'data_type']].assign(product_id=product_id, product_name=product_name),
|
||
predictions_df
|
||
]).sort_values('date')
|
||
|
||
except Exception as e:
|
||
import traceback
|
||
print(f"生成预测图表时出错: {e}")
|
||
traceback.print_exc()
|
||
forecast_chart = None
|
||
history_chart = None
|
||
|
||
# 出错时仍然创建预测数据
|
||
predictions_df = pd.DataFrame({
|
||
'date': future_dates,
|
||
'sales': y_pred,
|
||
'data_type': '预测销量',
|
||
'product_id': product_id,
|
||
'product_name': product_name
|
||
})
|
||
complete_df = predictions_df
|
||
|
||
# 保存预测结果到CSV
|
||
try:
|
||
forecast_csv = f'{output_dir}/forecast_{start_date_str}_days{future_days}.csv'
|
||
complete_df.to_csv(forecast_csv, index=False)
|
||
print(f"预测结果已保存到: {forecast_csv}")
|
||
except Exception as e:
|
||
print(f"保存CSV文件时出错: {e}")
|
||
forecast_csv = None
|
||
|
||
# 返回文件路径信息和预测数据
|
||
result = {
|
||
'predictions_df': complete_df, # 返回包含历史数据的完整DataFrame
|
||
'chart_path': forecast_chart,
|
||
'history_chart_path': history_chart,
|
||
'csv_path': forecast_csv
|
||
}
|
||
|
||
# 保存模型
|
||
model_dir = f'models/transformer'
|
||
os.makedirs(model_dir, exist_ok=True)
|
||
model_path = f'{model_dir}/{product_id}_model.pt'
|
||
|
||
# 保存模型和相关数据
|
||
checkpoint = {
|
||
'model_state_dict': model.state_dict(),
|
||
'optimizer_state_dict': optimizer.state_dict(),
|
||
'scaler_X': scaler_X,
|
||
'scaler_y': scaler_y,
|
||
'metrics': metrics,
|
||
'epochs': epochs,
|
||
'look_back': look_back,
|
||
'T': T,
|
||
'features': features
|
||
}
|
||
|
||
torch.save(checkpoint, model_path)
|
||
print(f"模型已保存到 {model_path}")
|
||
|
||
# 保存训练日志
|
||
log_path = f'{model_dir}/{product_id}_log.json'
|
||
log_data = {
|
||
'product_id': product_id,
|
||
'product_name': product_name,
|
||
'model_type': 'transformer',
|
||
'training_completed_at': datetime.now().isoformat(),
|
||
'epochs': epochs,
|
||
'metrics': metrics,
|
||
'file_path': model_path
|
||
}
|
||
with open(log_path, 'w', encoding='utf-8') as f:
|
||
json.dump(log_data, f, indent=4, ensure_ascii=False)
|
||
print(f"训练日志已保存到 {log_path}")
|
||
|
||
return model, metrics
|
||
|
||
# 加载模型并进行预测的函数
|
||
# Matplotlib style stack used by the prediction charts. The seaborn style
# sheet ships its own ``font.sans-serif`` list, so the CJK-capable font is
# layered on top of it to keep the Chinese titles and labels renderable.
_PREDICTION_CHART_STYLE = [
    'seaborn-v0_8-whitegrid',
    {'font.sans-serif': ['SimHei'], 'axes.unicode_minus': False},
]


def _build_model_for_prediction(model_type, num_features, horizon):
    """Instantiate an untrained network for ``model_type``; return None if the type is unsupported."""
    if model_type == 'mlstm':
        # The module imports MLSTMTransformer under the alias MatrixLSTM.
        return MatrixLSTM(
            num_features=num_features,
            hidden_size=128,
            mlstm_layers=1,
            embed_dim=embed_dim,
            dense_dim=dense_dim,
            num_heads=num_heads,
            dropout_rate=dropout_rate,
            num_blocks=num_blocks,
            output_sequence_length=horizon,
        )
    if model_type in ('kan', 'optimized_kan'):
        kan_class = OptimizedKANForecaster if model_type == 'optimized_kan' else KANForecaster
        return kan_class(
            input_features=num_features,
            hidden_sizes=[64, 128, 64],
            output_size=1,
            grid_size=5,
            spline_order=3,
            dropout_rate=dropout_rate,
            output_sequence_length=horizon,
        )
    if model_type == 'transformer':
        return TimeSeriesTransformer(
            num_features=num_features,
            d_model=embed_dim,
            nhead=num_heads,
            num_encoder_layers=num_blocks,
            dim_feedforward=dense_dim,
            dropout=dropout_rate,
            output_sequence_length=horizon,
        )
    return None


def _select_recent_history(product_df, start_date_obj, history_days):
    """Return the ``date``/``sales`` rows of the ``history_days`` days preceding ``start_date_obj``."""
    history_end_date = start_date_obj - pd.Timedelta(days=1)
    history_start_date = history_end_date - pd.Timedelta(days=history_days)

    in_window = (product_df['date'] >= history_start_date) & (product_df['date'] <= history_end_date)
    history_df = product_df[in_window][['date', 'sales']].copy()

    if history_df.empty:
        print(f"警告: 在日期范围 {history_start_date} 到 {history_end_date} 内没有历史数据")
        # Nothing recorded right before the start date: fall back to the latest rows.
        history_df = product_df.iloc[-min(history_days, len(product_df)):][['date', 'sales']].copy()

    return history_df


def _draw_forecast_chart(history_df, future_dates, y_pred, start_date_obj, history_days,
                         product_name, model_type, chart_path):
    """Plot recent history followed by the forecast and save the figure to ``chart_path``."""
    import matplotlib.dates as mdates

    horizon = len(y_pred)

    # The style is scoped to this chart so global rcParams are left untouched.
    with plt.style.context(_PREDICTION_CHART_STYLE):
        forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
        try:
            forecast_ax.plot(history_df['date'].values, history_df['sales'].values,
                             'b-', linewidth=2, marker='o', markersize=4,
                             label='历史销量')

            # Separator between observed and predicted values.
            forecast_ax.axvline(x=start_date_obj, color='gray', linestyle='--', alpha=0.7)

            forecast_ax.plot(future_dates, y_pred,
                             'r-', linewidth=2.5, marker='s', markersize=5,
                             label=f'{model_type}预测销量')

            date_min = start_date_obj - pd.Timedelta(days=history_days)
            date_max = future_dates.max() + pd.Timedelta(days=1)
            print(f"设置X轴范围: {date_min} 到 {date_max}")
            forecast_ax.set_xlim(date_min, date_max)

            forecast_ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
            forecast_ax.xaxis.set_major_locator(mdates.DayLocator(interval=2))
            forecast_ax.grid(True, linestyle='--', alpha=0.6)

            forecast_ax.set_title(
                f'{product_name} - {model_type}销量预测 (从{start_date_obj.strftime("%Y-%m-%d")}开始,预测{horizon}天)',
                fontsize=14, fontweight='bold')
            forecast_ax.set_xlabel('日期', fontsize=12)
            forecast_ax.set_ylabel('销量', fontsize=12)
            forecast_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)

            # Shade the forecast region and label it.
            min_y, max_y = forecast_ax.get_ylim()
            forecast_ax.fill_between(
                [start_date_obj, future_dates.max()],
                min_y, max_y,
                color='lightyellow', alpha=0.3, zorder=0
            )
            forecast_ax.text(
                start_date_obj + pd.Timedelta(days=horizon / 2),
                min_y + (max_y - min_y) * 0.05,
                '预测区域',
                ha='center', va='bottom',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3)
            )

            forecast_ax.set_ylim(bottom=0)
            forecast_ax.tick_params(axis='x', labelrotation=45)
            forecast_fig.tight_layout()
            forecast_fig.canvas.draw()
            forecast_fig.savefig(chart_path, dpi=120)
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(forecast_fig)

    print(f"预测图表已保存为: {chart_path}")
    return chart_path


def _draw_same_period_chart(product_df, start_date_obj, product_name, chart_path):
    """Plot the 7-day window around the start date against last year's and last month's window."""
    days_before = 3
    days_after = 3
    window_span = pd.Timedelta(days=days_before + days_after)

    print(f"预测起始日期: {start_date_obj}")

    date_range_start = start_date_obj - pd.Timedelta(days=days_before)
    # DateOffset clamps to the last valid day of the target month, so Feb 29,
    # the 31st and windows that cross a month/year boundary do not raise.
    last_year_start = date_range_start - pd.DateOffset(years=1)
    last_month_start = date_range_start - pd.DateOffset(months=1)

    print(f"当前日期范围: {date_range_start} 到 {date_range_start + window_span}")
    print(f"去年同期范围: {last_year_start} 到 {last_year_start + window_span}")
    print(f"上月同期范围: {last_month_start} 到 {last_month_start + window_span}")

    # (series name, window start, line format, marker, line width, marker size, mark start date)
    series_specs = [
        ('当前期间', date_range_start, 'r-', 'o', 2.5, 7, True),
        ('去年同期', last_year_start, 'b-', 's', 2, 6, False),
        ('上月同期', last_month_start, 'g-', '^', 2, 6, False),
    ]

    selected = []
    for name, window_start, *_ in series_specs:
        window_end = window_start + window_span
        period_data = product_df[
            (product_df['date'] >= window_start) &
            (product_df['date'] <= window_end)
        ]
        print(f"{name}数据点数: {len(period_data)}")
        selected.append(period_data)

    with plt.style.context(_PREDICTION_CHART_STYLE):
        history_fig, history_ax = plt.subplots(figsize=(12, 6))
        try:
            has_data = False
            for spec, period_data in zip(series_specs, selected):
                name, window_start, line_format, marker, line_width, marker_size, mark_start = spec
                if period_data.empty:
                    continue
                has_data = True

                period_data = period_data.sort_values('date')
                # Offsets are relative to each window's own start so the three curves overlay.
                period_data = period_data.assign(
                    day_offset=(period_data['date'] - window_start).dt.days
                )
                print(f"{name}日期: {period_data['date'].tolist()}")
                print(f"{name}相对天数: {period_data['day_offset'].tolist()}")
                print(f"{name}销量: {period_data['sales'].tolist()}")

                window_end = window_start + window_span
                history_ax.plot(
                    period_data['day_offset'],
                    period_data['sales'],
                    line_format,
                    marker=marker,
                    linewidth=line_width,
                    markersize=marker_size,
                    label=f"{name} ({window_start.strftime('%Y-%m-%d')} 到 {window_end.strftime('%Y-%m-%d')})"
                )

                if mark_start:
                    center_point = period_data[period_data['date'] == start_date_obj]
                    if not center_point.empty:
                        history_ax.scatter(
                            center_point['day_offset'],
                            center_point['sales'],
                            color='red',
                            s=150,
                            marker='*',
                            zorder=10,
                            label=f"预测起始日 ({start_date_obj.strftime('%Y-%m-%d')})"
                        )

            tick_positions = list(range(days_before + days_after + 1))
            tick_labels = [(date_range_start + pd.Timedelta(days=d)).strftime('%m-%d')
                           for d in tick_positions]
            history_ax.set_xticks(tick_positions)
            history_ax.set_xticklabels(tick_labels)

            # Vertical marker and highlight band on the prediction start day.
            history_ax.axvline(x=days_before, color='red', linestyle='--', alpha=0.5)

            history_ax.set_title(f'{product_name} - 同期销量趋势对比 (7天)', fontsize=14, fontweight='bold')
            history_ax.set_xlabel('日期', fontsize=12)
            history_ax.set_ylabel('销量', fontsize=12)
            history_ax.grid(True, linestyle='--', alpha=0.7)
            history_ax.set_ylim(bottom=0)
            history_ax.axvspan(days_before - 0.2, days_before + 0.2, color='lightyellow', alpha=0.3)

            if has_data:
                history_ax.legend(loc='upper left', frameon=True, fancybox=True, shadow=True)
            else:
                history_ax.text(0.5, 0.5, '没有找到可比较的同期数据',
                                horizontalalignment='center', verticalalignment='center',
                                transform=history_ax.transAxes, fontsize=14)

            history_fig.tight_layout()
            history_fig.canvas.draw()
            history_fig.savefig(chart_path, dpi=120)
        finally:
            plt.close(history_fig)

    print(f"历史趋势图表已保存为: {chart_path}")
    return chart_path


def load_model_and_predict(product_id, model_type, future_days=7, start_date=None, analyze_result=False):
    """
    Load a saved model of the given type and forecast future sales for one product.

    Args:
        product_id: Product ID whose checkpoint and sales rows are used.
        model_type: One of 'mlstm', 'kan', 'transformer', 'optimized_kan'.
        future_days: Number of days requested. The model emits at most the
            horizon ``T`` stored in its checkpoint; when more days are
            requested the forecast is limited to ``T`` days.
        start_date: Forecast start date ('YYYY-MM-DD'). None means the day
            after the last date in the data set; an unparsable value falls
            back to today's date.
        analyze_result: When True, ``analyze_prediction_result`` is called on
            the forecast.

    Returns:
        None when the model file is missing, the product has no rows, or the
        model type is unsupported. Otherwise a tuple ``(result, analysis)``
        where ``result`` is a dict with the keys 'predictions_df' (history
        plus forecast rows), 'chart_path', 'history_chart_path' and
        'csv_path' (each path is None when that artefact could not be
        written), and ``analysis`` is None unless ``analyze_result`` is True.
    """
    import traceback

    print("\n" + "="*80)
    print(f"加载模型并预测 - 详细调试信息:")
    print(f"产品ID: {product_id}, 模型类型: {model_type}, 预测天数: {future_days}, 预测起始日期: {start_date}")
    print("="*80 + "\n")

    # The optimized KAN checkpoints live under a differently named directory.
    actual_model_path = model_type
    if model_type == 'optimized_kan':
        actual_model_path = 'kan_optimized'
        print(f"优化版KAN模型: 使用路径 'models/{actual_model_path}/{product_id}_model.pt'")

    model_path = f'models/{actual_model_path}/{product_id}_model.pt'
    if not os.path.exists(model_path):
        print(f"错误: 未找到产品 {product_id} 的 {model_type} 模型文件")
        return None

    device = get_device()
    print(f"使用设备: {device} 进行预测")

    # NOTE(review): weights_only=False unpickles arbitrary objects (the fitted
    # scalers are stored in the checkpoint); only load files this project wrote.
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)

    df = pd.read_excel('pharmacy_sales.xlsx')
    product_df = df[df['product_id'] == product_id].sort_values('date')
    if product_df.empty:
        print(f"错误: 未找到产品 {product_id} 的销售数据")
        return None
    product_name = product_df['product_name'].iloc[0]

    features = checkpoint['features']
    look_back = checkpoint['look_back']
    T = checkpoint['T']
    scaler_X = checkpoint['scaler_X']
    scaler_y = checkpoint['scaler_y']

    # The most recent ``look_back`` rows form the model input window.
    last_data = product_df[features].values[-look_back:]
    last_data_scaled = scaler_X.transform(last_data)

    model = _build_model_for_prediction(model_type, len(features), T)
    if model is None:
        print(f"错误: 不支持的模型类型 '{model_type}'")
        return None
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    X_input = torch.Tensor(last_data_scaled).unsqueeze(0).to(device)  # add batch dimension

    with torch.no_grad():
        default_pred_length = T
        print(f"模型默认预测长度: {default_pred_length}天")

        if future_days <= default_pred_length:
            print(f"请求的预测天数 {future_days} 小于等于模型默认值 {default_pred_length},截取需要的部分")
        else:
            print(f"请求的预测天数 {future_days} 大于模型默认值 {default_pred_length},仅返回前 {default_pred_length} 天的预测")

        y_pred_scaled = model(X_input).squeeze(0).cpu().numpy()
        y_pred_scaled = y_pred_scaled[:min(future_days, default_pred_length)]

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()

    last_date = product_df['date'].iloc[-1]
    if start_date:
        try:
            start_date_obj = pd.Timestamp(start_date)
            print(f"成功解析用户指定的预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")
        except (ValueError, TypeError) as e:
            start_date_obj = pd.Timestamp.now().normalize()
            print(f"日期解析错误: {e}")
            print(f"使用当前日期 {start_date_obj.strftime('%Y-%m-%d')} 作为预测起点")
    else:
        start_date_obj = last_date + pd.Timedelta(days=1)
        print(f"未指定起始日期,使用数据集最后日期 {last_date.strftime('%Y-%m-%d')} 的下一天作为预测起点: {start_date_obj.strftime('%Y-%m-%d')}")

    # One date per value actually produced, so dates and forecasts always have equal length.
    future_dates = pd.date_range(start=start_date_obj, periods=len(y_pred), freq='D')
    print(f"生成预测日期范围: {future_dates[0]} 到 {future_dates[-1]}, 共 {len(future_dates)} 天")

    output_dir = f'predictions/{model_type}/{product_id}'
    os.makedirs(output_dir, exist_ok=True)

    # Computed up front: both chart file names and the CSV file name depend on it.
    start_date_str = start_date_obj.strftime('%Y%m%d')

    predictions_df = pd.DataFrame({
        'date': future_dates,
        'sales': y_pred,
        'data_type': '预测销量',
        'product_id': product_id,
        'product_name': product_name
    })

    history_days = 14
    try:
        history_df = _select_recent_history(product_df, start_date_obj, history_days)
        print(f"历史数据日期范围: {history_df['date'].min()} 到 {history_df['date'].max()}")
        print(f"预测数据日期范围: {future_dates.min()} 到 {future_dates.max()}")
        print(f"预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")
    except (KeyError, TypeError, ValueError) as e:
        print(f"生成预测图表时出错: {e}")
        traceback.print_exc()
        history_df = None

    # Charts are best-effort: a plotting failure must not lose the forecast itself.
    forecast_chart = None
    if history_df is not None:
        try:
            forecast_chart = _draw_forecast_chart(
                history_df, future_dates, y_pred, start_date_obj, history_days,
                product_name, model_type,
                f'{output_dir}/forecast_{start_date_str}_days{future_days}.png')
        except Exception as e:
            print(f"生成预测图表时出错: {e}")
            traceback.print_exc()

    history_chart = None
    try:
        print("\n开始生成历史趋势图...")
        history_chart = _draw_same_period_chart(
            product_df, start_date_obj, product_name,
            f'{output_dir}/history_{start_date_str}.png')
    except Exception as e:
        print(f"生成历史趋势图时出错: {e}")
        traceback.print_exc()

    # History rows plus forecast rows, for the CSV export and the API response.
    if history_df is not None:
        history_part = history_df.assign(
            data_type='历史销量', product_id=product_id, product_name=product_name)
        complete_df = pd.concat([history_part, predictions_df]).sort_values('date')
    else:
        complete_df = predictions_df

    try:
        forecast_csv = f'{output_dir}/forecast_{start_date_str}_days{future_days}.csv'
        complete_df.to_csv(forecast_csv, index=False)
        print(f"预测结果已保存到: {forecast_csv}")
    except (OSError, ValueError) as e:
        print(f"保存CSV文件时出错: {e}")
        forecast_csv = None

    result = {
        'predictions_df': complete_df,
        'chart_path': forecast_chart,
        'history_chart_path': history_chart,
        'csv_path': forecast_csv
    }

    analysis = None
    if analyze_result:
        analysis = analyze_prediction_result(product_id, model_type, y_pred, features=None)

    return result, analysis
|
||
|
||
# 特定加载KAN模型并预测的函数
|
||
def load_kan_model_and_predict(product_id, future_days=7):
    """
    Forecast future sales with the saved KAN model.

    Convenience wrapper around ``load_model_and_predict`` with the model type
    fixed to 'kan'; the start date and result analysis keep their defaults.

    Args:
        product_id: Product ID.
        future_days: Number of days to forecast, 7 by default.

    Returns:
        Whatever ``load_model_and_predict`` returns for these arguments,
        passed through unchanged.
    """
    kan_forecast = load_model_and_predict(product_id, model_type='kan', future_days=future_days)
    return kan_forecast
|
||
|
||
class PharmacyPredictor:
|
||
def __init__(self, data_path='pharmacy_sales.xlsx', model_dir='saved_models'):
    """
    Set up paths and the compute device, then load the sales workbook.

    Args:
        data_path: Excel file with the sales records.
        model_dir: Directory for saved models; created when it does not exist.

    ``self.data`` holds the loaded DataFrame, or None when ``data_path``
    does not exist.
    """
    self.data_path, self.model_dir = data_path, model_dir
    self.device = get_device()

    model_dir_missing = not os.path.exists(model_dir)
    if model_dir_missing:
        os.makedirs(model_dir)

    print(f"Using device: {self.device}")

    # Load the data set; a missing file leaves the predictor without data.
    if not os.path.exists(data_path):
        print(f"Data file {data_path} not found. Please generate data first.")
        self.data = None
        return

    self.data = pd.read_excel(data_path)
    print(f"Loaded data from {data_path}")
|
||
|
||
# ... 保留原有方法 ...
|
||
|
||
# 修改train_model方法,添加优化版KAN模型选项
|
||
def train_model(self, product_id, model_type='transformer', epochs=100, batch_size=32,
                learning_rate=0.001, sequence_length=30, forecast_horizon=7,
                hidden_size=64, num_layers=2, dropout=0.1, use_optimized=False):
    """
    Train a forecasting model for one product and save it with its metrics.

    Args:
        product_id: Product ID.
        model_type: Model type ('transformer', 'slstm', 'mlstm', 'kan').
        epochs: Maximum number of training epochs.
        batch_size: Batch size.
        learning_rate: Learning rate for the Adam optimizer.
        sequence_length: Input sequence length passed to ``prepare_data``.
        forecast_horizon: Number of days to predict, passed to ``prepare_data``.
        hidden_size: Hidden layer size.
        num_layers: Number of layers.
        dropout: Dropout ratio.
        use_optimized: Use the optimized KAN model (only for model_type='kan').

    Returns:
        None when no data is loaded or the product has no rows; otherwise a
        dict of plain Python floats with the keys 'mse', 'rmse', 'mae', 'r2',
        'mape' and 'training_time'.

    Raises:
        ValueError: If ``model_type`` is not one of the supported types.
    """
    if self.data is None:
        print("No data available. Please load or generate data first.")
        return None

    product_data = self.data[self.data['product_id'] == product_id].copy()
    if product_data.empty:
        print(f"No data found for product {product_id}")
        return None

    X, y, X_train, X_val, y_train, y_val, scaler_X, scaler_y = prepare_data(
        product_data, sequence_length, forecast_horizon
    )

    train_loader = prepare_sequences(X_train, y_train, batch_size)
    val_loader = prepare_sequences(X_val, y_val, batch_size)

    input_dim = X.shape[2]
    output_dim = y.shape[1]

    if model_type == 'transformer':
        model = TimeSeriesTransformer(
            input_dim=input_dim,
            hidden_dim=hidden_size,
            output_dim=output_dim,
            num_layers=num_layers,
            dropout=dropout
        ).to(self.device)
        print("Using Transformer model")
    elif model_type == 'slstm':
        model = ScalarLSTM(
            input_dim=input_dim,
            hidden_dim=hidden_size,
            output_dim=output_dim,
            num_layers=num_layers,
            dropout=dropout
        ).to(self.device)
        print("Using Scalar LSTM model")
    elif model_type == 'mlstm':
        model = MatrixLSTM(
            input_dim=input_dim,
            hidden_dim=hidden_size,
            matrix_dim=8,
            output_dim=output_dim,
            num_layers=num_layers,
            dropout=dropout
        ).to(self.device)
        print("Using Matrix LSTM model")
    elif model_type == 'kan':
        kan_class = OptimizedKANForecaster if use_optimized else KANForecaster
        model = kan_class(
            input_features=input_dim,
            hidden_sizes=[hidden_size, hidden_size*2, hidden_size],
            output_sequence_length=output_dim,
            grid_size=5,
            spline_order=3,
            dropout_rate=dropout
        ).to(self.device)
        if use_optimized:
            print("Using Optimized KAN Forecaster model")
        else:
            print("Using KAN Forecaster model")
    else:
        raise ValueError(f"Unknown model type: {model_type}")

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    best_model = None
    patience = 10
    counter = 0

    train_losses = []
    val_losses = []

    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        train_loss = 0

        with tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}") as pbar:
            for X_batch, y_batch in pbar:
                X_batch = X_batch.to(self.device)
                y_batch = y_batch.to(self.device)

                optimizer.zero_grad()

                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                if model_type == 'kan':
                    # KAN models add their own regularization term.
                    loss = loss + model.regularization_loss() * 0.01

                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                pbar.set_postfix({'loss': loss.item()})

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        train_loss /= max(len(train_loader), 1)
        train_losses.append(train_loss)

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch = X_batch.to(self.device)
                y_batch = y_batch.to(self.device)

                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()

        val_loss /= max(len(val_loader), 1)
        val_losses.append(val_loss)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Early stopping on the validation loss.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() returns references to the live parameters, so the
            # tensors are cloned; otherwise later epochs overwrite the snapshot.
            best_model = {name: tensor.detach().clone()
                          for name, tensor in model.state_dict().items()}
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    training_time = time.time() - start_time
    print(f"Training completed in {training_time:.2f} seconds")

    # No snapshot exists when the validation loss never improved (e.g. NaN);
    # in that case the weights of the last epoch are kept.
    if best_model is not None:
        model.load_state_dict(best_model)

    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(self.device)
        y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(self.device)

        y_pred = model(X_val_tensor).cpu().numpy()
        y_val_np = y_val_tensor.cpu().numpy()

    # Back to the original sales scale.
    y_pred = scaler_y.inverse_transform(y_pred)
    y_val_np = scaler_y.inverse_transform(y_val_np)

    # Metrics are converted to built-in floats so they can be JSON-serialised.
    mse = float(mean_squared_error(y_val_np, y_pred))
    rmse = float(np.sqrt(mse))
    mae = float(mean_absolute_error(y_val_np, y_pred))
    r2 = float(r2_score(y_val_np.flatten(), y_pred.flatten()))

    # MAPE over non-zero targets only; undefined (NaN) when every target is zero.
    mask = y_val_np != 0
    if mask.any():
        mape = float(np.mean(np.abs((y_val_np[mask] - y_pred[mask]) / y_val_np[mask])) * 100)
    else:
        mape = float('nan')

    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"R²: {r2:.4f}")
    print(f"MAPE: {mape:.2f}%")

    model_suffix = "_optimized" if (model_type == 'kan' and use_optimized) else ""
    model_path = os.path.join(self.model_dir, f"{product_id}_{model_type}{model_suffix}.pt")

    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scaler_X': scaler_X,
        'scaler_y': scaler_y,
        'metrics': {
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'r2': r2,
            'mape': mape
        },
        'params': {
            'model_type': model_type,
            'use_optimized': use_optimized if model_type == 'kan' else False,
            'sequence_length': sequence_length,
            'forecast_horizon': forecast_horizon,
            'hidden_size': hidden_size,
            'num_layers': num_layers,
            'dropout': dropout
        },
        'training_time': training_time,
        'train_losses': train_losses,
        'val_losses': val_losses
    }, model_path)

    print(f"Model saved to {model_path}")

    # Loss curves.
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'Training and Validation Loss for {product_id} ({model_type}{model_suffix})')
    plt.legend()
    plt.grid(True)

    loss_plot_path = os.path.join(self.model_dir, f"{product_id}_{model_type}{model_suffix}_loss.png")
    plt.savefig(loss_plot_path)
    plt.close()

    # Actual vs. predicted values for up to the first three forecast days,
    # restricted to the last 30 validation samples.
    plt.figure(figsize=(12, 6))

    n_points = min(30, len(y_val_np))
    x_axis = np.arange(n_points)
    n_panels = min(3, y_val_np.shape[1])

    for i in range(n_panels):
        plt.subplot(n_panels, 1, i+1)
        plt.plot(x_axis, y_val_np[-n_points:, i], 'b-', label=f'Actual Day {i+1}')
        plt.plot(x_axis, y_pred[-n_points:, i], 'r--', label=f'Predicted Day {i+1}')
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.suptitle(f'Prediction Results for {product_id} ({model_type}{model_suffix})')
    plt.subplots_adjust(top=0.9)

    pred_plot_path = os.path.join(self.model_dir, f"{product_id}_{model_type}{model_suffix}_pred.png")
    plt.savefig(pred_plot_path)
    plt.close()

    return {
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2': r2,
        'mape': mape,
        'training_time': training_time
    }
|
||
|
||
# ... 保留其他方法 ...
|
||
|
||
# 添加一个新方法,专门用于使用优化版KAN模型进行训练
|
||
def train_optimized_kan_model(self, product_id, epochs=100, batch_size=32,
                              learning_rate=0.001, sequence_length=30, forecast_horizon=7,
                              hidden_size=64, num_layers=2, dropout=0.1):
    """
    Train the optimized KAN model for one product.

    Forwards every argument to ``train_model`` with ``model_type='kan'`` and
    ``use_optimized=True`` and returns its result unchanged.

    Args:
        product_id: Product ID.
        epochs: Number of training epochs.
        batch_size: Batch size.
        learning_rate: Learning rate.
        sequence_length: Input sequence length.
        forecast_horizon: Number of days to predict.
        hidden_size: Hidden layer size.
        num_layers: Number of layers.
        dropout: Dropout ratio.
    """
    training_options = {
        'product_id': product_id,
        'model_type': 'kan',
        'epochs': epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'sequence_length': sequence_length,
        'forecast_horizon': forecast_horizon,
        'hidden_size': hidden_size,
        'num_layers': num_layers,
        'dropout': dropout,
        'use_optimized': True,  # select the optimized KAN variant
    }
    return self.train_model(**training_options)
|
||
|
||
# 添加比较原始KAN和优化KAN的方法
|
||
def compare_kan_models(self, product_id, epochs=100, batch_size=32,
                       learning_rate=0.001, sequence_length=30, forecast_horizon=7,
                       hidden_size=64, num_layers=2, dropout=0.1):
    """
    Train the original and the optimized KAN model and compare their metrics.

    Both models are trained through ``train_model`` with identical
    hyper-parameters. The comparison is printed, written to
    ``<model_dir>/<product_id>_kan_comparison.json`` and plotted through
    ``_plot_model_comparison``.

    Args:
        product_id: Product ID.
        epochs: Number of training epochs.
        batch_size: Batch size.
        learning_rate: Learning rate.
        sequence_length: Input sequence length.
        forecast_horizon: Number of days to predict.
        hidden_size: Hidden layer size.
        num_layers: Number of layers.
        dropout: Dropout ratio.

    Returns:
        None when either training run returns no metrics; otherwise a dict
        with the keys 'product_id', 'original_kan', 'optimized_kan',
        'improvement' (percentages, positive means the optimized model is
        better) and 'params'.
    """
    def relative_gain(numerator, baseline):
        # Percentage gain relative to the baseline; infinite when the baseline is zero.
        if baseline == 0:
            return float('inf')
        return numerator / abs(baseline) * 100

    shared_options = dict(
        product_id=product_id,
        model_type='kan',
        epochs=epochs,
        batch_size=batch_size,
        learning_rate=learning_rate,
        sequence_length=sequence_length,
        forecast_horizon=forecast_horizon,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout,
    )

    print("Training original KAN model...")
    original_metrics = self.train_model(use_optimized=False, **shared_options)
    if original_metrics is None:
        # train_model returns None when there is nothing to train on.
        print(f"Cannot compare KAN models for product {product_id}: training returned no metrics.")
        return None

    print("\nTraining optimized KAN model...")
    optimized_metrics = self.train_model(use_optimized=True, **shared_options)
    if optimized_metrics is None:
        print(f"Cannot compare KAN models for product {product_id}: training returned no metrics.")
        return None

    # NumPy scalars (e.g. np.float32) are not JSON serialisable; use built-in floats.
    original_metrics = {key: float(value) for key, value in original_metrics.items()}
    optimized_metrics = {key: float(value) for key, value in optimized_metrics.items()}

    # For the error metrics and the training time, lower is better; for R², higher is better.
    improvement = {
        key: relative_gain(original_metrics[key] - optimized_metrics[key], original_metrics[key])
        for key in ('mse', 'rmse', 'mae', 'mape', 'training_time')
    }
    improvement['r2'] = relative_gain(
        optimized_metrics['r2'] - original_metrics['r2'], original_metrics['r2'])
    improvement = {key: improvement[key]
                   for key in ('mse', 'rmse', 'mae', 'r2', 'mape', 'training_time')}

    print("\n===== KAN Models Comparison =====")
    print(f"Product ID: {product_id}")
    print("\nMetrics:")
    print(f"{'Metric':<15} {'Original KAN':<15} {'Optimized KAN':<15} {'Improvement':<15}")
    print("-" * 60)
    print(f"{'MSE':<15} {original_metrics['mse']:<15.4f} {optimized_metrics['mse']:<15.4f} {improvement['mse']:<15.2f}%")
    print(f"{'RMSE':<15} {original_metrics['rmse']:<15.4f} {optimized_metrics['rmse']:<15.4f} {improvement['rmse']:<15.2f}%")
    print(f"{'MAE':<15} {original_metrics['mae']:<15.4f} {optimized_metrics['mae']:<15.4f} {improvement['mae']:<15.2f}%")
    print(f"{'R²':<15} {original_metrics['r2']:<15.4f} {optimized_metrics['r2']:<15.4f} {improvement['r2']:<15.2f}%")
    print(f"{'MAPE':<15} {original_metrics['mape']:<15.2f}% {optimized_metrics['mape']:<15.2f}% {improvement['mape']:<15.2f}%")
    print(f"{'Training Time':<15} {original_metrics['training_time']:<15.2f}s {optimized_metrics['training_time']:<15.2f}s {improvement['training_time']:<15.2f}%")

    comparison_path = os.path.join(self.model_dir, f"{product_id}_kan_comparison.json")
    comparison_results = {
        'product_id': product_id,
        'original_kan': original_metrics,
        'optimized_kan': optimized_metrics,
        'improvement': improvement,
        'params': {
            'epochs': epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'sequence_length': sequence_length,
            'forecast_horizon': forecast_horizon,
            'hidden_size': hidden_size,
            'num_layers': num_layers,
            'dropout': dropout
        }
    }

    with open(comparison_path, 'w', encoding='utf-8') as f:
        json.dump(comparison_results, f, indent=4)

    print(f"\nComparison results saved to {comparison_path}")

    self._plot_model_comparison(product_id, original_metrics, optimized_metrics)

    return comparison_results
|
||
|
||
def _plot_model_comparison(self, product_id, original_metrics, optimized_metrics):
    """Render and save bar charts comparing the original and optimized KAN models.

    Two PNG files are written into ``self.model_dir``:

    * ``{product_id}_kan_comparison.png`` - grouped bars for MSE, RMSE, MAE,
      R² and MAPE, one bar per model and metric.
    * ``{product_id}_kan_training_time.png`` - training time of each model.

    Args:
        product_id: Product identifier; used in chart titles and file names.
        original_metrics: Mapping that provides the keys 'mse', 'rmse', 'mae',
            'r2', 'mape' and 'training_time' for the original KAN model.
        optimized_metrics: The same mapping for the optimized KAN model.
    """
    # Metric key -> tick label, in plotting order.
    metric_labels = {
        'mse': 'MSE',
        'rmse': 'RMSE',
        'mae': 'MAE',
        'r2': 'R²',
        'mape': 'MAPE (%)',
    }
    bar_width = 0.35
    half_width = bar_width / 2

    # One entry per model: (horizontal shift of its bars, bar heights, legend label).
    series = (
        (-half_width, [original_metrics[key] for key in metric_labels], 'Original KAN'),
        (half_width, [optimized_metrics[key] for key in metric_labels], 'Optimized KAN'),
    )

    # --- Chart 1: accuracy metrics, grouped by metric ---
    plt.figure(figsize=(14, 8))
    positions = np.arange(len(metric_labels))
    for shift, heights, legend_label in series:
        plt.bar(positions + shift, heights, bar_width, label=legend_label)

    plt.xlabel('Metrics')
    plt.ylabel('Values')
    plt.title(f'KAN Models Performance Comparison for {product_id}')
    plt.xticks(positions, list(metric_labels.values()))
    plt.legend()
    plt.grid(True, axis='y', linestyle='--', alpha=0.7)

    # Print each bar's value just above its top edge.
    for shift, heights, _ in series:
        for slot, height in enumerate(heights):
            plt.text(slot + shift, height + 0.01, f'{height:.4f}',
                     ha='center', va='bottom', fontsize=9)

    comparison_plot_path = os.path.join(self.model_dir, f"{product_id}_kan_comparison.png")
    plt.savefig(comparison_plot_path)
    plt.close()

    # --- Chart 2: training time per model ---
    plt.figure(figsize=(10, 6))
    model_names = ['Original KAN', 'Optimized KAN']
    training_times = [
        original_metrics['training_time'],
        optimized_metrics['training_time'],
    ]

    plt.bar(model_names, training_times, color=['blue', 'green'])
    plt.xlabel('Model')
    plt.ylabel('Training Time (seconds)')
    plt.title(f'Training Time Comparison for {product_id}')
    plt.grid(True, axis='y', linestyle='--', alpha=0.7)

    for slot, seconds in enumerate(training_times):
        plt.text(slot, seconds + 0.1, f'{seconds:.2f}s', ha='center', va='bottom')

    time_plot_path = os.path.join(self.model_dir, f"{product_id}_kan_training_time.png")
    plt.savefig(time_plot_path)
    plt.close()

    print(f"Comparison plots saved to {comparison_plot_path} and {time_plot_path}")
|
||
|
||
# 预测结果分析模块
|
||
def analyze_prediction_result(product_id, model_type, predictions, features=None):
    """Analyze a forecast and bundle the findings with a text explanation.

    The sales workbook 'pharmacy_sales.xlsx' is read (relative to the current
    working directory) on every call, and the rows of ``product_id`` sorted by
    'date' are used as the historical reference.

    Args:
        product_id: Product identifier.
        model_type: Name of the model that produced the forecast.
        predictions: Sequence of predicted values.
        features: Optional features used for the prediction.

    Returns:
        A dict with the keys 'trend', 'statistics', 'historical_comparison',
        'factors' and 'explanation'.
    """
    print(f"分析产品 {product_id} 的 {model_type} 模型预测结果...")

    # Historical sales of this product, oldest first.
    sales = pd.read_excel('pharmacy_sales.xlsx')
    product_history = sales.loc[sales['product_id'] == product_id].sort_values('date')

    analysis = {}
    analysis['trend'] = analyze_trend(predictions)
    analysis['statistics'] = calculate_prediction_statistics(predictions)
    analysis['historical_comparison'] = compare_with_historical(predictions, product_history)
    analysis['factors'] = analyze_influencing_factors(
        product_id, model_type, predictions, features
    )

    # The explanation is derived from the four sections collected above.
    analysis['explanation'] = generate_prediction_explanation(
        analysis, product_id, model_type
    )
    return analysis
|
||
|
||
def analyze_trend(predictions):
    """Classify the overall direction of a sequence of predicted values.

    A least-squares line is fitted through the values (x = 0, 1, 2, ...).
    A slope above 0.05 per step is 'increasing', below -0.05 'decreasing'.
    Otherwise the series is 'fluctuating' when its standard deviation exceeds
    10% of the magnitude of its mean, and 'stable' when it does not.

    Args:
        predictions: Sequence of numeric predicted values.

    Returns:
        One of 'unknown' (fewer than two values), 'increasing', 'decreasing',
        'fluctuating' or 'stable'.
    """
    if len(predictions) < 2:
        return 'unknown'

    values = np.asarray(predictions, dtype=float)
    slope = stats.linregress(np.arange(len(values)), values).slope

    if slope > 0.05:
        return 'increasing'
    if slope < -0.05:
        return 'decreasing'

    spread = np.std(values)
    # Use the magnitude of the mean: a negative mean would otherwise yield a
    # negative coefficient of variation that can never exceed the threshold.
    centre = abs(np.mean(values))
    if centre == 0:
        # The coefficient of variation is undefined here; any spread around a
        # zero mean is, relatively speaking, unbounded fluctuation.
        return 'fluctuating' if spread > 0 else 'stable'

    return 'fluctuating' if spread / centre > 0.1 else 'stable'
|
||
|
||
def calculate_prediction_statistics(predictions):
    """Summarize a forecast with basic descriptive statistics.

    Args:
        predictions: Sequence of numeric predicted values.

    Returns:
        A dict with the float entries 'mean', 'median', 'min', 'max' and
        'std', plus 'day_over_day_changes' as produced by
        ``calculate_day_over_day_changes``.
    """
    reducers = (
        ('mean', np.mean),
        ('median', np.median),
        ('min', np.min),
        ('max', np.max),
        ('std', np.std),
    )
    # float() turns the NumPy scalars into plain Python floats.
    summary = {label: float(reduce(predictions)) for label, reduce in reducers}
    summary['day_over_day_changes'] = calculate_day_over_day_changes(predictions)
    return summary
|
||
|
||
def calculate_day_over_day_changes(predictions):
    """Return the percentage change between each pair of consecutive values.

    Args:
        predictions: Sequence of numeric predicted values.

    Returns:
        A list of floats with ``len(predictions) - 1`` entries (empty for
        fewer than two values). Entry ``k`` is the change from value ``k`` to
        value ``k + 1`` in percent of value ``k``; it is 0.0 when value ``k``
        is zero, because the relative change is undefined there.
    """
    values = list(predictions)
    # Walk over (previous, current) pairs instead of indexing by position.
    return [
        float((current - previous) / previous * 100) if previous != 0 else 0.0
        for previous, current in zip(values, values[1:])
    ]
|
||
|
||
def compare_with_historical(predictions, product_df):
    """Compare a forecast with the most recent stretch of historical sales.

    The last ``len(predictions)`` entries of ``product_df['sales']`` serve as
    the historical reference window.

    Args:
        predictions: Sequence of numeric predicted values.
        product_df: DataFrame with a 'sales' column; the caller is expected
            to pass it in chronological order.

    Returns:
        ``{'has_historical_data': False}`` when the forecast is empty or the
        history has fewer rows than the forecast. Otherwise a dict with
        'has_historical_data' set to True, the floats 'historical_mean',
        'historical_median', 'historical_min', 'historical_max', and
        'mean_difference_pct' (forecast mean relative to the historical mean,
        in percent; 0.0 when the historical mean is zero).
    """
    horizon = len(predictions)
    # An empty forecast must stop here: the slice ``[-0:]`` below would pick
    # the entire history rather than nothing.
    if horizon == 0 or len(product_df) < horizon:
        return {'has_historical_data': False}

    recent_sales = product_df['sales'].values[-horizon:]
    historical_mean = float(np.mean(recent_sales))

    result = {
        'has_historical_data': True,
        'historical_mean': historical_mean,
        'historical_median': float(np.median(recent_sales)),
        'historical_min': float(np.min(recent_sales)),
        'historical_max': float(np.max(recent_sales)),
    }

    if historical_mean != 0:
        result['mean_difference_pct'] = float(
            (np.mean(predictions) - historical_mean) / historical_mean * 100
        )
    else:
        # Relative difference is undefined for a zero baseline; report 0.0 as
        # a float so the value type matches the non-zero branch.
        result['mean_difference_pct'] = 0.0

    return result
|
||
|
||
def analyze_influencing_factors(product_id, model_type, predictions, features=None):
    """List the factors considered to influence a forecast.

    This is a simplified, rule-based implementation: the result is assembled
    from fixed descriptions chosen by ``model_type`` and by the entries of
    ``features``. ``product_id`` and ``predictions`` are accepted but not
    inspected.

    Args:
        product_id: Product identifier (unused).
        model_type: Model name; 'transformer', 'mlstm' and any name
            containing 'kan' each contribute one model-specific factor.
        predictions: Predicted values (unused).
        features: Optional dict. A truthy 'is_promotion' or 'is_holiday'
            entry and the mere presence of a 'temperature' key each add one
            factor. Anything that is not a dict is ignored.

    Returns:
        A list of dicts with the keys 'name', 'importance' and 'description',
        ordered: model-specific factor, the two generic factors, then the
        feature-driven factors.
    """
    def describe(name, importance, description):
        return {'name': name, 'importance': importance, 'description': description}

    # Factor tied to the model family, if the family is recognised.
    if model_type == 'transformer':
        model_specific = [describe(
            '时间序列模式', 'high',
            'Transformer模型擅长捕捉时间序列中的长期依赖关系和复杂模式',
        )]
    elif model_type == 'mlstm':
        model_specific = [describe(
            '序列记忆', 'high',
            '矩阵LSTM模型具有强大的序列记忆能力,能够捕捉时间序列中的长期依赖',
        )]
    elif 'kan' in model_type:
        model_specific = [describe(
            '非线性关系', 'high',
            'KAN模型擅长学习复杂的非线性映射关系,适合捕捉销售数据中的复杂模式',
        )]
    else:
        model_specific = []

    # Factors reported for every forecast.
    generic = [
        describe('季节性模式', 'medium', '销售数据通常表现出季节性波动,如周末销量高于工作日'),
        describe('历史趋势', 'high', '历史销售趋势是预测的重要基础'),
    ]

    # Factors unlocked by the supplied feature values.
    feature_driven = []
    if isinstance(features, dict):
        if features.get('is_promotion'):
            feature_driven.append(describe('促销活动', 'high', '促销活动通常会显著提升销量'))
        if features.get('is_holiday'):
            feature_driven.append(describe('节假日', 'medium', '节假日期间销售模式可能与平日不同'))
        if 'temperature' in features:
            feature_driven.append(describe('温度因素', 'medium', '天气温度可能影响客流量和特定药品需求'))

    return model_specific + generic + feature_driven
|
||
|
||
def generate_prediction_explanation(analysis, product_id, model_type):
    """Compose a human-readable explanation of a forecast analysis.

    Args:
        analysis: Dict with the keys 'trend', 'statistics' (providing 'max',
            'min', 'mean' and 'day_over_day_changes'), 'historical_comparison'
            (providing 'has_historical_data' and, when that is true,
            'mean_difference_pct') and 'factors' (list of dicts with 'name'
            and 'importance').
        product_id: Product identifier inserted into the text.
        model_type: Model name; 'transformer', 'mlstm', 'kan' and
            'optimized_kan' each append a model-specific remark.

    Returns:
        The explanation sentences joined by single spaces.
    """
    sentences = []

    # --- Trend ---
    trend = analysis['trend']
    if trend == 'increasing':
        mean_change = np.mean(analysis['statistics']['day_over_day_changes'])
        sentences.append(f"预测显示产品 {product_id} 未来销量呈上升趋势,平均日环比增长 {mean_change:.2f}%。")
    elif trend == 'decreasing':
        mean_change = abs(np.mean(analysis['statistics']['day_over_day_changes']))
        sentences.append(f"预测显示产品 {product_id} 未来销量呈下降趋势,平均日环比下降 {mean_change:.2f}%。")
    elif trend == 'fluctuating':
        sentences.append(f"预测显示产品 {product_id} 未来销量有波动,但无明显上升或下降趋势。")
    elif trend == 'stable':
        sentences.append(f"预测显示产品 {product_id} 未来销量相对稳定。")
    else:
        # 'unknown' (too few points to fit a trend) or an unrecognised label:
        # do not claim stability that was never established.
        sentences.append(f"暂无法根据现有预测数据判断产品 {product_id} 的未来销量趋势。")

    # --- Peak, trough and average ---
    # Named ``summary`` rather than ``stats`` so the module-level
    # ``scipy.stats`` alias is not shadowed inside this function.
    summary = analysis['statistics']
    sentences.append(
        f"预测期内最高销量为 {summary['max']:.2f},最低销量为 {summary['min']:.2f},平均销量为 {summary['mean']:.2f}。"
    )

    # --- Comparison with history (a +/-10% band counts as "similar") ---
    hist_comp = analysis['historical_comparison']
    if hist_comp['has_historical_data']:
        difference = hist_comp['mean_difference_pct']
        if difference > 10:
            sentences.append(f"预测销量比历史同期高 {difference:.2f}%,可能需要增加库存。")
        elif difference < -10:
            sentences.append(f"预测销量比历史同期低 {abs(difference):.2f}%,建议适当减少库存。")
        else:
            sentences.append("预测销量与历史同期相近,可参考历史库存水平。")

    # --- Main influencing factors: at most the first two marked 'high' ---
    top_factor_names = [
        factor['name']
        for factor in analysis['factors'] or []
        if factor['importance'] == 'high'
    ][:2]
    if top_factor_names:
        sentences.append(f"主要影响因素包括: {', '.join(top_factor_names)}。")

    # --- Model-specific remark ---
    model_remarks = {
        'transformer': "Transformer模型善于捕捉时间序列中的长期依赖关系,预测结果更注重整体趋势。",
        'mlstm': "矩阵LSTM模型结合了序列记忆和注意力机制,对时间序列中的突变点有较好的适应性。",
        'kan': "KAN模型利用B样条基函数自适应学习非线性关系,对复杂非线性模式有较强的表达能力。",
        'optimized_kan': "优化版KAN模型在保持预测精度的同时,显著降低了内存占用并提高了训练速度。",
    }
    remark = model_remarks.get(model_type)
    if remark is not None:
        sentences.append(remark)

    return " ".join(sentences)
|
||
|
||
if __name__ == "__main__":
    # Step 1: load the sales workbook, generating it first if it cannot be read.
    try:
        print("正在检查是否存在模拟数据...")
        df = pd.read_excel('pharmacy_sales.xlsx')
        print("发现现有数据,跳过数据生成步骤。")
    except Exception:
        # Deliberately broad (missing file, unreadable workbook, ...) so that
        # regeneration stays best-effort, but no longer a bare ``except:``,
        # which would also swallow KeyboardInterrupt and SystemExit.
        print("未找到数据,正在生成模拟数据...")
        # NOTE(review): presumably importing this module writes
        # 'pharmacy_sales.xlsx' as a side effect - confirm.
        import generate_pharmacy_data  # noqa: F401
        print("数据生成完成!")
        df = pd.read_excel('pharmacy_sales.xlsx')

    # Step 2: collect every product ID present in the data.
    product_ids = df['product_id'].unique()

    # Step 3: train one model per product and keep its evaluation metrics.
    all_metrics = {}
    for product_id in product_ids:
        print(f"\n{'='*50}")
        print(f"开始训练产品 {product_id} 的模型")
        print(f"{'='*50}")

        _, metrics = train_product_model(product_id, epochs=epochs)
        all_metrics[product_id] = metrics

    # Step 4: print a per-product summary of the metrics.
    print("\n所有产品模型评估结果汇总:")
    for product_id, metrics in all_metrics.items():
        product_name = df[df['product_id'] == product_id]['product_name'].iloc[0]
        print(f"\n{product_name} (ID: {product_id}):")
        for metric, value in metrics.items():
            print(f"  {metric}: {value:.4f}")

    print("\n模型训练和评估完成!")