ShopTRAINING/pharmacy_predictor.py

1495 lines
58 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib
# 设置matplotlib后端为Agg适用于无头服务器环境
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import os
from datetime import datetime
import json
import torch.serialization
# Import tqdm defensively: it is an optional dependency, so fall back to a
# minimal stand-in with the same call shape when it is not installed.
try:
    from tqdm import tqdm
except ImportError:
    TQDM_AVAILABLE = False

    def tqdm(iterable, **kwargs):
        """Stand-in for ``tqdm.tqdm`` that returns ``iterable`` unchanged.

        When the iterable has a non-zero length and a ``desc`` keyword was
        given, a single line with the description and the item count is
        printed. All other keyword arguments are accepted and ignored.
        """
        desc = kwargs.get('desc')
        total = len(iterable) if hasattr(iterable, '__len__') else None
        if total and desc:
            print(f"{desc} - 共 {total}")
        return iterable
else:
    TQDM_AVAILABLE = True
# Allow-list MinMaxScaler for torch's restricted (weights_only) unpickler,
# because the checkpoints written by this module embed fitted scalers.
# add_safe_globals takes the class objects themselves, not dotted-path
# strings, and is missing from older PyTorch releases, so the call is guarded
# to keep the module importable there.
if hasattr(torch.serialization, 'add_safe_globals'):
    torch.serialization.add_safe_globals([MinMaxScaler])
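# NOTE(review): this spot was labelled "set random seeds for reproducibility",
# but no seeding call follows it in the visible part of the file.
# Import the project's model modules.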
from models.data_utils import PharmacyDataset, create_dataset, evaluate_model
from models.transformer_model import TimeSeriesTransformer
from models.mlstm_model import MLSTMTransformer
from models.kan_model import KANForecaster
from models.utils import get_device, to_device, DeviceDataLoader
# Let matplotlib render Chinese labels (SimHei) and a proper minus sign.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Resolve the compute device (GPU or CPU) once, at import time.
device = get_device()
print(f"使用设备: {device}")

# Global hyperparameters read by the training functions in this module.
look_back = 14        # number of past days fed to the model
T = 7                 # number of future days to forecast
epochs = 50           # training epochs
num_features = 8      # input features per time step
embed_dim = 32        # embedding dimension
dense_dim = 32        # hidden (feed-forward) layer width
num_heads = 4         # attention heads
dropout_rate = 0.1    # dropout probability
num_blocks = 3        # number of encoder/decoder blocks
learn_rate = 0.001    # optimizer learning rate
batch_size = 32       # mini-batch size
# Default training entry point: mLSTM is the default model.
def train_product_model(product_id, epochs=50):
    """Train the default sales-forecast model (mLSTM) for one product.

    This is a thin delegate to ``train_product_model_with_mlstm``.

    Args:
        product_id: Identifier of the product to train on.
        epochs: Number of training epochs.

    Returns:
        The ``(model, metrics)`` pair produced by
        ``train_product_model_with_mlstm``.
    """
    trained = train_product_model_with_mlstm(product_id, epochs)
    return trained
# Shared pipeline behind the mLSTM, KAN and Transformer training functions.
def _train_eval_save_product_model(product_id, epochs, model_key, display_name, build_model):
    """Train, evaluate, plot and persist one forecasting model for a product.

    The three public trainers used to repeat this pipeline line for line;
    they now differ only in the model they build and in the labels used for
    messages and file names.

    Args:
        product_id: Value matched against the ``product_id`` column of
            ``pharmacy_sales.xlsx``.
        epochs: Number of training epochs.
        model_key: Lower-case tag used in file names, the ``models/<key>``
            directory and the saved ``model_type`` (e.g. ``'mlstm'``).
        display_name: Human-readable model name used in console messages
            and plot labels (e.g. ``'mLSTM'``).
        build_model: Zero-argument callable returning a fresh, untrained
            ``nn.Module``.

    Returns:
        tuple: ``(model, metrics)`` where ``metrics`` is whatever
        ``evaluate_model`` returns for the de-normalised test predictions.

    Raises:
        IndexError: If the workbook has no rows for ``product_id``.
        ValueError: If the series is too short to build at least one
            training window and one test window.
    """
    # Load the sales workbook and keep only this product, in date order.
    df = pd.read_excel('pharmacy_sales.xlsx')
    product_df = df[df['product_id'] == product_id].sort_values('date')
    if product_df.empty:
        # Same exception type the bare ``.iloc[0]`` lookup used to raise,
        # but with a message that names the actual problem.
        raise IndexError(
            f"no rows for product_id {product_id!r} in pharmacy_sales.xlsx"
        )
    product_name = product_df['product_name'].iloc[0]
    print(f"使用{display_name}模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
    print(f"使用设备: {device}")

    # Input features and target column.
    features = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
    X = product_df[features].values
    y = product_df[['sales']].values  # keep 2-D for the scaler

    # Normalise to [0, 1].
    # NOTE(review): the scalers are fitted on the full series before the
    # train/test split, so test-period ranges influence the scaling. Kept
    # as-is because saved checkpoints and the predictor rely on these scalers.
    scaler_X = MinMaxScaler(feature_range=(0, 1))
    scaler_y = MinMaxScaler(feature_range=(0, 1))
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y)

    # Chronological 80% / 20% split.
    train_size = int(len(X_scaled) * 0.8)
    X_train, X_test = X_scaled[:train_size], X_scaled[train_size:]
    y_train, y_test = y_scaled[:train_size], y_scaled[train_size:]

    # Build sliding-window samples.
    trainX, trainY = create_dataset(X_train, y_train, look_back, T)
    testX, testY = create_dataset(X_test, y_test, look_back, T)
    if len(trainX) == 0 or len(testX) == 0:
        # Without this check an empty test split surfaces later as a
        # ZeroDivisionError when averaging the per-batch losses.
        raise ValueError(
            f"not enough rows for product_id {product_id!r} to build any "
            f"(look_back={look_back}, T={T}) windows in both the train and test split"
        )

    # Convert to tensors and wrap in device-aware loaders.
    trainX_tensor = torch.Tensor(trainX)
    trainY_tensor = torch.Tensor(trainY)
    testX_tensor = torch.Tensor(testX)
    testY_tensor = torch.Tensor(testY)
    train_dataset = PharmacyDataset(trainX_tensor, trainY_tensor)
    test_dataset = PharmacyDataset(testX_tensor, testY_tensor)
    train_loader = DeviceDataLoader(
        DataLoader(train_dataset, batch_size=batch_size, shuffle=True), device
    )
    test_loader = DeviceDataLoader(
        DataLoader(test_dataset, batch_size=batch_size, shuffle=False), device
    )

    # Build the model and the optimisation objects.
    model = build_model().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learn_rate)

    # Training loop with a per-epoch pass over the test set.
    train_losses = []
    test_losses = []
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=False):
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(-1), y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)

        model.eval()
        test_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                loss = criterion(outputs.squeeze(-1), y_batch)
                test_loss += loss.item()
        test_loss = test_loss / len(test_loader)
        test_losses.append(test_loss)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    # Loss curves. The figure is closed after saving so repeated training
    # calls do not accumulate open matplotlib figures.
    loss_fig = plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='训练损失')
    plt.plot(test_losses, label='测试损失')
    plt.title(f'{product_name} - {display_name}模型训练和测试损失')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'{product_id}_{model_key}_loss_curve.png')
    plt.close(loss_fig)

    # Predict on the whole test set in one pass.
    model.eval()
    with torch.no_grad():
        testX_device = to_device(testX_tensor, device)
        y_pred_scaled = model(testX_device).squeeze(-1).cpu().numpy()

    # Undo the normalisation so metrics and plots are in sales units.
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).reshape(y_pred_scaled.shape)
    y_true = scaler_y.inverse_transform(testY.reshape(-1, 1)).reshape(testY.shape)

    metrics = evaluate_model(y_true.flatten(), y_pred.flatten())
    print(f"\n{product_name} {display_name}模型评估指标:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # Plot the first forecast step of (at most) the last 30 test windows.
    pred_fig = plt.figure(figsize=(12, 6))
    first_target = train_size + look_back
    test_dates = product_df['date'].iloc[first_target:first_target + len(y_true)].values
    days_to_plot = min(30, len(y_true))
    plot_from = max(0, len(y_true) - days_to_plot)
    plt.plot(test_dates[plot_from:], y_true[plot_from:, 0], 'b-', label='实际销量')
    plt.plot(test_dates[plot_from:], y_pred[plot_from:, 0], 'r--', label=f'{display_name}预测销量')
    plt.title(f'{product_name} - {display_name}销量预测结果')
    plt.xlabel('日期')
    plt.ylabel('销量')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'{product_id}_{model_key}_prediction.png')
    plt.close(pred_fig)

    # Flatten every window's T-day horizon into one long table for the CSV.
    date_column = product_df['date']
    all_dates = []
    for i in range(len(y_true)):
        window_start = first_target + i
        all_dates.extend(date_column.iloc[window_start:window_start + T])
    flat_y_true = y_true.flatten()
    flat_y_pred = y_pred.flatten()
    # Truncate to the shorter length in case the date list came up short.
    min_len = min(len(all_dates), len(flat_y_true))
    results_df = pd.DataFrame({
        'date': all_dates[:min_len],
        'actual_sales': flat_y_true[:min_len],
        'predicted_sales': flat_y_pred[:min_len]
    })
    results_csv = f'{product_id}_{model_key}_prediction_results.csv'
    results_df.to_csv(results_csv, index=False)
    print(f"\n{display_name}预测结果已保存到 {results_csv}")

    # Persist the checkpoint. The keys are the union of what the three
    # trainers wrote before, so existing readers keep working.
    model_dir = f'models/{model_key}'
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, f'{product_id}_model.pt')
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_losses,
        'test_loss': test_losses,
        'scaler_X': scaler_X,
        'scaler_y': scaler_y,
        'metrics': metrics,
        'epochs': epochs,
        'features': features,
        'look_back': look_back,
        'T': T,
        'model_type': model_key
    }, model_path)
    print(f"模型已成功保存到 {model_path}")

    # Write a small JSON training log next to the checkpoint.
    log_path = os.path.join(model_dir, f'{product_id}_log.json')
    log_data = {
        'product_id': product_id,
        'product_name': product_name,
        'model_type': model_key,
        'training_completed_at': datetime.now().isoformat(),
        'epochs': epochs,
        'metrics': metrics,
        'file_path': model_path
    }
    with open(log_path, 'w', encoding='utf-8') as f:
        json.dump(log_data, f, indent=4, ensure_ascii=False)
    print(f"训练日志已保存到 {log_path}")

    return model, metrics


# Train with the mLSTM + Transformer hybrid model.
def train_product_model_with_mlstm(product_id, epochs=50):
    """Train an ``MLSTMTransformer`` sales-forecast model for one product.

    Args:
        product_id: Identifier of the product to train on.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(model, metrics)``.
    """
    def build_model():
        return MLSTMTransformer(
            num_features=num_features,
            hidden_size=128,
            mlstm_layers=1,
            embed_dim=embed_dim,
            dense_dim=dense_dim,
            num_heads=num_heads,
            dropout_rate=dropout_rate,
            num_blocks=num_blocks,
            output_sequence_length=T
        )

    return _train_eval_save_product_model(product_id, epochs, 'mlstm', 'mLSTM', build_model)


# Train with the KAN model.
def train_product_model_with_kan(product_id, epochs=50):
    """Train a ``KANForecaster`` sales-forecast model for one product.

    Args:
        product_id: Identifier of the product to train on.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(model, metrics)``.
    """
    def build_model():
        return KANForecaster(
            input_features=num_features,
            hidden_sizes=[64, 128, 64],
            output_size=1,
            grid_size=5,
            spline_order=3,
            dropout_rate=dropout_rate,
            output_sequence_length=T
        )

    return _train_eval_save_product_model(product_id, epochs, 'kan', 'KAN', build_model)


# Train with the Transformer model.
def train_product_model_with_transformer(product_id, epochs=50):
    """Train a ``TimeSeriesTransformer`` sales-forecast model for one product.

    The previous body contained forecast-charting code that referenced
    names never defined in this function (``fig``, ``start_date_obj``,
    ``model_type``), so it raised ``NameError`` right after plotting and
    never saved the model. That section is removed; this trainer now
    produces the same artefacts as the mLSTM and KAN trainers.

    Args:
        product_id: Identifier of the product to train on.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(model, metrics)``.
    """
    def build_model():
        return TimeSeriesTransformer(
            num_features=num_features,
            d_model=embed_dim,
            nhead=num_heads,
            num_encoder_layers=num_blocks,
            dim_feedforward=dense_dim,
            dropout=dropout_rate,
            output_sequence_length=T
        )

    return _train_eval_save_product_model(product_id, epochs, 'transformer', 'Transformer', build_model)
# 加载模型并进行预测的函数
def _build_prediction_model(model_type, feature_count, horizon):
    """Instantiate an untrained forecaster for ``model_type``.

    The hyper-parameters are the module-level defaults (``embed_dim``,
    ``dense_dim``, ``num_heads``, ``dropout_rate``, ``num_blocks``), so they
    must match the values used when the checkpoint was trained.

    Args:
        model_type: One of 'mlstm', 'kan' or 'transformer'.
        feature_count: Number of input features per time step.
        horizon: Number of days the model emits per forward pass.

    Returns:
        The model instance, or None when ``model_type`` is not supported.
    """
    if model_type == 'mlstm':
        return MLSTMTransformer(
            num_features=feature_count,
            hidden_size=128,
            mlstm_layers=1,
            embed_dim=embed_dim,
            dense_dim=dense_dim,
            num_heads=num_heads,
            dropout_rate=dropout_rate,
            num_blocks=num_blocks,
            output_sequence_length=horizon,
        )
    if model_type == 'kan':
        return KANForecaster(
            input_features=feature_count,
            hidden_sizes=[64, 128, 64],
            output_size=1,
            grid_size=5,
            spline_order=3,
            dropout_rate=dropout_rate,
            output_sequence_length=horizon,
        )
    if model_type == 'transformer':
        return TimeSeriesTransformer(
            num_features=feature_count,
            d_model=embed_dim,
            nhead=num_heads,
            num_encoder_layers=num_blocks,
            dim_feedforward=dense_dim,
            dropout=dropout_rate,
            output_sequence_length=horizon,
        )
    return None


def _save_forecast_chart(history_df, future_dates, y_pred, start_date_obj,
                         product_name, model_type, history_days, chart_path):
    """Plot recent history next to the forecast and save it to ``chart_path``.

    Returns ``chart_path``. The figure is closed even when plotting fails so
    that repeated calls do not accumulate open figures.
    """
    forecast_fig, forecast_ax = plt.subplots(figsize=(12, 6))
    try:
        forecast_ax.plot(history_df['date'].values, history_df['sales'].values, 'b-', label='历史销量')
        # Forecast values are plotted against the generated future dates.
        forecast_ax.plot(future_dates, y_pred, 'r--', label=f'{model_type}预测销量')

        # Pin the x axis to [start - history window, forecast end + 2 days].
        date_min = start_date_obj - pd.Timedelta(days=history_days)
        date_max = future_dates.max() + pd.Timedelta(days=2)
        print(f"设置X轴范围: {date_min}{date_max}")
        forecast_ax.set_xlim(date_min, date_max)

        forecast_ax.set_title(
            f'{product_name} - {model_type}销量预测 '
            f'(从{start_date_obj.strftime("%Y-%m-%d")}开始,预测{len(future_dates)}天)'
        )
        forecast_ax.set_xlabel('日期')
        forecast_ax.set_ylabel('销量')
        forecast_ax.legend()
        forecast_ax.grid(True)
        forecast_ax.tick_params(axis='x', labelrotation=45)
        forecast_fig.tight_layout()
        forecast_fig.canvas.draw()

        forecast_fig.savefig(chart_path)
        print(f"预测图表已保存为: {chart_path}")
    finally:
        plt.close(forecast_fig)
    return chart_path


def _save_period_comparison_chart(product_df, start_date_obj, product_name, chart_path):
    """Plot sales around the forecast start against last year and last month.

    The window is ``start_date_obj`` +/- 3 days. The same window shifted back
    one year and one month is overlaid on a shared "day offset" x axis.

    Returns ``chart_path``. The figure is closed even when plotting fails.
    """
    print("\n开始生成历史趋势图...")
    history_fig, history_ax = plt.subplots(figsize=(12, 6))
    try:
        print(f"预测起始日期: {start_date_obj}")

        days_before = 3
        days_after = 3
        window_days = days_before + days_after + 1
        date_range_start = start_date_obj - pd.Timedelta(days=days_before)
        date_range_end = start_date_obj + pd.Timedelta(days=days_after)

        # DateOffset clamps to the last valid day of the target month, so
        # Feb 29 / day-31 starts and windows that straddle a month or year
        # boundary no longer raise or land in the wrong month.
        last_year_start = date_range_start - pd.DateOffset(years=1)
        last_year_end = date_range_end - pd.DateOffset(years=1)
        last_month_start = date_range_start - pd.DateOffset(months=1)
        last_month_end = date_range_end - pd.DateOffset(months=1)

        print(f"当前日期范围: {date_range_start}{date_range_end}")
        print(f"去年同期范围: {last_year_start}{last_year_end}")
        print(f"上月同期范围: {last_month_start}{last_month_end}")

        # (log/legend prefix, window start, window end, line style, marker,
        #  whether to highlight the forecast start date on this series)
        periods = [
            ('当前期间', date_range_start, date_range_end, 'r-', 'o', True),
            ('去年同期', last_year_start, last_year_end, 'b-', 's', False),
            ('上月同期', last_month_start, last_month_end, 'g-', '^', False),
        ]

        period_frames = []
        for title, period_start, period_end, _, _, _ in periods:
            in_period = (product_df['date'] >= period_start) & (product_df['date'] <= period_end)
            frame = product_df[in_period]
            print(f"{title}数据点数: {len(frame)}")
            period_frames.append(frame)

        has_data = False
        for (title, period_start, period_end, line_style, marker, mark_start), frame in zip(periods, period_frames):
            if frame.empty:
                continue
            has_data = True
            frame = frame.sort_values('date')
            # assign() returns a new frame, so the filtered slice of
            # product_df is never written to in place.
            frame = frame.assign(day_offset=(frame['date'] - period_start).dt.days)
            print(f"{title}日期: {frame['date'].tolist()}")
            print(f"{title}相对天数: {frame['day_offset'].tolist()}")
            print(f"{title}销量: {frame['sales'].tolist()}")
            history_ax.plot(
                frame['day_offset'],
                frame['sales'],
                line_style,
                marker=marker,
                linewidth=2,
                label=f"{title} ({period_start.strftime('%Y-%m-%d')}{period_end.strftime('%Y-%m-%d')})"
            )
            if mark_start:
                center_point = frame[frame['date'] == start_date_obj]
                if not center_point.empty:
                    history_ax.scatter(
                        center_point['day_offset'],
                        center_point['sales'],
                        color='red',
                        s=100,
                        marker='*',
                        zorder=10,
                        label=f"预测起始日 ({start_date_obj.strftime('%Y-%m-%d')})"
                    )

        # Label the shared offset axis with the current window's month-day.
        day_offsets = list(range(window_days))
        day_names = [(date_range_start + pd.Timedelta(days=d)).strftime('%m-%d') for d in day_offsets]
        history_ax.set_xticks(day_offsets)
        history_ax.set_xticklabels(day_names)
        # Vertical guide at the forecast start day.
        history_ax.axvline(x=days_before, color='gray', linestyle='--', alpha=0.5)

        history_ax.set_title(f'{product_name} - 同期销量趋势对比 (7天)')
        history_ax.set_xlabel('日期')
        history_ax.set_ylabel('销量')
        history_ax.grid(True, linestyle='--', alpha=0.7)
        history_ax.legend(loc='best')

        if not has_data:
            history_ax.text(0.5, 0.5, '没有找到可比较的同期数据',
                            horizontalalignment='center', verticalalignment='center',
                            transform=history_ax.transAxes, fontsize=14)

        history_fig.tight_layout()
        history_fig.canvas.draw()

        history_fig.savefig(chart_path)
        print(f"历史趋势图表已保存为: {chart_path}")
    finally:
        plt.close(history_fig)
    return chart_path


def load_model_and_predict(product_id, model_type, future_days=7, start_date=None):
    """Load a saved model for a product and forecast its future sales.

    The model input is always the most recent ``look_back`` rows of the
    product in ``pharmacy_sales.xlsx``; ``start_date`` only decides which
    calendar dates the forecast values are labelled with and which history
    window is shown in the charts.

    A model emits at most ``T`` days per forward pass (``T`` is stored in the
    checkpoint). When ``future_days`` exceeds ``T`` the forecast is truncated
    to ``T`` days, and the dates, chart title and file names use that length.

    Args:
        product_id: Product ID.
        model_type: Model type, one of 'mlstm', 'kan', 'transformer'.
        future_days: Number of days to forecast (default 7).
        start_date: First forecast date as 'YYYY-MM-DD'. None means the day
            after the last date in the dataset. An unparsable value falls
            back to today's date.

    Returns:
        None when ``future_days`` is below 1, the model file is missing, the
        product has no rows in the dataset, or ``model_type`` is unsupported.
        Otherwise a dict with:
            'predictions_df': history + forecast rows (forecast rows only if
                chart generation failed),
            'chart_path': forecast chart path or None,
            'history_chart_path': period-comparison chart path or None,
            'csv_path': exported CSV path or None.
    """
    print("\n" + "="*80)
    print("加载模型并预测 - 详细调试信息:")
    print(f"产品ID: {product_id}, 模型类型: {model_type}, 预测天数: {future_days}, 预测起始日期: {start_date}")
    print("="*80 + "\n")

    model_path = f'models/{model_type}/{product_id}_model.pt'
    if not os.path.exists(model_path):
        print(f"错误: 未找到产品 {product_id}{model_type} 模型文件")
        return None

    if future_days < 1:
        print(f"错误: 预测天数必须为正整数,当前为 {future_days}")
        return None

    device = get_device()
    print(f"使用设备: {device} 进行预测")

    # NOTE(security): weights_only=False goes through pickle, which is needed
    # because the checkpoint stores the fitted scalers alongside the weights.
    # Only load checkpoint files from trusted locations.
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)

    # Re-read the raw data to get the product's most recent records.
    df = pd.read_excel('pharmacy_sales.xlsx')
    product_df = df[df['product_id'] == product_id].sort_values('date')
    if product_df.empty:
        print(f"错误: 数据集中未找到产品 {product_id} 的销售记录")
        return None
    product_name = product_df['product_name'].iloc[0]

    features = checkpoint['features']
    look_back = checkpoint['look_back']
    horizon = checkpoint['T']
    scaler_X = checkpoint['scaler_X']
    scaler_y = checkpoint['scaler_y']

    # Last look_back rows, scaled with the scaler fitted at training time.
    last_data = product_df[features].values[-look_back:]
    last_data_scaled = scaler_X.transform(last_data)

    model = _build_prediction_model(model_type, len(features), horizon)
    if model is None:
        print(f"错误: 不支持的模型类型 '{model_type}'")
        return None
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    # Add the batch dimension and move the input to the model's device.
    X_input = torch.Tensor(last_data_scaled).unsqueeze(0).to(device)

    with torch.no_grad():
        default_pred_length = horizon
        print(f"模型默认预测长度: {default_pred_length}")
        if future_days <= default_pred_length:
            print(f"请求的预测天数 {future_days} 小于等于模型默认值 {default_pred_length},截取需要的部分")
        else:
            # A single forward pass cannot cover more than the trained
            # horizon, so the request is capped instead of failing later.
            print(f"警告: 请求的预测天数 {future_days} 大于模型默认值 {default_pred_length},仅返回前 {default_pred_length} 天的预测")
        y_pred_scaled = model(X_input).squeeze(0).cpu().numpy()[:min(future_days, default_pred_length)]

    # Undo the target scaling.
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
    # From here on, work with the number of days actually predicted so dates,
    # titles and file names all agree with the data.
    future_days = len(y_pred)

    last_date = product_df['date'].iloc[-1]
    if start_date:
        try:
            start_date_obj = pd.Timestamp(start_date)
            print(f"成功解析用户指定的预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")
        except Exception as e:
            # Best effort: an invalid date falls back to today.
            start_date_obj = pd.Timestamp.now().normalize()
            print(f"日期解析错误: {e}")
            print(f"使用当前日期 {start_date_obj.strftime('%Y-%m-%d')} 作为预测起点")
    else:
        start_date_obj = last_date + pd.Timedelta(days=1)
        print(f"未指定起始日期,使用数据集最后日期 {last_date.strftime('%Y-%m-%d')} 的下一天作为预测起点: {start_date_obj.strftime('%Y-%m-%d')}")

    future_dates = pd.date_range(start=start_date_obj, periods=future_days, freq='D')
    print(f"生成预测日期范围: {future_dates[0]}{future_dates[-1]}, 共 {len(future_dates)}")

    # Computed before any plotting so the CSV export below still has a file
    # name when chart generation fails.
    start_date_str = start_date_obj.strftime('%Y%m%d')

    output_dir = f'predictions/{model_type}/{product_id}'
    os.makedirs(output_dir, exist_ok=True)

    predictions_df = pd.DataFrame({
        'date': future_dates,
        'sales': y_pred,
        'data_type': '预测销量',
        'product_id': product_id,
        'product_name': product_name
    })

    forecast_chart = None
    history_chart = None
    complete_df = predictions_df
    try:
        # History shown next to the forecast: the 30 days that end the day
        # before the forecast starts.
        history_days = 30
        history_end_date = start_date_obj - pd.Timedelta(days=1)
        history_start_date = history_end_date - pd.Timedelta(days=history_days)
        in_window = (product_df['date'] >= history_start_date) & (product_df['date'] <= history_end_date)
        history_df = product_df[in_window][['date', 'sales']].copy()
        if history_df.empty:
            print(f"警告: 在日期范围 {history_start_date}{history_end_date} 内没有历史数据")
            # Nothing in that window: fall back to the latest available rows.
            history_df = product_df.iloc[-min(history_days, len(product_df)):][['date', 'sales']].copy()
        print(f"历史数据日期范围: {history_df['date'].min()}{history_df['date'].max()}")
        print(f"预测数据日期范围: {future_dates.min()}{future_dates.max()}")
        print(f"预测起始日期: {start_date_obj.strftime('%Y-%m-%d')}")

        forecast_chart = _save_forecast_chart(
            history_df, future_dates, y_pred, start_date_obj,
            product_name, model_type, history_days,
            f'{output_dir}/forecast_{start_date_str}_days{future_days}.png',
        )

        # The comparison chart is optional; its failure must not discard the
        # forecast chart or the combined DataFrame.
        try:
            history_chart = _save_period_comparison_chart(
                product_df, start_date_obj, product_name,
                f'{output_dir}/history_{start_date_str}.png',
            )
        except Exception as e:
            import traceback
            print(f"生成历史趋势图时出错: {e}")
            traceback.print_exc()
            history_chart = None

        # History and forecast rows in one frame for CSV export / API return.
        history_rows = history_df.assign(
            data_type='历史销量',
            product_id=product_id,
            product_name=product_name,
        )
        complete_df = pd.concat([history_rows, predictions_df]).sort_values('date')
    except Exception as e:
        import traceback
        print(f"生成预测图表时出错: {e}")
        traceback.print_exc()
        forecast_chart = None
        history_chart = None
        # Charts are best effort; the forecast itself is still returned.
        complete_df = predictions_df

    try:
        forecast_csv = f'{output_dir}/forecast_{start_date_str}_days{future_days}.csv'
        complete_df.to_csv(forecast_csv, index=False)
        print(f"预测结果已保存到: {forecast_csv}")
    except Exception as e:
        print(f"保存CSV文件时出错: {e}")
        forecast_csv = None

    return {
        'predictions_df': complete_df,
        'chart_path': forecast_chart,
        'history_chart_path': history_chart,
        'csv_path': forecast_csv
    }
# 特定加载KAN模型并预测的函数
def load_kan_model_and_predict(product_id, future_days=7, start_date=None):
    """Forecast future sales for a product with its saved KAN model.

    Convenience wrapper around ``load_model_and_predict`` with the model type
    fixed to 'kan'.

    Args:
        product_id: Product ID.
        future_days: Number of days to forecast (default 7).
        start_date: Optional first forecast date as 'YYYY-MM-DD', forwarded
            unchanged. None keeps the wrapped function's default behaviour.

    Returns:
        Whatever ``load_model_and_predict`` returns for model type 'kan'.
    """
    return load_model_and_predict(
        product_id,
        model_type='kan',
        future_days=future_days,
        start_date=start_date,
    )
if __name__ == "__main__":
    # Script entry point: make sure the sales workbook exists, train one model
    # per product, then print every product's evaluation metrics.
    data_file = 'pharmacy_sales.xlsx'

    print("正在检查是否存在模拟数据...")
    try:
        df = pd.read_excel(data_file)
        print("发现现有数据,跳过数据生成步骤。")
    except FileNotFoundError:
        # Only a missing file triggers generation. Any other read failure
        # (locked or corrupt workbook, missing Excel engine, Ctrl-C) surfaces
        # as an error instead of being treated as "no data".
        print("未找到数据,正在生成模拟数据...")
        # Imported for its side effect; presumably the module writes
        # pharmacy_sales.xlsx at import time -- TODO confirm.
        import generate_pharmacy_data  # noqa: F401
        print("数据生成完成!")
        df = pd.read_excel(data_file)

    product_ids = df['product_id'].unique()

    # Train one model per product and keep its metrics for the summary.
    all_metrics = {}
    for product_id in product_ids:
        print(f"\n{'='*50}")
        print(f"开始训练产品 {product_id} 的模型")
        print(f"{'='*50}")
        _, metrics = train_product_model(product_id, epochs=epochs)
        all_metrics[product_id] = metrics

    print("\n所有产品模型评估结果汇总:")
    for product_id, metrics in all_metrics.items():
        product_name = df[df['product_id'] == product_id]['product_name'].iloc[0]
        print(f"\n{product_name} (ID: {product_id}):")
        for metric, value in metrics.items():
            print(f" {metric}: {value:.4f}")
    print("\n模型训练和评估完成!")