# Source: ShopTRAINING/server/analysis/explanation.py (136 lines, 5.3 KiB, Python)

"""
药店销售预测系统 - 预测解释函数
"""
import numpy as np
from sklearn.linear_model import LinearRegression
def analyze_influencing_factors(product_id, model_type, predictions, features):
    """Estimate how strongly each input feature relates to the predictions.

    Fits a ``LinearRegression`` of ``predictions`` on ``features`` and reports
    the absolute regression coefficients, normalized so that they sum to 1.

    NOTE: the features are not standardized here, so the coefficient
    magnitudes (and therefore the reported importance) depend on the scale
    of each feature column.

    Args:
        product_id: Product identifier. Not used by the computation.
        model_type: Model type label. Not used by the computation.
        predictions: Sequence of predicted values, one per sample. A column
            vector of shape ``(n, 1)`` is flattened.
        features: 2-D sequence of feature values, one row per sample.
            Columns are labelled positionally with the names in
            ``feature_names`` below -- presumably the caller supplies them
            in that order; TODO confirm against the caller.

    Returns:
        An empty dict when ``features`` or ``predictions`` is ``None`` or
        empty. Otherwise a dict with:

        * ``'feature_importance'``: feature name -> normalized importance.
        * ``'key_factors'``: names whose importance exceeds 0.1, most
          important first.
        * ``'sorted_importance'``: ``(name, importance)`` pairs sorted by
          descending importance.
    """
    # Nothing to analyse without both inputs.
    if features is None or len(features) == 0:
        return {}
    if predictions is None or len(predictions) == 0:
        return {}

    # The lengths may differ, e.g. when the predictions cover future days
    # while the features are historical. Only the overlapping part can be
    # analysed: keep the most recent feature rows and the first predictions.
    if len(features) != len(predictions):
        if len(features) > len(predictions):
            features = features[-len(predictions):]
        predictions = predictions[:len(features)]

    X = np.asarray(features)
    y = np.asarray(predictions)
    # A column vector target would make ``coef_`` 2-D; flatten it so that
    # there is exactly one coefficient per feature.
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    model = LinearRegression()
    model.fit(X, y)

    # Importance is the magnitude of each coefficient.
    importance = np.abs(model.coef_)
    if importance.ndim > 1:
        # Multi-output target: average the magnitudes across outputs so a
        # single value per feature remains.
        importance = importance.mean(axis=0)

    total_importance = importance.sum()
    if total_importance > 0:
        normalized_importance = importance / total_importance
    else:
        # All coefficients are zero (or not comparable): report zeros
        # instead of dividing by zero.
        normalized_importance = np.zeros_like(importance)

    feature_names = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
    # zip stops at the shorter sequence, so extra columns or extra names
    # are silently ignored.
    importance_dict = {
        name: float(weight)
        for name, weight in zip(feature_names, normalized_importance)
    }

    sorted_importance = sorted(
        importance_dict.items(), key=lambda item: item[1], reverse=True
    )

    # Key factors are the features that carry more than 10% of the total.
    key_factors = [name for name, weight in sorted_importance if weight > 0.1]

    return {
        'feature_importance': importance_dict,
        'key_factors': key_factors,
        'sorted_importance': sorted_importance,
    }
def generate_prediction_explanation(analysis, product_id, model_type):
    """Render an analysis result dictionary as explanation text.

    Each section is emitted only when its key is present in ``analysis``;
    the section numbers are fixed and are not renumbered when a section is
    missing.

    Args:
        analysis: Mapping that may contain any of these keys:

            * ``'trend'``: mapping with ``trend_type``, ``slope``,
              ``r_squared``, ``volatility_level`` and ``volatility``.
            * ``'statistics'``: mapping with ``mean``, ``median``, ``min``,
              ``max`` and ``std``.
            * ``'day_over_day'``: non-empty sequence of numbers, formatted
              as percentages.
            * ``'influencing_factors'``: mapping with ``key_factors`` and
              ``feature_importance``.
        product_id: Product identifier shown in the header.
        model_type: Model type label shown in the header.

    Returns:
        The explanation as a single string.

    Raises:
        KeyError: If a present section lacks one of the fields listed above.
    """
    # Collect fragments and join once at the end.
    parts = [f"产品 {product_id} {model_type} 模型预测分析:\n\n"]

    has_trend = 'trend' in analysis

    # Section 1: trend.
    if has_trend:
        trend = analysis['trend']
        parts.append(f"1. 销量趋势: {trend['trend_type']}\n")
        parts.append(f" - 趋势斜率: {trend['slope']:.4f}\n")
        parts.append(f" - 确定系数 (R²): {trend['r_squared']:.4f}\n")
        parts.append(f" - 波动程度: {trend['volatility_level']} (波动率: {trend['volatility']:.2f})\n\n")

    # Section 2: summary statistics.
    if 'statistics' in analysis:
        stats = analysis['statistics']
        parts.append("2. 销量统计:\n")
        parts.append(f" - 平均销量: {stats['mean']:.2f}\n")
        parts.append(f" - 中位数销量: {stats['median']:.2f}\n")
        parts.append(f" - 最低销量: {stats['min']:.2f}\n")
        parts.append(f" - 最高销量: {stats['max']:.2f}\n")
        parts.append(f" - 标准差: {stats['std']:.2f}\n\n")

    # Section 3: day-over-day change (skipped when the list is empty).
    if 'day_over_day' in analysis and analysis['day_over_day']:
        dod = analysis['day_over_day']
        avg_change = sum(dod) / len(dod)
        parts.append("3. 日环比变化:\n")
        parts.append(f" - 平均日环比变化: {avg_change:.2f}%\n")
        parts.append(f" - 最大单日增长: {max(dod):.2f}%\n")
        parts.append(f" - 最大单日下降: {min(dod):.2f}%\n\n")

    # Section 4: key influencing factors with their importance in percent.
    if 'influencing_factors' in analysis:
        factors = analysis['influencing_factors']
        if factors and 'key_factors' in factors and factors['key_factors']:
            parts.append("4. 主要影响因素:\n")
            for factor in factors['key_factors']:
                importance = factors['feature_importance'][factor]
                parts.append(f" - {factor}: {importance*100:.2f}%\n")
            parts.append("\n")

    # Summary sentence. The trend clause ends with a comma and the
    # influencing-factor clause completes it, so the factor clause is only
    # emitted together with the trend clause.
    if has_trend:
        trend_type = analysis['trend']['trend_type']
        if trend_type == "上升":
            parts.append("总结: 预测显示销量呈上升趋势,")
        elif trend_type == "下降":
            parts.append("总结: 预测显示销量呈下降趋势,")
        else:
            parts.append("总结: 预测显示销量相对平稳,")

        if ('influencing_factors' in analysis
                and analysis['influencing_factors']
                and 'key_factors' in analysis['influencing_factors']):
            key_factors = analysis['influencing_factors']['key_factors']
            if key_factors:
                parts.append(f"主要受 {', '.join(key_factors)} 因素影响。")
            else:
                parts.append("没有明显的单一影响因素。")
        else:
            parts.append("影响因素分析不可用。")

    return "".join(parts)