ShopTRAINING/server/analysis/explanation.py

136 lines
5.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
药店销售预测系统 - 预测解释函数
"""
import numpy as np
from sklearn.linear_model import LinearRegression
def analyze_influencing_factors(product_id, model_type, predictions, features):
    """
    Estimate which input features most influence the predicted values.

    Fits a linear regression of ``predictions`` on ``features`` and reports
    each coefficient's absolute value as a share of the sum of all absolute
    coefficients.

    Args:
        product_id: Product identifier (not used by the computation).
        model_type: Model type (not used by the computation).
        predictions: Sequence of predicted values, one per sample.
        features: Sequence of per-sample feature rows. A flat (1-D) sequence
            is treated as a single feature column.

    Returns:
        An empty dict when either input is ``None`` or empty. Otherwise a
        dict with:
            ``feature_importance``: feature name -> normalized share.
            ``key_factors``: names whose share exceeds 0.1, most important
                first.
            ``sorted_importance``: ``(name, share)`` pairs, descending.
    """
    # Nothing to analyze without both features and predictions. The empty
    # predictions case must be caught here: slicing with ``[-0:]`` below
    # would keep every feature row and leave the two inputs mismatched.
    if features is None or predictions is None:
        return {}
    if len(features) == 0 or len(predictions) == 0:
        return {}

    # Align the sample counts. A mismatch may occur when the predictions
    # cover future days while the features are historical; in that case only
    # the overlapping number of samples can be analyzed.
    if len(features) != len(predictions):
        if len(features) > len(predictions):
            features = features[-len(predictions):]
        predictions = predictions[:len(features)]

    X = np.asarray(features)
    y = np.asarray(predictions)

    # The regression needs a 2-D design matrix; a flat sequence is read as
    # one feature observed once per sample.
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    # A column vector of predictions would yield 2-D coefficients; flatten
    # it so there is exactly one coefficient per feature.
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    # Use the absolute regression coefficients as the importance measure.
    # NOTE(review): features are not standardized here, so coefficient
    # magnitudes depend on each feature's scale.
    model = LinearRegression()
    model.fit(X, y)
    importance = np.abs(model.coef_)

    # Normalize so the shares sum to 1 (all zeros if every coefficient is 0).
    total_importance = np.sum(importance)
    if total_importance > 0:
        normalized_importance = importance / total_importance
    else:
        normalized_importance = np.zeros_like(importance)

    # Names are assigned by column position; zip stops at the shorter of the
    # two, so columns beyond the known names are not reported.
    feature_names = ['sales', 'price', 'weekday', 'month', 'is_holiday',
                     'is_weekend', 'is_promotion', 'temperature']
    importance_dict = {
        name: float(share)
        for name, share in zip(feature_names, normalized_importance)
    }

    sorted_importance = sorted(
        importance_dict.items(), key=lambda item: item[1], reverse=True
    )

    # Key factors are the features whose normalized share exceeds 10%.
    key_factors = [name for name, share in sorted_importance if share > 0.1]

    return {
        'feature_importance': importance_dict,
        'key_factors': key_factors,
        'sorted_importance': sorted_importance
    }
def generate_prediction_explanation(analysis, product_id, model_type):
    """
    Build the explanation text for a prediction from its analysis results.

    A section is emitted only when its key is present in ``analysis``:
    ``trend``, ``statistics``, ``day_over_day`` and ``influencing_factors``.
    A one-sentence summary is always appended at the end.

    Args:
        analysis: Dict of analysis results; ``None`` is treated as empty.
        product_id: Product identifier shown in the header.
        model_type: Model type shown in the header.

    Returns:
        The explanation text as a single string.
    """
    if analysis is None:
        analysis = {}

    # Keep the product id and model type apart so they do not run together.
    parts = [f"产品 {product_id} {model_type} 模型预测分析:\n\n"]

    # Section 1: trend analysis.
    if 'trend' in analysis:
        trend = analysis['trend']
        parts.append(f"1. 销量趋势: {trend['trend_type']}\n")
        parts.append(f" - 趋势斜率: {trend['slope']:.4f}\n")
        parts.append(f" - 确定系数 (R²): {trend['r_squared']:.4f}\n")
        parts.append(
            f" - 波动程度: {trend['volatility_level']} (波动率: {trend['volatility']:.2f})\n\n"
        )

    # Section 2: descriptive statistics.
    if 'statistics' in analysis:
        stats = analysis['statistics']
        parts.append("2. 销量统计:\n")
        parts.append(f" - 平均销量: {stats['mean']:.2f}\n")
        parts.append(f" - 中位数销量: {stats['median']:.2f}\n")
        parts.append(f" - 最低销量: {stats['min']:.2f}\n")
        parts.append(f" - 最高销量: {stats['max']:.2f}\n")
        parts.append(f" - 标准差: {stats['std']:.2f}\n\n")

    # Section 3: day-over-day changes (skipped when the list is empty).
    if 'day_over_day' in analysis and analysis['day_over_day']:
        dod = analysis['day_over_day']
        avg_change = sum(dod) / len(dod)
        parts.append("3. 日环比变化:\n")
        parts.append(f" - 平均日环比变化: {avg_change:.2f}%\n")
        parts.append(f" - 最大单日增长: {max(dod):.2f}%\n")
        parts.append(f" - 最大单日下降: {min(dod):.2f}%\n\n")

    # Section 4: key influencing factors with their importance shares.
    factors = analysis.get('influencing_factors')
    has_key_factors = bool(factors) and 'key_factors' in factors
    if has_key_factors and factors['key_factors']:
        importance_by_name = factors.get('feature_importance') or {}
        parts.append("4. 主要影响因素:\n")
        for factor in factors['key_factors']:
            importance = importance_by_name.get(factor)
            if importance is None:
                # No importance recorded for this factor: list the name only
                # instead of failing on the missing entry.
                parts.append(f" - {factor}\n")
            else:
                parts.append(f" - {factor}: {importance*100:.2f}%\n")
        parts.append("\n")

    # Summary: always starts with the summary label, even without a trend.
    if 'trend' in analysis:
        trend_type = analysis['trend']['trend_type']
        if trend_type == "上升":
            parts.append("总结: 预测显示销量呈上升趋势,")
        elif trend_type == "下降":
            parts.append("总结: 预测显示销量呈下降趋势,")
        else:
            parts.append("总结: 预测显示销量相对平稳,")
    else:
        parts.append("总结: ")

    if has_key_factors:
        key_factors = factors['key_factors']
        if key_factors:
            parts.append(f"主要受 {', '.join(key_factors)} 因素影响。")
        else:
            parts.append("没有明显的单一影响因素。")
    else:
        parts.append("影响因素分析不可用。")

    return "".join(parts)