136 lines
5.3 KiB
Python
136 lines
5.3 KiB
Python
"""
|
||
Pharmacy sales forecasting system - prediction explanation functions.
|
||
"""
|
||
|
||
import numpy as np
|
||
from sklearn.linear_model import LinearRegression
|
||
|
||
def analyze_influencing_factors(product_id, model_type, predictions, features):
    """
    Analyze which features influence the predicted values.

    Fits a linear regression of ``predictions`` on ``features`` and treats
    the absolute value of each coefficient, normalized so they sum to 1, as
    that feature's importance.

    NOTE: the features are not rescaled before fitting, so the resulting
    importances depend on the units/scale of each feature column.

    Args:
        product_id: Product identifier (not used by the computation).
        model_type: Model type (not used by the computation).
        predictions: Sequence of predicted values, one per sample.
        features: Sequence of per-sample feature rows.

    Returns:
        An empty dict when ``features`` or ``predictions`` is ``None`` or
        empty. Otherwise a dict with:
            'feature_importance': mapping feature name -> normalized importance
            'key_factors': names whose importance is greater than 0.1,
                most important first
            'sorted_importance': (name, importance) pairs, most important first
    """
    # Nothing to analyze without both inputs.  Empty predictions must be
    # rejected before the slicing below: ``features[-0:]`` would keep every
    # row instead of none, leaving X and y with different lengths.
    if features is None or len(features) == 0:
        return {}
    if predictions is None or len(predictions) == 0:
        return {}

    # Align the two inputs on the shorter length: keep the most recent
    # feature rows, or the leading predictions.
    n_rows = len(features)
    n_preds = len(predictions)
    if n_rows > n_preds:
        features = features[-n_preds:]
    elif n_rows < n_preds:
        predictions = predictions[:n_rows]

    X = np.array(features)
    y = np.array(predictions)
    # A column vector target (n, 1) makes coef_ two-dimensional; flatten it
    # so there is exactly one coefficient per feature.
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    # Use the magnitude of the regression coefficients as feature importance.
    model = LinearRegression()
    model.fit(X, y)
    importance = np.abs(model.coef_)

    # Normalize so the importances sum to 1 (all zeros if nothing matters).
    total_importance = np.sum(importance)
    if total_importance > 0:
        normalized_importance = importance / total_importance
    else:
        normalized_importance = np.zeros_like(importance)

    # Names are assigned positionally; zip stops at the shorter of the two,
    # so columns beyond the known names are not reported.
    feature_names = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
    importance_dict = {
        name: float(value)
        for name, value in zip(feature_names, normalized_importance)
    }

    sorted_importance = sorted(importance_dict.items(), key=lambda item: item[1], reverse=True)

    # Key factors are the features contributing more than 10%.
    key_factors = [name for name, value in sorted_importance if value > 0.1]

    return {
        'feature_importance': importance_dict,
        'key_factors': key_factors,
        'sorted_importance': sorted_importance
    }
|
||
|
||
def generate_prediction_explanation(analysis, product_id, model_type):
    """
    Build the explanation text for a prediction from its analysis results.

    Each section is emitted only when its key is present in ``analysis``:
    'trend' (section 1), 'statistics' (section 2), 'day_over_day'
    (section 3, skipped when empty) and 'influencing_factors' (section 4,
    skipped when it has no key factors).  A one-sentence summary labelled
    "总结: " is always appended.

    Args:
        analysis: Mapping of analysis results.  Keys read here:
            'trend' -> mapping with 'trend_type', 'slope', 'r_squared',
                'volatility_level', 'volatility'
            'statistics' -> mapping with 'mean', 'median', 'min', 'max', 'std'
            'day_over_day' -> sequence of numbers, formatted as percentages
            'influencing_factors' -> mapping with 'key_factors' and
                'feature_importance'
        product_id: Product identifier shown in the heading.
        model_type: Model type shown in the heading.

    Returns:
        The explanation text as a single string.
    """
    # Collect the pieces and join once at the end instead of repeated +=.
    parts = [f"产品 {product_id} 的 {model_type} 模型预测分析:\n\n"]

    # Section 1: trend analysis.
    if 'trend' in analysis:
        trend = analysis['trend']
        parts.append(f"1. 销量趋势: {trend['trend_type']}\n")
        parts.append(f" - 趋势斜率: {trend['slope']:.4f}\n")
        parts.append(f" - 确定系数 (R²): {trend['r_squared']:.4f}\n")
        parts.append(f" - 波动程度: {trend['volatility_level']} (波动率: {trend['volatility']:.2f})\n\n")

    # Section 2: descriptive statistics.
    if 'statistics' in analysis:
        stats = analysis['statistics']
        parts.append("2. 销量统计:\n")
        parts.append(f" - 平均销量: {stats['mean']:.2f}\n")
        parts.append(f" - 中位数销量: {stats['median']:.2f}\n")
        parts.append(f" - 最低销量: {stats['min']:.2f}\n")
        parts.append(f" - 最高销量: {stats['max']:.2f}\n")
        parts.append(f" - 标准差: {stats['std']:.2f}\n\n")

    # Section 3: day-over-day change (skipped when the list is empty).
    if 'day_over_day' in analysis and analysis['day_over_day']:
        dod = analysis['day_over_day']
        avg_change = sum(dod) / len(dod)
        parts.append("3. 日环比变化:\n")
        parts.append(f" - 平均日环比变化: {avg_change:.2f}%\n")
        parts.append(f" - 最大单日增长: {max(dod):.2f}%\n")
        parts.append(f" - 最大单日下降: {min(dod):.2f}%\n\n")

    # Looked up once; used by both section 4 and the summary.
    factors = analysis['influencing_factors'] if 'influencing_factors' in analysis else None
    has_key_factors_entry = bool(factors) and 'key_factors' in factors

    # Section 4: key influencing factors with their importance share.
    if has_key_factors_entry and factors['key_factors']:
        parts.append("4. 主要影响因素:\n")
        for factor in factors['key_factors']:
            importance = factors['feature_importance'][factor]
            parts.append(f" - {factor}: {importance*100:.2f}%\n")
        parts.append("\n")

    # Summary.  The "总结: " label is always emitted, so the sentence is
    # labelled even when no trend information is available.
    if 'trend' in analysis:
        trend_type = analysis['trend']['trend_type']
        if trend_type == "上升":
            parts.append("总结: 预测显示销量呈上升趋势,")
        elif trend_type == "下降":
            parts.append("总结: 预测显示销量呈下降趋势,")
        else:
            parts.append("总结: 预测显示销量相对平稳,")
    else:
        parts.append("总结: ")

    if has_key_factors_entry:
        key_factors = factors['key_factors']
        if key_factors:
            parts.append(f"主要受 {', '.join(key_factors)} 因素影响。")
        else:
            parts.append("没有明显的单一影响因素。")
    else:
        parts.append("影响因素分析不可用。")

    return "".join(parts)