225 lines
6.8 KiB
Python
225 lines
6.8 KiB
Python
"""
|
||
药店销售预测系统 - 趋势分析函数
|
||
"""
|
||
|
||
import numpy as np
|
||
import pandas as pd
|
||
from scipy import stats
|
||
|
||
def analyze_prediction_result(product_id, model_type, predictions, features=None):
    """
    Run every analysis step on a forecast and bundle the results.

    Args:
        product_id: Product identifier, forwarded to the factor analysis
            and to the explanation text.
        model_type: Model type label, forwarded the same way.
        predictions: Forecast values to analyze.
        features: Optional feature data; when it is not None an
            'influencing_factors' entry is added to the result.

    Returns:
        Dict with the keys 'trend', 'statistics', 'day_over_day',
        optionally 'influencing_factors', and finally 'explanation'.
    """
    report = {
        'trend': analyze_trend(predictions),
        'statistics': calculate_prediction_statistics(predictions),
        'day_over_day': calculate_day_over_day_changes(predictions),
    }

    # Factor analysis is only possible when the caller supplied features.
    if features is not None:
        report['influencing_factors'] = analyze_influencing_factors(
            product_id, model_type, predictions, features
        )

    # The explanation is built last because it reads the entries above.
    report['explanation'] = generate_prediction_explanation(report, product_id, model_type)
    return report
|
||
|
||
def analyze_trend(predictions):
    """
    Classify the direction and the volatility of a forecast series.

    The direction comes from the slope of a least-squares line fitted
    against the positions 0..n-1; the volatility is the coefficient of
    variation (standard deviation divided by mean).

    Args:
        predictions: One-dimensional sequence of forecast values.

    Returns:
        Dict with the keys 'slope', 'trend_type' ("上升", "下降" or "平稳"),
        'r_squared', 'p_value', 'volatility' and 'volatility_level'
        ("低", "中" or "高").

        With fewer than two values no line can be fitted; the result then
        reports a flat trend with slope 0.0, r_squared 0.0 and p_value 1.0
        instead of raising.
    """
    values = np.asarray(predictions, dtype=float)

    if values.size < 2:
        # stats.linregress raises ValueError for empty input or when all x
        # values are identical (a single point), so report "no trend".
        slope, r_squared, p_value = 0.0, 0.0, 1.0
    else:
        positions = np.arange(values.size)
        slope, _intercept, r_value, p_value, _std_err = stats.linregress(positions, values)
        r_squared = r_value ** 2

    # A slope within +/-0.05 units per step is treated as flat.
    if slope > 0.05:
        trend_type = "上升"
    elif slope < -0.05:
        trend_type = "下降"
    else:
        trend_type = "平稳"

    # Coefficient of variation; defined as 0 when the mean is not positive
    # (also avoids taking the mean of an empty array).
    mean_value = values.mean() if values.size else 0.0
    volatility = values.std() / mean_value if mean_value > 0 else 0.0

    if volatility < 0.1:
        volatility_level = "低"
    elif volatility < 0.3:
        volatility_level = "中"
    else:
        volatility_level = "高"

    return {
        'slope': slope,
        'trend_type': trend_type,
        'r_squared': r_squared,
        'p_value': p_value,
        'volatility': volatility,
        'volatility_level': volatility_level,
    }
|
||
|
||
def calculate_prediction_statistics(predictions):
    """
    Summarize a forecast series with basic descriptive statistics.

    Args:
        predictions: Sequence of forecast values.

    Returns:
        Dict with the keys 'mean', 'median', 'min', 'max', 'std',
        'q1' (25th percentile) and 'q3' (75th percentile), in that order.
    """
    # Each entry pairs the output key with the NumPy reduction producing it.
    reducers = (
        ('mean', np.mean),
        ('median', np.median),
        ('min', np.min),
        ('max', np.max),
        ('std', np.std),
        ('q1', lambda series: np.percentile(series, 25)),
        ('q3', lambda series: np.percentile(series, 75)),
    )
    return {label: reduce_fn(predictions) for label, reduce_fn in reducers}
|
||
|
||
def calculate_day_over_day_changes(predictions):
    """
    Compute the percentage change between each pair of consecutive values.

    Args:
        predictions: Indexable sequence of forecast values.

    Returns:
        List with len(predictions) - 1 entries (empty for fewer than two
        values). When the previous value is zero the entry is infinity if
        the current value is positive, otherwise 0.
    """
    def _pct_change(previous, current):
        # A zero baseline has no meaningful ratio; use the sentinel values.
        if previous == 0:
            return float('inf') if current > 0 else 0
        return (current - previous) / previous * 100

    return [
        _pct_change(predictions[day - 1], predictions[day])
        for day in range(1, len(predictions))
    ]
|
||
|
||
def analyze_influencing_factors(product_id, model_type, predictions, features):
    """
    Report basic information about the factors behind a forecast.

    This is a placeholder: the list of important features is hard-coded
    per product ID rather than derived from the data.

    Args:
        product_id: Product identifier.
        model_type: Model type label.
        predictions: Forecast values (not used by this implementation).
        features: Feature data. For objects whose `shape` has at least two
            dimensions the feature count is `shape[1]`; for anything else
            (lists, dicts, 1-D arrays) it is `len(features)`.

    Returns:
        Dict with the keys 'product_id', 'model_type', 'feature_count'
        and 'important_features'.
    """
    # A 1-D array has a `shape` attribute but no second axis, so indexing
    # shape[1] unconditionally would raise IndexError; fall back to len().
    shape = getattr(features, 'shape', None)
    if shape is not None and len(shape) >= 2:
        feature_count = shape[1]
    else:
        feature_count = len(features)

    if product_id in ('P001', 'P004'):
        important_features = ['温度', '促销', '季节性']
    else:
        important_features = ['价格', '周末', '节假日']

    return {
        'product_id': product_id,
        'model_type': model_type,
        'feature_count': feature_count,
        'important_features': important_features,
    }
|
||
|
||
def generate_prediction_explanation(analysis, product_id, model_type):
    """
    Render the analysis results as a human-readable explanation.

    Args:
        analysis: Dict holding at least the 'trend' and 'statistics'
            entries; an optional 'influencing_factors' entry adds a final
            paragraph listing its 'important_features'.
        product_id: Product identifier shown in the heading line.
        model_type: Model type label shown in the heading line.

    Returns:
        The explanation text: a heading line followed by one line each
        for trend, volatility and statistics.
    """
    trend_info = analysis['trend']
    summary = analysis['statistics']

    # Trend sentence: rising/falling trends quote the absolute slope.
    direction = trend_info['trend_type']
    if direction in ("上升", "下降"):
        verb = "增长" if direction == "上升" else "下降"
        trend_tail = f"平均每天{verb}约{abs(trend_info['slope']):.2f}个单位。"
    else:
        trend_tail = "销量基本保持稳定。"
    trend_sentence = f"预测显示销量整体呈{direction}趋势," + trend_tail

    # Volatility sentence: any level other than high/medium reads as low.
    level = trend_info['volatility_level']
    volatility_remarks = {
        "高": "表明销量可能受到多种因素的影响,预测的不确定性较高。",
        "中": "表明销量有一定波动,但整体可控。",
    }
    volatility_sentence = f"预测期内销量波动性{level}," + volatility_remarks.get(
        level, "表明销量相对稳定,预测可信度较高。"
    )

    # Statistics sentence.
    stats_sentence = (
        f"预测期内平均日销量为{summary['mean']:.2f}个单位,"
        f"最高日销量为{summary['max']:.2f}个单位,最低日销量为{summary['min']:.2f}个单位。"
    )

    explanation = "\n".join([
        f"{model_type}模型对产品{product_id}的预测分析:",
        trend_sentence,
        volatility_sentence,
        stats_sentence,
    ])

    # Append the influencing factors when that analysis is available.
    factors = analysis['influencing_factors'] if 'influencing_factors' in analysis else None
    if factors is not None and 'important_features' in factors:
        explanation += f"\n\n主要影响因素包括:{', '.join(factors['important_features'])}。"

    return explanation
|
||
|
||
def compare_with_historical(predictions, product_df):
    """
    Compare the average forecast with the average of recent history.

    The historical window is the last len(predictions) rows of the
    'sales' column, or the whole column when fewer rows are available.

    Args:
        predictions: Sequence of forecast values.
        product_df: Historical data for the product; must provide a
            'sales' column.

    Returns:
        Dict with the keys 'avg_historical' (None when there is no
        history), 'avg_predicted', 'avg_diff' and 'avg_pct_diff'.
        Without history both differences are 0; with a historical mean
        of exactly 0 the percentage difference is infinity.
    """
    historical_sales = (
        product_df['sales'].values[-len(predictions):]
        if len(product_df) >= len(predictions)
        else product_df['sales'].values
    )

    # Computed unconditionally: the result always reports it, including
    # when there is no history to compare against.
    avg_predicted = np.mean(predictions)

    if len(historical_sales) > 0:
        avg_historical = np.mean(historical_sales)
        avg_diff = avg_predicted - avg_historical
        avg_pct_diff = (avg_diff / avg_historical) * 100 if avg_historical != 0 else float('inf')
    else:
        avg_historical = None
        avg_diff = 0
        avg_pct_diff = 0

    return {
        'avg_historical': avg_historical,
        'avg_predicted': avg_predicted,
        'avg_diff': avg_diff,
        'avg_pct_diff': avg_pct_diff,
    }