# ShopTRAINING/server/analysis/trend_analysis.py

"""
药店销售预测系统 - 趋势分析函数
"""

import numpy as np
import pandas as pd
from scipy import stats

def analyze_prediction_result(product_id, model_type, predictions, features=None):
    """
    Analyze a set of predictions and bundle the resulting insights.

    Args:
        product_id: Product identifier.
        model_type: Name of the model that produced the predictions.
        predictions: Prediction results.
        features: Optional feature data; when it is not None an
            'influencing_factors' entry is added to the result.

    Returns:
        A dict with the keys 'trend', 'statistics', 'day_over_day',
        optionally 'influencing_factors', and 'explanation'.
    """
    report = {
        'trend': analyze_trend(predictions),
        'statistics': calculate_prediction_statistics(predictions),
        'day_over_day': calculate_day_over_day_changes(predictions),
    }

    # Factor analysis only runs when the caller supplied feature data.
    if features is not None:
        report['influencing_factors'] = analyze_influencing_factors(
            product_id, model_type, predictions, features
        )

    # The explanation is generated last because it reads the entries above.
    report['explanation'] = generate_prediction_explanation(report, product_id, model_type)

    return report

def analyze_trend(predictions):
    """
    Classify the direction and volatility of a prediction series.

    A least-squares line is fitted against the positions 0..n-1. The slope
    decides the trend label and the coefficient of variation (std / mean)
    decides the volatility label.

    Args:
        predictions: Sequence of predicted values; it is converted to a
            float NumPy array before any computation.

    Returns:
        A dict with:
            'slope': slope of the fitted line (0.0 when fewer than two
                points are given).
            'trend_type': "上升" if slope > 0.05, "下降" if slope < -0.05,
                otherwise "平稳".
            'r_squared': squared correlation coefficient of the fit.
            'p_value': p-value reported by the regression (1.0 when fewer
                than two points are given).
            'volatility': std / mean, or 0 when the mean is not positive.
            'volatility_level': "低" (< 0.1), "中" (< 0.3) or "高".
    """
    values = np.asarray(predictions, dtype=float)

    if values.size < 2:
        # A line cannot be fitted through fewer than two points: linregress
        # rejects empty input and yields NaN for a single point. Report a
        # neutral "no trend" result instead of raising or leaking NaN.
        slope, r_squared, p_value = 0.0, 0.0, 1.0
    else:
        fit = stats.linregress(np.arange(len(values)), values)
        slope, r_squared, p_value = fit.slope, fit.rvalue ** 2, fit.pvalue

    # Trend label from the absolute slope thresholds.
    if slope > 0.05:
        trend_type = "上升"
    elif slope < -0.05:
        trend_type = "下降"
    else:
        trend_type = "平稳"

    # Coefficient of variation; the mean is computed once, and an empty
    # series is treated as having a zero mean to avoid a NaN result.
    mean_value = np.mean(values) if values.size else 0.0
    volatility = np.std(values) / mean_value if mean_value > 0 else 0

    if volatility < 0.1:
        volatility_level = "低"
    elif volatility < 0.3:
        volatility_level = "中"
    else:
        volatility_level = "高"

    return {
        'slope': slope,
        'trend_type': trend_type,
        'r_squared': r_squared,
        'p_value': p_value,
        'volatility': volatility,
        'volatility_level': volatility_level
    }

def calculate_prediction_statistics(predictions):
    """
    Compute summary statistics for a prediction series.

    Args:
        predictions: Array-like of predicted values.

    Returns:
        A dict with the keys 'mean', 'median', 'min', 'max', 'std',
        'q1' (25th percentile) and 'q3' (75th percentile), in that order.
    """
    # Single-argument NumPy reducers, listed in output order.
    reducers = (
        ('mean', np.mean),
        ('median', np.median),
        ('min', np.min),
        ('max', np.max),
        ('std', np.std),
    )
    summary = {label: reducer(predictions) for label, reducer in reducers}

    # Quartiles need the percentile rank as an extra argument.
    for label, rank in (('q1', 25), ('q3', 75)):
        summary[label] = np.percentile(predictions, rank)

    return summary

def calculate_day_over_day_changes(predictions):
    """
    Compute the percentage change between each pair of consecutive values.

    Args:
        predictions: Indexable sequence of predicted values.

    Returns:
        A list with one entry per consecutive pair (len(predictions) - 1
        entries, or an empty list for fewer than two values). Each entry is
        (current - previous) / previous * 100. When the previous value is
        zero the entry is float('inf') if the current value is positive,
        otherwise 0.
    """
    def _pct_change(previous, current):
        # A zero baseline has no defined ratio; fall back to the convention
        # described in the docstring.
        if previous == 0:
            return float('inf') if current > 0 else 0
        return (current - previous) / previous * 100

    return [
        _pct_change(predictions[day - 1], predictions[day])
        for day in range(1, len(predictions))
    ]

def analyze_influencing_factors(product_id, model_type, predictions, features):
    """
    Return basic information about the factors behind a prediction.

    This is a placeholder: the "important features" are fixed lists selected
    by product ID, not derived from the data. `predictions` is accepted but
    not used.

    Args:
        product_id: Product identifier.
        model_type: Name of the model.
        predictions: Prediction results (unused).
        features: Feature data. If it has a `shape` attribute, `shape[1]`
            is reported as the feature count; otherwise `len(features)` is.

    Returns:
        A dict with the keys 'product_id', 'model_type', 'feature_count'
        and 'important_features'.
    """
    if hasattr(features, 'shape'):
        feature_count = features.shape[1]
    else:
        feature_count = len(features)

    # Hard-coded feature lists: one for products P001/P004, one for the rest.
    if product_id in ('P001', 'P004'):
        important_features = ['温度', '促销', '季节性']
    else:
        important_features = ['价格', '周末', '节假日']

    return {
        'product_id': product_id,
        'model_type': model_type,
        'feature_count': feature_count,
        'important_features': important_features,
    }

def generate_prediction_explanation(analysis, product_id, model_type):
    """
    Build the human-readable explanation text for an analysis result.

    Args:
        analysis: Analysis dict. Must contain 'trend' (with 'trend_type',
            'volatility_level', and 'slope' when the trend is rising or
            falling) and 'statistics' (with 'mean', 'max', 'min'). May
            contain 'influencing_factors'.
        product_id: Product identifier, inserted into the heading line.
        model_type: Model name, inserted into the heading line.

    Returns:
        A multi-line string: heading, trend sentence, volatility sentence
        and statistics sentence, followed by a list of important features
        when 'influencing_factors' provides one.
    """
    trend_info = analysis['trend']
    summary = analysis['statistics']

    # Trend sentence: rising/falling trends quote the absolute slope, any
    # other trend type is described as stable.
    direction = trend_info['trend_type']
    change_verbs = {"上升": "增长", "下降": "下降"}
    if direction in change_verbs:
        pace = f"平均每天{change_verbs[direction]}约{abs(trend_info['slope']):.2f}个单位。"
    else:
        pace = "销量基本保持稳定。"
    trend_sentence = f"预测显示销量整体呈{direction}趋势，" + pace

    # Volatility sentence: any level other than high/medium uses the
    # "stable" remark.
    level = trend_info['volatility_level']
    level_remarks = {
        "高": "表明销量可能受到多种因素的影响，预测的不确定性较高。",
        "中": "表明销量有一定波动，但整体可控。",
    }
    remark = level_remarks.get(level, "表明销量相对稳定，预测可信度较高。")
    volatility_sentence = f"预测期内销量波动性{level}，" + remark

    # Statistics sentence.
    stats_sentence = (
        f"预测期内平均日销量为{summary['mean']:.2f}个单位，"
        f"最高日销量为{summary['max']:.2f}个单位，最低日销量为{summary['min']:.2f}个单位。"
    )

    report = "\n".join([
        f"{model_type}模型对产品{product_id}的预测分析：",
        trend_sentence,
        volatility_sentence,
        stats_sentence,
    ])

    # Append the important features when a factor analysis is present.
    factors = analysis.get('influencing_factors')
    if factors is not None and 'important_features' in factors:
        report += f"\n\n主要影响因素包括：{', '.join(factors['important_features'])}。"

    return report

def compare_with_historical(predictions, product_df):
    """
    Compare the average prediction with the average of recent history.

    The historical window is the last len(predictions) entries of the
    'sales' column, or the whole column when it has fewer rows than that.

    Args:
        predictions: Array-like of predicted values.
        product_df: Historical data; `product_df['sales'].values` is read.

    Returns:
        A dict with:
            'avg_historical': mean of the historical window, or None when
                there is no history.
            'avg_predicted': mean of the predictions.
            'avg_diff': avg_predicted - avg_historical (0 with no history).
            'avg_pct_diff': avg_diff as a percentage of avg_historical;
                float('inf') when avg_historical is 0, and 0 when there is
                no history.
    """
    sales = product_df['sales'].values
    horizon = len(predictions)
    historical_sales = sales[-horizon:] if len(product_df) >= horizon else sales

    # Computed unconditionally: the returned dict always reports it, so it
    # must exist even when there is no history to compare against.
    avg_predicted = np.mean(predictions)

    if len(historical_sales) == 0:
        return {
            'avg_historical': None,
            'avg_predicted': avg_predicted,
            'avg_diff': 0,
            'avg_pct_diff': 0
        }

    avg_historical = np.mean(historical_sales)
    avg_diff = avg_predicted - avg_historical
    # A zero baseline has no defined percentage; report infinity.
    avg_pct_diff = (avg_diff / avg_historical) * 100 if avg_historical != 0 else float('inf')

    return {
        'avg_historical': avg_historical,
        'avg_predicted': avg_predicted,
        'avg_diff': avg_diff,
        'avg_pct_diff': avg_pct_diff
    }