更换数据源 (Change data source)

This commit is contained in:
parent 8e450ce64b
commit aaf2672b7f
Binary file not shown (2 files).
File diff suppressed because one or more lines are too long (6 files).
@@ -0,0 +1 @@
+{"product_id": "11020059", "product_name": "Product 11020059", "model_type": "kan", "predictions": [{"date": "2022-01-01", "predicted_sales": 0.12464457750320435}, {"date": "2022-01-02", "predicted_sales": 0.11735431849956512}, {"date": "2022-01-03", "predicted_sales": 0.11733274161815643}, {"date": "2022-01-04", "predicted_sales": 0.11753901839256287}, {"date": "2022-01-05", "predicted_sales": 0.11754211783409119}, {"date": "2022-01-06", "predicted_sales": 0.11753572523593903}, {"date": "2022-01-07", "predicted_sales": 0.11738376319408417}], "prediction_data": [{"date": "2022-01-01", "predicted_sales": 0.12464457750320435}, {"date": "2022-01-02", "predicted_sales": 0.11735431849956512}, {"date": "2022-01-03", "predicted_sales": 0.11733274161815643}, {"date": "2022-01-04", "predicted_sales": 0.11753901839256287}, {"date": "2022-01-05", "predicted_sales": 0.11754211783409119}, {"date": "2022-01-06", "predicted_sales": 0.11753572523593903}, {"date": "2022-01-07", "predicted_sales": 0.11738376319408417}], "history_data": [{"store_id": "01010108", "product_id": "11020059", "date": "2021-12-02", "sales_quantity": 0.0, "return_quantity": 0.0, "sales": 0.0, "gross_profit_total": 0.0, "transaction_count": 0, "sales_quantity_rolling_mean_7d": 0.5, "return_quantity_rolling_mean_7d": 0.0, "net_sales_quantity_rolling_mean_7d": 0.5, "sales_quantity_rolling_sum_7d": 1.0, "return_quantity_rolling_sum_7d": 0.0, "net_sales_quantity_rolling_sum_7d": 1.0, "sales_quantity_rolling_mean_15d": 0.5, "return_quantity_rolling_mean_15d": 0.0, "net_sales_quantity_rolling_mean_15d": 0.5, "sales_quantity_rolling_sum_15d": 1.0, "return_quantity_rolling_sum_15d": 0.0, "net_sales_quantity_rolling_sum_15d": 1.0, "sales_quantity_rolling_mean_30d": 0.71, "return_quantity_rolling_mean_30d": -0.14, "net_sales_quantity_rolling_mean_30d": 0.57, "sales_quantity_rolling_sum_30d": 5.0, "return_quantity_rolling_sum_30d": -1.0, "net_sales_quantity_rolling_sum_30d": 4.0, "sales_quantity_rolling_mean_90d": 0.77, "return_quantity_rolling_mean_90d": -0.09, "net_sales_quantity_rolling_mean_90d": 0.68, "sales_quantity_rolling_sum_90d": 17.0, "return_quantity_rolling_sum_90d": -2.0, "net_sales_quantity_rolling_sum_90d": 15.0, "is_weekend": false, "weekday": 3, "day_of_month": 2, "day_of_year": 336, "week_of_month": 1, "month": 12, "quarter": 4, "is_holiday": false, "first_sale_date":
File diff suppressed because one or more lines are too long (7 files).
@@ -1,525 +0,0 @@
-{
-  "product_id": "17021449",
-  "product_name": "布洛芬混悬液(美林)",
-  "model_type": "cnn_bilstm_attention",
-  "predictions": [
-    {"date": "2025-07-25", "predicted_sales": 0.8147072196006775},
-    {"date": "2025-07-26", "predicted_sales": 0.8167740106582642},
-    {"date": "2025-07-27", "predicted_sales": 0.8197348117828369},
-    {"date": "2025-07-28", "predicted_sales": 0.8219858407974243},
-    {"date": "2025-07-29", "predicted_sales": 0.8112776875495911},
-    {"date": "2025-07-30", "predicted_sales": 0.8004958629608154},
-    {"date": "2025-07-31", "predicted_sales": 0.8058184385299683}
-  ],
-  "prediction_data": [
-    {"date": "2025-07-25", "predicted_sales": 0.8147072196006775},
-    {"date": "2025-07-26", "predicted_sales": 0.8167740106582642},
-    {"date": "2025-07-27", "predicted_sales": 0.8197348117828369},
-    {"date": "2025-07-28", "predicted_sales": 0.8219858407974243},
-    {"date": "2025-07-29", "predicted_sales": 0.8112776875495911},
-    {"date": "2025-07-30", "predicted_sales": 0.8004958629608154},
-    {"date": "2025-07-31", "predicted_sales": 0.8058184385299683}
-  ],
-  "history_data": [
-    {"date": "2025-06-25", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-26", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-27", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-28", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 6, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-29", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 6, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-30", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-01", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-02", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-03", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-04", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-05", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-06", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-07", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-08", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-09", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-10", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-11", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-12", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-13", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-14", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-15", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-16", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-17", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-18", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-19", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-20", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-21", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-22", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-23", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-24", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0}
-  ],
-  "analysis": {
-    "trend": {
-      "slope": -0.0024171343871525353,
-      "trend_type": "平稳",
-      "r_squared": 0.4619268323887481,
-      "p_value": 0.0930247330579927,
-      "volatility": 0.008749220910445412,
-      "volatility_level": "低"
-    },
-    "statistics": {
-      "mean": 0.8129705531256539,
-      "median": 0.8147072196006775,
-      "min": 0.8004958629608154,
-      "max": 0.8219858407974243,
-      "std": 0.007112858962983344,
-      "q1": 0.8085480630397797,
-      "q3": 0.8182544112205505
-    },
-    "day_over_day": [0.25368512857903625, 0.36249942896524706, 0.2746045406674448, -1.3027174820243943, -1.328993112252526, 0.6649098159565847],
-    "influencing_factors": {
-      "product_id": "17021449",
-      "model_type": "cnn_bilstm_attention",
-      "feature_count": 7,
-      "important_features": ["价格", "周末", "节假日"]
-    },
-    "explanation": "cnn_bilstm_attention模型对产品17021449的预测分析:\n预测显示销量整体呈平稳趋势,销量基本保持稳定。\n预测期内销量波动性低,表明销量相对稳定,预测可信度较高。\n预测期内平均日销量为0.81个单位,最高日销量为0.82个单位,最低日销量为0.80个单位。\n\n主要影响因素包括:价格, 周末, 节假日。"
-  }
-}
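Note: the deleted file above illustrates the result-payload shape these endpoints emit: `predictions` and `prediction_data` duplicate the same series, and `history_data` carries the model's input window. A minimal consumer-side sanity check might look like the sketch below (the file name is hypothetical; only the key names come from the JSON above):

```python
import json
from datetime import date, timedelta

# Hypothetical path; the commit does not show where these result files live.
with open("prediction_result_17021449.json", encoding="utf-8") as f:
    result = json.load(f)

preds = result["predictions"]

# The file stores the same series twice; check the two copies agree.
assert preds == result["prediction_data"]

# Check the forecast dates form a contiguous daily range.
dates = [date.fromisoformat(p["date"]) for p in preds]
assert all(b - a == timedelta(days=1) for a, b in zip(dates, dates[1:]))

# Every point carries a numeric forecast.
assert all(isinstance(p["predicted_sales"], float) for p in preds)

print(f"{len(preds)} forecast days, "
      f"{len(result['history_data'])} history days, "
      f"model={result['model_type']}")
```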
File diff suppressed because one or more lines are too long (2 files).
server/api.py (174)
@@ -63,9 +63,11 @@ from core.config import (
 )
 
 # 导入多店铺数据工具
-from utils.multi_store_data_utils import (
-    get_available_stores, get_available_products, get_sales_statistics
-)
+# from utils.multi_store_data_utils import (
+#     get_available_stores, get_available_products, get_sales_statistics
+# )
+# 以上旧模块将被新的统一数据加载器替代
+from utils.new_data_loader import load_new_data
 
 # 导入数据库初始化工具
 from init_multi_store_db import get_db_connection
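`utils.new_data_loader.load_new_data` is imported throughout this commit, but its implementation is not part of the diff. A minimal sketch of what such a unified loader could look like, assuming the new source is a single flat file whose columns match what the refactored call sites actually use (`store_id`, `product_id`, `date`, `sales`, `weekday`, `month`, `is_holiday`, `is_weekend`, `is_promotion`, `temperature`, `district`):

```python
# utils/new_data_loader.py -- sketch only; the real module is not shown in this commit.
import os

import pandas as pd

from core.config import DEFAULT_DATA_PATH  # path constant already used elsewhere in this repo


def load_new_data(file_path: str = None) -> pd.DataFrame:
    """Load the unified dataset and return one tidy DataFrame.

    Call sites in this commit filter on store_id/product_id, group on 'date',
    and aggregate sales plus the calendar/weather feature columns.
    """
    file_path = file_path or DEFAULT_DATA_PATH
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"data file not found: {file_path}")

    if file_path.endswith(".parquet"):
        df = pd.read_parquet(file_path)
    elif file_path.endswith(".csv"):
        df = pd.read_csv(file_path)
    else:
        raise ValueError(f"unsupported file format: {file_path}")

    # Call sites compare and group on 'date' as datetime, so normalize here.
    df["date"] = pd.to_datetime(df["date"])
    return df
```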
@@ -202,6 +204,9 @@ class CustomJSONEncoder(json.JSONEncoder):
         # 处理日期时间类型
         elif isinstance(obj, datetime):
             return obj.isoformat()
+        # 新增:处理date对象
+        elif isinstance(obj, pd.Timestamp) or hasattr(obj, 'isoformat'):
+            return obj.isoformat()
         return super(CustomJSONEncoder, self).default(obj)
 
 # Helper function to convert numpy types to native python types for JSON serialization
@@ -515,11 +520,21 @@ def swagger_ui():
     }
 })
 def get_products():
+    """获取所有产品列表 (已重构为使用新数据源)"""
     try:
-        from utils.multi_store_data_utils import get_available_products
-        products = get_available_products()
+        df = load_new_data()
+        # 从新数据中提取唯一的产品ID
+        products_df = df[['product_id']].drop_duplicates().sort_values('product_id')
+
+        # 由于新数据没有product_name,我们创建一个兼容的格式
+        products = [
+            {'product_id': pid, 'product_name': f'产品 {pid}'}
+            for pid in products_df['product_id']
+        ]
+
         return jsonify({"status": "success", "data": products})
     except Exception as e:
+        logger.error(f"获取产品列表失败: {traceback.format_exc()}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 @app.route('/api/products/<product_id>', methods=['GET'])
@@ -570,25 +585,32 @@ def get_products():
     }
 })
 def get_product(product_id):
+    """获取单个产品详情 (已重构为使用新数据源)"""
     try:
-        from utils.multi_store_data_utils import load_multi_store_data
-        df = load_multi_store_data(product_id=product_id)
+        df = load_new_data()
+        product_df = df[df['product_id'] == product_id]
 
-        if df.empty:
+        if product_df.empty:
             return jsonify({"status": "error", "message": "产品不存在"}), 404
 
+        # 从新数据中提取信息
+        product_name = f"产品 {product_id}"  # 备用名称
+        if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+            product_name = product_df['product_name'].iloc[0]
+
         product_info = {
             "product_id": product_id,
-            "product_name": df['product_name'].iloc[0],
-            "data_points": len(df),
+            "product_name": product_name,
+            "data_points": len(product_df),
             "date_range": {
-                "start": df['date'].min().strftime('%Y-%m-%d'),
-                "end": df['date'].max().strftime('%Y-%m-%d')
+                "start": product_df['date'].min().strftime('%Y-%m-%d'),
+                "end": product_df['date'].max().strftime('%Y-%m-%d')
             }
         }
 
         return jsonify({"status": "success", "data": product_info})
     except Exception as e:
+        logger.error(f"获取产品详情失败: {traceback.format_exc()}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 @app.route('/api/products/<product_id>/sales', methods=['GET'])
@@ -644,29 +666,29 @@ def get_product(product_id):
     }
 })
 def get_product_sales(product_id):
+    """获取产品销售数据 (已重构为使用新数据源)"""
     try:
         start_date = request.args.get('start_date')
         end_date = request.args.get('end_date')
 
-        from utils.multi_store_data_utils import load_multi_store_data
-        df = load_multi_store_data(
-            product_id=product_id,
-            start_date=start_date,
-            end_date=end_date
-        )
-
-        if df.empty:
-            return jsonify({"status": "error", "message": "产品不存在或无数据"}), 404
-
-        # 确保数据按日期排序
-        df = df.sort_values('date')
-
-        # 转换日期为字符串以便JSON序列化
-        df['date'] = df['date'].dt.strftime('%Y-%m-%d')
-
-        sales_data = df.to_dict('records')
+        df = load_new_data()
+        df_product = df[df['product_id'] == product_id]
+
+        if start_date:
+            df_product = df_product[df_product['date'] >= pd.to_datetime(start_date)]
+        if end_date:
+            df_product = df_product[df_product['date'] <= pd.to_datetime(end_date)]
+
+        if df_product.empty:
+            return jsonify({"status": "error", "message": "产品不存在或在指定日期范围内无数据"}), 404
+
+        df_product = df_product.sort_values('date')
+        df_product['date'] = df_product['date'].dt.strftime('%Y-%m-%d')
+
+        sales_data = df_product.to_dict('records')
         return jsonify({"status": "success", "data": sales_data})
     except Exception as e:
+        logger.error(f"获取产品销售数据失败: {traceback.format_exc()}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 @app.route('/api/data/upload', methods=['POST'])
@@ -1500,7 +1522,9 @@ def predict():
             traceback.print_exc()
             # 不应阻止向用户返回结果,因此只打印警告
 
-        return jsonify(response_data)
+        # 在返回前,使用我们的辅助函数对整个响应进行一次深度清洗
+        cleaned_response_data = convert_numpy_types_for_json(response_data)
+        return jsonify(cleaned_response_data)
     except Exception as e:
         print(f"预测失败: {str(e)}")
         import traceback
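The deep-clean helper `convert_numpy_types_for_json` is called here but its body is not in this hunk (only its introductory comment appears earlier in api.py). A typical recursive implementation of that pattern, offered as a sketch rather than the repository's actual code:

```python
import numpy as np
import pandas as pd


def convert_numpy_types_for_json(obj):
    """Recursively replace numpy/pandas scalars so json.dumps succeeds."""
    if isinstance(obj, dict):
        return {k: convert_numpy_types_for_json(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_types_for_json(v) for v in obj]
    if isinstance(obj, np.integer):
        return int(obj)          # e.g. np.int64 -> int
    if isinstance(obj, np.floating):
        return float(obj)        # e.g. np.float32 -> float
    if isinstance(obj, np.ndarray):
        return obj.tolist()      # arrays become plain lists
    if isinstance(obj, pd.Timestamp):
        return obj.isoformat()   # timestamps become ISO strings
    return obj
```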
@@ -2550,32 +2574,24 @@ def get_latest_model_id(model_type, product_id):
 
 # 获取产品名称的辅助函数
 def get_product_name(product_id):
-    """根据产品ID获取产品名称"""
+    """根据产品ID获取产品名称 (已重构)"""
     try:
-        # 从Excel文件中查找产品名称
-        from utils.multi_store_data_utils import load_multi_store_data
-        df = load_multi_store_data()
-        product_df = df[df['product_id'] == product_id]
-        if not product_df.empty:
-            return product_df['product_name'].iloc[0]
-
-        return None
+        # 注意:新数据源中没有 'product_name'。此函数现在返回一个占位符。
+        # 在未来的迭代中,可能需要关联一个产品信息表。
+        return f"产品 {product_id}"
     except Exception as e:
-        print(f"获取产品名称失败: {str(e)}")
-        return None
-# 获取店铺名称的辅助函数
+        logger.warning(f"获取产品名称时出现问题: {e}")
+        return product_id
+
 def get_store_name(store_id):
-    """根据店铺ID获取店铺名称"""
+    """根据店铺ID获取店铺名称 (已重构)"""
     try:
-        from utils.multi_store_data_utils import get_available_stores
-        stores = get_available_stores()
-        for store in stores:
-            if store['store_id'] == store_id:
-                return store['store_name']
-        return None
+        # 注意:新数据源中没有 'store_name'。此函数现在返回一个占位符。
+        # 在未来的迭代中,可能需要关联一个店铺信息表。
+        return f"店铺 {store_id}"
     except Exception as e:
-        print(f"获取店铺名称失败: {str(e)}")
-        return None
+        logger.warning(f"获取店铺名称时出现问题: {e}")
+        return store_id
 
 
 # run_prediction 函数已被移除,因为其逻辑已完全整合到 /api/prediction 路由处理函数中
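Both helpers now return placeholders because the new source carries no name columns, and the in-diff comments suggest joining a separate info table later. One way that future join could look, assuming a hypothetical `product_info.csv` lookup file with `product_id` and `product_name` columns (neither the file nor `_name_by_id` exists in this commit):

```python
import pandas as pd

# Hypothetical lookup table; not part of this commit.
_product_info = pd.read_csv("product_info.csv", dtype={"product_id": str})
_name_by_id = dict(zip(_product_info["product_id"], _product_info["product_name"]))


def get_product_name(product_id: str) -> str:
    """Return the real name when the lookup table knows it, else the placeholder."""
    return _name_by_id.get(product_id, f"产品 {product_id}")
```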
@@ -3837,23 +3853,38 @@ def update_train_task_with_websocket():
 
 @app.route('/api/stores', methods=['GET'])
 def get_stores():
-    """
-    获取所有店铺列表
-    """
+    """获取所有店铺列表 (已重构为使用新数据源并填充信息)"""
     try:
-        from utils.multi_store_data_utils import get_available_stores
-        stores = get_available_stores()
+        df = load_new_data()
+
+        # 从新数据中提取唯一的店铺信息
+        # 修正:只选择数据文件中实际存在的列
+        # 根据之前的分析,新数据有 'district' 列,但没有 'city' 和 'province'
+        stores_df = df[['store_id', 'district']].drop_duplicates('store_id')
+
+        stores_data = []
+        for _, row in stores_df.iterrows():
+            # 构建位置信息
+            location = row['district'] if pd.notna(row['district']) else "Unknown Location"
+
+            stores_data.append({
+                "store_id": row['store_id'],
+                "store_name": f"店铺 {row['store_id']}",  # 使用ID作为临时名称
+                "location": location,
+                "type": "标准药店",  # 填充默认值
+                "size": "120㎡",  # 填充默认值
+                "opening_date": "2023-01-01",  # 填充默认值
+                "status": "营业中"  # 填充默认值
+            })
+
         return jsonify({
             "status": "success",
-            "data": stores,
-            "count": len(stores)
+            "data": stores_data,
+            "count": len(stores_data)
         })
     except Exception as e:
-        return jsonify({
-            "status": "error",
-            "message": f"获取店铺列表失败: {str(e)}"
-        }), 500
+        logger.error(f"获取店铺列表失败: {traceback.format_exc()}")
+        return jsonify({"status": "error", "message": f"获取店铺列表失败: {str(e)}"}), 500
 
 @app.route('/api/stores/<store_id>', methods=['GET'])
 def get_store(store_id):
@@ -4043,11 +4074,20 @@ def delete_store(store_id):
 
 @app.route('/api/stores/<store_id>/products', methods=['GET'])
 def get_store_products(store_id):
-    """
-    获取店铺的产品列表
-    """
+    """获取店铺的产品列表 (已重构为使用新数据源)"""
     try:
-        products = get_available_products(store_id=store_id)
+        df = load_new_data()
+        store_df = df[df['store_id'] == store_id]
+
+        if store_df.empty:
+            return jsonify({"status": "success", "data": [], "count": 0})
+
+        products_df = store_df[['product_id']].drop_duplicates().sort_values('product_id')
+
+        products = [
+            {'product_id': pid, 'product_name': f'产品 {pid}'}
+            for pid in products_df['product_id']
+        ]
+
         return jsonify({
             "status": "success",
@@ -4055,10 +4095,8 @@ def get_store_products(store_id):
             "count": len(products)
         })
     except Exception as e:
-        return jsonify({
-            "status": "error",
-            "message": f"获取店铺产品列表失败: {str(e)}"
-        }), 500
+        logger.error(f"获取店铺产品列表失败: {traceback.format_exc()}")
+        return jsonify({"status": "error", "message": f"获取店铺产品列表失败: {str(e)}"}), 500
 
 @app.route('/api/stores/<store_id>/statistics', methods=['GET'])
 def get_store_statistics(store_id):
@@ -20,11 +20,13 @@ from datetime import datetime
 # 上述导入已不再需要,因为我们现在通过模型注册表动态获取训练器
 from predictors.model_predictor import load_model_and_predict
 from utils.data_utils import prepare_data, prepare_sequences
-from utils.multi_store_data_utils import (
-    load_multi_store_data,
-    get_store_product_sales_data,
-    aggregate_multi_store_data
-)
+# from utils.multi_store_data_utils import (
+#     load_multi_store_data,
+#     get_store_product_sales_data,
+#     aggregate_multi_store_data
+# )
+# 以上旧模块已被新的统一数据加载器替代
+from utils.new_data_loader import load_new_data
 from analysis.metrics import evaluate_model
 from core.config import DEVICE, DEFAULT_MODEL_DIR, DEFAULT_DATA_PATH
 
@@ -53,13 +55,10 @@ class PharmacyPredictor:
 
         print(f"使用设备: {self.device}")
 
-        # 尝试加载多店铺数据
-        try:
-            self.data = load_multi_store_data(data_path)
-            print(f"已加载多店铺数据,来源: {data_path}")
-        except Exception as e:
-            print(f"加载数据失败: {e}")
-            self.data = None
+        # 重构:不再预加载整个数据集到内存
+        # self.data 将在需要时动态加载
+        self.data = None
+        print("PharmacyPredictor 已初始化,将在需要时动态加载数据。")
 
     def train_model(self, product_id, model_type='transformer', epochs=100, batch_size=32,
                     learning_rate=0.001, sequence_length=30, forecast_horizon=7,
@@ -104,76 +103,54 @@ class PharmacyPredictor:
             except Exception as e:
                 print(f"进度回调失败: {e}", flush=True)
 
-        if self.data is None:
-            log_message("没有可用的数据,请先加载或生成数据", 'error')
+        # --- 数据加载与筛选重构 ---
+        # 统一使用新的数据加载器,替换掉所有旧的、分散的加载逻辑
+        log_message("正在使用新的统一数据加载器...", 'info')
+        try:
+            full_df = load_new_data()
+        except Exception as e:
+            log_message(f"使用新数据加载器失败: {e}", 'error')
             return None
 
-        # 根据训练模式准备数据
         if training_mode == 'product':
-            # 按产品训练:使用所有店铺的该产品数据
-            product_data = self.data[self.data['product_id'] == product_id].copy()
+            product_data = full_df[full_df['product_id'] == product_id].copy()
             if product_data.empty:
                 log_message(f"找不到产品 {product_id} 的数据", 'error')
                 return None
-            log_message(f"按产品训练模式: 产品 {product_id}, 数据量: {len(product_data)}")
+            log_message(f"按产品训练模式: 产品 {product_id}, 数据量: {len(product_data)}", 'info')
 
         elif training_mode == 'store':
-            # 按店铺训练
             if not store_id:
                 log_message("店铺训练模式需要指定 store_id", 'error')
                 return None
 
-            # 如果product_id是'unknown',则表示为店铺所有商品训练一个聚合模型
-            if product_id == 'unknown':
-                try:
-                    # 使用新的聚合函数,按店铺聚合
-                    product_data = aggregate_multi_store_data(
-                        store_id=store_id,
-                        aggregation_method=aggregation_method,
-                        file_path=self.data_path
-                    )
-                    log_message(f"按店铺聚合训练: 店铺 {store_id}, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}")
-                    # 将product_id设置为'store_{store_id}',与API查找逻辑保持一致
-                    product_id = f"store_{store_id}"
-                except Exception as e:
-                    log_message(f"聚合店铺 {store_id} 数据失败: {e}", 'error')
-                    return None
-            else:
+            # 筛选出该店铺的所有数据
+            store_df = full_df[full_df['store_id'] == store_id].copy()
+
+            # 判断是为单个产品训练还是为整个店铺聚合训练
+            if product_id and product_id != 'unknown' and product_id != 'all_products':
                 # 为店铺的单个特定产品训练
-                try:
-                    product_data = get_store_product_sales_data(
-                        store_id=store_id,
-                        product_id=product_id,
-                        file_path=self.data_path
-                    )
-                    log_message(f"按店铺-产品训练: 店铺 {store_id}, 产品 {product_id}, 数据量: {len(product_data)}")
-                except Exception as e:
-                    log_message(f"获取店铺产品数据失败: {e}", 'error')
-                    return None
+                product_data = store_df[store_df['product_id'] == product_id].copy()
+                log_message(f"按店铺-产品训练: 店铺 {store_id}, 产品 {product_id}, 数据量: {len(product_data)}", 'info')
+            else:
+                # 为整个店铺聚合训练
+                log_message(f"按店铺聚合训练: 店铺 {store_id} (所有药品)", 'info')
+                product_data = store_df.groupby('date').agg({
+                    'sales': 'sum',
+                    'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+                    'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+                }).reset_index()
+                log_message(f"聚合后数据量: {len(product_data)}", 'info')
 
         elif training_mode == 'global':
-            # 全局训练:聚合所有店铺的产品数据
-            try:
-                # 如果product_id是'unknown',则表示为全局所有商品训练一个聚合模型
-                if product_id == 'unknown':
-                    product_data = aggregate_multi_store_data(
-                        product_id=None,  # 传递None以触发真正的全局聚合
-                        aggregation_method=aggregation_method,
-                        file_path=self.data_path
-                    )
-                    log_message(f"全局训练模式: 所有产品, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}")
-                    # 将product_id设置为一个有意义的标识符
-                    product_id = 'all_products'
-                else:
-                    product_data = aggregate_multi_store_data(
-                        product_id=product_id,
-                        aggregation_method=aggregation_method,
-                        file_path=self.data_path
-                    )
-                    log_message(f"全局训练模式: 产品 {product_id}, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}")
-            except Exception as e:
-                log_message(f"聚合全局数据失败: {e}", 'error')
-                return None
+            product_data = full_df[full_df['product_id'] == product_id].copy()
+            product_data = product_data.groupby('date').agg({
+                'sales': aggregation_method,
+                'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+                'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            log_message(f"全局训练模式: 产品 {product_id}, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}", 'info')
         else:
             log_message(f"不支持的训练模式: {training_mode}", 'error')
             return None
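The store-level branch above collapses per-product rows into one daily series with a single `groupby('date').agg(...)`; the same pattern recurs in the trainer and predictor hunks below. A small self-contained demonstration of that exact aggregation on synthetic rows:

```python
import pandas as pd

rows = pd.DataFrame({
    "date": pd.to_datetime(["2025-07-01"] * 2 + ["2025-07-02"] * 2),
    "product_id": ["P1", "P2", "P1", "P2"],
    "sales": [3.0, 5.0, 2.0, 4.0],
    "weekday": [1, 1, 2, 2],
    "month": [7, 7, 7, 7],
    "is_holiday": [False] * 4,
    "is_weekend": [False] * 4,
    "is_promotion": [False] * 4,
    "temperature": [19.5, 20.5, 21.0, 21.0],
})

daily = rows.groupby("date").agg({
    "sales": "sum",             # total store sales per day
    "weekday": "first", "month": "first", "is_holiday": "first",
    "is_weekend": "first", "is_promotion": "first",
    "temperature": "mean",      # average across the rows of that day
}).reset_index()

print(daily)  # two rows: 2025-07-01 sales=8.0, 2025-07-02 sales=6.0
```

Taking `'first'` for the calendar flags is safe here because they are constant within a day; `temperature` is averaged instead.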
@@ -23,7 +23,7 @@ import xgboost as xgb
 
 from analysis.trend_analysis import analyze_prediction_result
 from utils.visualization import plot_prediction_results
-from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+from utils.new_data_loader import load_new_data
 from core.config import DEVICE, get_model_file_path, DEFAULT_DATA_PATH
 from models.model_registry import get_predictor, register_predictor
 
@@ -96,19 +96,40 @@ def load_model_and_predict(model_path: str, product_id: str, model_type: str, st
     if not os.path.exists(model_path):
         raise FileNotFoundError(f"模型文件 {model_path} 不存在")
 
-    # --- 数据加载部分保持不变 ---
-    from utils.multi_store_data_utils import aggregate_multi_store_data
+    # --- 数据加载重构 ---
+    # 统一使用新的数据加载器,确保与训练时的数据源和处理逻辑完全一致
+    print("正在使用新的统一数据加载器进行预测...")
+    full_df = load_new_data()
+
     if training_mode == 'store' and store_id:
-        from utils.multi_store_data_utils import load_multi_store_data
-        store_df_for_name = load_multi_store_data(store_id=store_id)
-        product_name = store_df_for_name['store_name'].iloc[0] if not store_df_for_name.empty else f"店铺 {store_id}"
-        product_df = aggregate_multi_store_data(store_id=store_id, aggregation_method='sum', file_path=DEFAULT_DATA_PATH)
+        store_df = full_df[full_df['store_id'] == store_id].copy()
+        # 判断是为单个产品预测还是为整个店铺聚合预测
+        if product_id and product_id != 'unknown' and product_id != 'all_products':
+            product_df = store_df[store_df['product_id'] == product_id].copy()
+            product_name = f"店铺 {store_id} - 产品 {product_id}"
+        else:
+            # 为整个店铺的聚合销售额进行预测
+            product_df = store_df.groupby('date').agg({
+                'sales': 'sum',
+                'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+                'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            product_name = f"店铺 {store_id} (所有药品聚合)"
     elif training_mode == 'global':
-        product_df = aggregate_multi_store_data(aggregation_method='sum', file_path=DEFAULT_DATA_PATH)
-        product_name = "全局销售数据"
-    else:
-        product_df = aggregate_multi_store_data(product_id=product_id, aggregation_method='sum', file_path=DEFAULT_DATA_PATH)
-        product_name = product_df['product_name'].iloc[0] if not product_df.empty else product_id
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        product_df = product_df.groupby('date').agg({
+            'sales': 'sum',
+            'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+            'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+        }).reset_index()
+        product_name = f"全局聚合 - 产品 {product_id}"
+    else:  # 默认 'product' 模式
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        # 兼容性处理:新数据可能没有 product_name 列
+        if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+            product_name = product_df['product_name'].iloc[0]
+        else:
+            product_name = f"Product {product_id}"
 
     if product_df.empty:
         raise ValueError(f"产品 {product_id} 或店铺 {store_id} 没有销售数据")
@@ -17,6 +17,7 @@ from tqdm import tqdm
 from models.kan_model import KANForecaster
 from models.optimized_kan_forecaster import OptimizedKANForecaster
 from utils.data_utils import create_dataset, PharmacyDataset
+from utils.new_data_loader import load_new_data
 from utils.visualization import plot_loss_curve
 from analysis.metrics import evaluate_model
 from core.config import DEVICE, DEFAULT_MODEL_DIR, LOOK_BACK, FORECAST_HORIZON
@@ -35,45 +36,44 @@ def train_product_model_with_kan(product_id, model_identifier, product_df=None,
         model: 训练好的模型
         metrics: 模型评估指标
     """
-    # 如果没有传入product_df,则根据训练模式加载数据
-    if product_df is None:
-        from utils.multi_store_data_utils import load_multi_store_data, get_store_product_sales_data, aggregate_multi_store_data
-        try:
-            if training_mode == 'store' and store_id:
-                # 加载特定店铺的数据
-                product_df = get_store_product_sales_data(
-                    store_id,
-                    product_id,
-                    'pharmacy_sales_multi_store.csv'
-                )
-                training_scope = f"店铺 {store_id}"
-            elif training_mode == 'global':
-                # 聚合所有店铺的数据
-                product_df = aggregate_multi_store_data(
-                    product_id,
-                    aggregation_method=aggregation_method,
-                    file_path='pharmacy_sales_multi_store.csv'
-                )
-                training_scope = f"全局聚合({aggregation_method})"
-            else:
-                # 默认:加载所有店铺的产品数据
-                product_df = load_multi_store_data('pharmacy_sales_multi_store.csv', product_id=product_id)
-                training_scope = "所有店铺"
-        except Exception as e:
-            print(f"多店铺数据加载失败: {e}")
-            # 后备方案:尝试原始数据
-            df = pd.read_excel('pharmacy_sales.xlsx')
-            product_df = df[df['product_id'] == product_id].sort_values('date')
-            training_scope = "原始数据"
-    else:
-        # 如果传入了product_df,直接使用
-        if training_mode == 'store' and store_id:
-            training_scope = f"店铺 {store_id}"
-        elif training_mode == 'global':
-            training_scope = f"全局聚合({aggregation_method})"
-        else:
-            training_scope = "所有店铺"
+    # --- 数据加载与筛选重构 ---
+    # 统一使用新的数据加载器,替换掉所有旧的、分散的加载逻辑
+    print("正在使用新的统一数据加载器...")
+    full_df = load_new_data()  # 加载完整的、适配后的新数据
+
+    if training_mode == 'store' and store_id:
+        store_df = full_df[full_df['store_id'] == store_id].copy()
+        if product_id and product_id != 'unknown' and product_id != 'all_products':
+            product_df = store_df[store_df['product_id'] == product_id].copy()
+            training_scope = f"店铺 {store_id} - 产品 {product_id}"
+        else:
+            product_df = store_df.groupby('date').agg({
+                'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                'is_holiday': 'first', 'is_weekend': 'first',
+                'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            training_scope = f"店铺 {store_id} (所有药品聚合)"
+    elif training_mode == 'global':
+        # 筛选特定产品在所有店铺的聚合数据
+        # 注意:新数据已经是按 (store_id, product_id, date) 展开的,聚合逻辑可能需要重新审视
+        # 此处暂时只筛选产品ID
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        # 按日期对同一产品在不同店铺的销售额求和
+        product_df = product_df.groupby('date').agg({
+            'sales': 'sum',
+            # 保留其他需要的特征,例如取第一个非空值或平均值
+            'weekday': 'first',
+            'month': 'first',
+            'is_holiday': 'first',
+            'is_weekend': 'first',
+            'is_promotion': 'first',
+            'temperature': 'mean'
+        }).reset_index()
+        training_scope = f"全局聚合({aggregation_method})"
+    else:  # 默认 'product' 模式
+        # 筛选特定产品的数据(可能跨越多个店铺,但此处不聚合)
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        training_scope = f"所有店铺中的产品 {product_id}"
 
     if product_df.empty:
         raise ValueError(f"产品 {product_id} 没有可用的销售数据")
@@ -95,7 +95,11 @@ def train_product_model_with_kan(product_id, model_identifier, product_df=None,
         raise ValueError(error_msg)
 
     product_df = product_df.sort_values('date')
-    product_name = product_df['product_name'].iloc[0]
+    # 兼容性处理:新数据可能没有 product_name 列
+    if 'product_name' in product_df.columns:
+        product_name = product_df['product_name'].iloc[0]
+    else:
+        product_name = f"Product {product_id}"  # 使用 product_id 作为备用名称
 
     model_type = "优化版KAN" if use_optimized else "KAN"
     print(f"使用{model_type}模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
@@ -16,7 +16,8 @@ from tqdm import tqdm
 
 from models.mlstm_model import MLSTMTransformer as MatrixLSTM
 from utils.data_utils import create_dataset, PharmacyDataset
-from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+# from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+from utils.new_data_loader import load_new_data
 from utils.visualization import plot_loss_curve
 from analysis.metrics import evaluate_model
 from core.config import (
@@ -124,13 +125,43 @@ def train_product_model_with_mlstm(
     except Exception as e:
         print(f"[mLSTM] 任务 {task_id}: 进度管理器初始化失败: {e}", flush=True)
 
-    # 数据现在由调用方传入,不再在此处加载
-    if training_mode == 'store' and store_id:
-        training_scope = f"店铺 {store_id}"
-    elif training_mode == 'global':
-        training_scope = f"全局聚合({aggregation_method})"
+    # --- 数据加载与筛选重构 ---
+    # 统一使用新的数据加载器
+    if product_df is None:
+        print("正在使用新的统一数据加载器...")
+        full_df = load_new_data()
+
+        if training_mode == 'store' and store_id:
+            store_df = full_df[full_df['store_id'] == store_id].copy()
+            if product_id and product_id != 'unknown' and product_id != 'all_products':
+                product_df = store_df[store_df['product_id'] == product_id].copy()
+                training_scope = f"店铺 {store_id} - 产品 {product_id}"
+            else:
+                product_df = store_df.groupby('date').agg({
+                    'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                    'is_holiday': 'first', 'is_weekend': 'first',
+                    'is_promotion': 'first', 'temperature': 'mean'
+                }).reset_index()
+                training_scope = f"店铺 {store_id} (所有药品聚合)"
+        elif training_mode == 'global':
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            product_df = product_df.groupby('date').agg({
+                'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                'is_holiday': 'first', 'is_weekend': 'first',
+                'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            training_scope = f"全局聚合({aggregation_method})"
+        else:  # 默认 'product' 模式
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            training_scope = f"所有店铺中的产品 {product_id}"
     else:
-        training_scope = "所有店铺"
+        # 如果传入了product_df,直接使用
+        if training_mode == 'store' and store_id:
+            training_scope = f"店铺 {store_id}"
+        elif training_mode == 'global':
+            training_scope = f"全局聚合({aggregation_method})"
+        else:
+            training_scope = "所有店铺"
 
     # 数据量检查
     min_required_samples = sequence_length + forecast_horizon
@@ -149,7 +180,11 @@ def train_product_model_with_mlstm(
         emit_progress(f"训练失败:数据不足 ({len(product_df)}/{min_required_samples} 天)")
         raise ValueError(error_msg)
 
-    product_name = product_df['product_name'].iloc[0]
+    # 兼容性处理:新数据可能没有 product_name 列
+    if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+        product_name = product_df['product_name'].iloc[0]
+    else:
+        product_name = f"产品 {product_id}"
 
     print(f"[mLSTM] 使用mLSTM模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型", flush=True)
     print(f"[mLSTM] 训练范围: {training_scope}", flush=True)
@@ -17,7 +17,8 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
 
 from models.transformer_model import TimeSeriesTransformer
 from utils.data_utils import create_dataset, PharmacyDataset
-from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+# from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+from utils.new_data_loader import load_new_data
 from utils.visualization import plot_loss_curve
 from analysis.metrics import evaluate_model
 from core.config import (
@@ -81,15 +82,42 @@ def train_product_model_with_transformer(
         def finish_training(self, *args, **kwargs): pass
     progress_manager = DummyProgressManager()
 
+    # --- Data loading and filtering refactor ---
     if product_df is None:
-        from utils.multi_store_data_utils import aggregate_multi_store_data
-        product_df = aggregate_multi_store_data(
-            product_id=product_id,
-            aggregation_method=aggregation_method
-        )
-        training_scope = f"Global aggregation ({aggregation_method})"
+        print("Using the new unified data loader...")
+        full_df = load_new_data()
+
+        if training_mode == 'store' and store_id:
+            store_df = full_df[full_df['store_id'] == store_id].copy()
+            if product_id and product_id != 'unknown' and product_id != 'all_products':
+                product_df = store_df[store_df['product_id'] == product_id].copy()
+                training_scope = f"Store {store_id} - Product {product_id}"
+            else:
+                product_df = store_df.groupby('date').agg({
+                    'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                    'is_holiday': 'first', 'is_weekend': 'first',
+                    'is_promotion': 'first', 'temperature': 'mean'
+                }).reset_index()
+                training_scope = f"Store {store_id} (all products aggregated)"
+        elif training_mode == 'global':
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            product_df = product_df.groupby('date').agg({
+                'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                'is_holiday': 'first', 'is_weekend': 'first',
+                'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            training_scope = f"Global aggregation ({aggregation_method})"
+        else:  # default 'product' mode
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            training_scope = f"Product {product_id} across all stores"
     else:
-        training_scope = "All stores"
+        # If a product_df was passed in, use it directly
+        if training_mode == 'store' and store_id:
+            training_scope = f"Store {store_id}"
+        elif training_mode == 'global':
+            training_scope = f"Global aggregation ({aggregation_method})"
+        else:
+            training_scope = "All stores"
 
     if product_df.empty:
         raise ValueError(f"Product {product_id} has no sales data available")
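
Aside: the 'store' and 'global' branches in this hunk apply the same pandas groupby/agg spec. A minimal, self-contained sketch of what that spec produces, on a synthetic frame (all IDs and values below are illustrative, not taken from the dataset):

import pandas as pd

# Synthetic frame with the columns the trainer expects after loading.
df = pd.DataFrame({
    'date': pd.to_datetime(['2022-01-01', '2022-01-01', '2022-01-02']),
    'store_id': ['S1', 'S2', 'S1'],
    'product_id': ['P1', 'P1', 'P1'],
    'sales': [3.0, 2.0, 4.0],
    'weekday': [5, 5, 6],
    'month': [1, 1, 1],
    'is_holiday': [False, False, False],
    'is_weekend': [True, True, True],
    'is_promotion': [0, 0, 0],
    'temperature': [20.0, 22.0, 21.0],
})

# The same agg spec as above: sum the target, carry calendar flags
# through with 'first', and average the temperature signal.
agg_df = df.groupby('date').agg({
    'sales': 'sum', 'weekday': 'first', 'month': 'first',
    'is_holiday': 'first', 'is_weekend': 'first',
    'is_promotion': 'first', 'temperature': 'mean'
}).reset_index()

print(agg_df)  # one row per date: sales 5.0 then 4.0, temperature 21.0 both days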
@@ -106,7 +134,11 @@ def train_product_model_with_transformer(
         raise ValueError(error_msg)
 
     product_df = product_df.sort_values('date')
-    product_name = product_df['product_name'].iloc[0]
+    # Compatibility: the new data may not carry a product_name column
+    if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+        product_name = product_df['product_name'].iloc[0]
+    else:
+        product_name = f"Product {product_id}"
 
     print(f"[Transformer] Training a sales forecast model for product '{product_name}' (ID: {product_id})", flush=True)
     print(f"[Device] Using device: {DEVICE}", flush=True)
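
The same fallback pattern appears in both the mLSTM and Transformer trainers; it generalizes to a tiny helper. A sketch only — first_or_default is illustrative and not part of this commit:

import pandas as pd

def first_or_default(df: pd.DataFrame, column: str, default):
    # Mirrors the compatibility branch above: take the first value of a column
    # when it exists and is non-empty, otherwise fall back to a default.
    if column in df.columns and not df[column].empty:
        return df[column].iloc[0]
    return default

# e.g. product_name = first_or_default(product_df, 'product_name', f"Product {product_id}")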
server/utils/multi_store_data_utils.py
@@ -1,424 +0,0 @@
-"""
-Multi-store sales forecasting system - data processing utilities
-Supports loading, filtering and processing of multi-store data
-"""
-
-import pandas as pd
-import numpy as np
-import os
-from datetime import datetime, timedelta
-from typing import Optional, List, Tuple, Dict, Any
-from core.config import DEFAULT_DATA_PATH
-
-def load_multi_store_data(file_path: str = None,
-                          store_id: Optional[str] = None,
-                          product_id: Optional[str] = None,
-                          start_date: Optional[str] = None,
-                          end_date: Optional[str] = None) -> pd.DataFrame:
-    """
-    Load multi-store sales data, with optional filtering by store, product and date range
-
-    Args:
-        file_path: data file path (.csv, .xlsx, .parquet supported); when None, the default path from config is used
-        store_id: store ID; None returns data for all stores
-        product_id: product ID; None returns data for all products
-        start_date: start date (YYYY-MM-DD)
-        end_date: end date (YYYY-MM-DD)
-
-    Returns:
-        DataFrame: the filtered sales data
-    """
-
-    # Fall back to the default path from the config file when none is given
-    if file_path is None:
-        file_path = DEFAULT_DATA_PATH
-
-    if not os.path.exists(file_path):
-        raise FileNotFoundError(f"Data file does not exist: {file_path}")
-
-    try:
-        if file_path.endswith('.csv'):
-            df = pd.read_csv(file_path)
-        elif file_path.endswith('.xlsx'):
-            df = pd.read_excel(file_path)
-        elif file_path.endswith('.parquet'):
-            df = pd.read_parquet(file_path)
-        else:
-            raise ValueError(f"Unsupported file format: {file_path}")
-
-        print(f"Successfully loaded data file: {file_path}")
-    except Exception as e:
-        print(f"Failed to load file {file_path}: {e}")
-        raise
-
-    # Filter by store
-    if store_id:
-        df = df[df['store_id'] == store_id].copy()
-        print(f"Filtered by store: {store_id}, remaining records: {len(df)}")
-
-    # Filter by product
-    if product_id:
-        df = df[df['product_id'] == product_id].copy()
-        print(f"Filtered by product: {product_id}, remaining records: {len(df)}")
-
-    # Standardize column names and dtypes
-    df = standardize_column_names(df)
-
-    # Apply the date-range filter after standardization
-    if start_date:
-        try:
-            start_date_dt = pd.to_datetime(start_date)
-            # Make sure the comparison happens between datetime objects
-            if 'date' in df.columns:
-                df = df[df['date'] >= start_date_dt].copy()
-                print(f"Start-date filter: {start_date_dt}, remaining records: {len(df)}")
-        except (ValueError, TypeError):
-            print(f"Warning: invalid start date format '{start_date}', ignored.")
-
-    if end_date:
-        try:
-            end_date_dt = pd.to_datetime(end_date)
-            # Make sure the comparison happens between datetime objects
-            if 'date' in df.columns:
-                df = df[df['date'] <= end_date_dt].copy()
-                print(f"End-date filter: {end_date_dt}, remaining records: {len(df)}")
-        except (ValueError, TypeError):
-            print(f"Warning: invalid end date format '{end_date}', ignored.")
-
-    if len(df) == 0:
-        print("Warning: no data left after filtering")
-
-    return df
-
-def standardize_column_names(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Standardize column names to the format expected by the training code and the API
-
-    Args:
-        df: the raw DataFrame
-
-    Returns:
-        DataFrame: the DataFrame with standardized column names
-    """
-    df = df.copy()
-
-    # Define the rename map and apply it
-    rename_map = {
-        'sales_quantity': 'sales',  # fix: match the original column name
-        'temperature_2m_mean': 'temperature',  # new: handle the temperature column
-        'dayofweek': 'weekday'  # fix: match the original column name
-    }
-    df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
-
-    # Make sure the date column has datetime dtype
-    if 'date' in df.columns:
-        df['date'] = pd.to_datetime(df['date'], errors='coerce')
-        df.dropna(subset=['date'], inplace=True)  # drop rows whose date could not be parsed
-    else:
-        # Without a date column we cannot continue; return an empty DataFrame
-        return pd.DataFrame()
-
-    # Computing sales_amount:
-    # with no price column, the sales_amount computation would need to change or go away;
-    # it is commented out here because the raw data already carries sales_amount
-    # if 'sales_amount' not in df.columns and 'sales' in df.columns and 'price' in df.columns:
-    #     # first make sure sales and price are numeric
-    #     df['sales'] = pd.to_numeric(df['sales'], errors='coerce')
-    #     df['price'] = pd.to_numeric(df['price'], errors='coerce')
-    #     df['sales_amount'] = df['sales'] * df['price']
-
-    # Create missing feature columns
-    if 'weekday' not in df.columns:
-        df['weekday'] = df['date'].dt.dayofweek
-
-    if 'month' not in df.columns:
-        df['month'] = df['date'].dt.month
-
-    # Add missing metadata columns
-    meta_columns = {
-        'store_name': 'Unknown Store',
-        'store_location': 'Unknown Location',
-        'store_type': 'Unknown',
-        'product_name': 'Unknown Product',
-        'product_category': 'Unknown Category'
-    }
-    for col, default in meta_columns.items():
-        if col not in df.columns:
-            df[col] = default
-
-    # Add missing boolean feature columns
-    default_features = {
-        'is_holiday': False,
-        'is_weekend': None,
-        'is_promotion': False,
-        'temperature': 20.0
-    }
-
-    for feature, default_value in default_features.items():
-        if feature not in df.columns:
-            if feature == 'is_weekend':
-                df['is_weekend'] = df['weekday'].isin([5, 6])
-            else:
-                df[feature] = default_value
-
-    # Make sure numeric dtypes are correct
-    numeric_columns = ['sales', 'sales_amount', 'weekday', 'month', 'temperature']
-    for col in numeric_columns:
-        if col in df.columns:
-            df[col] = pd.to_numeric(df[col], errors='coerce')
-
-    # Make sure boolean dtypes are correct
-    boolean_columns = ['is_holiday', 'is_weekend', 'is_promotion']
-    for col in boolean_columns:
-        if col in df.columns:
-            df[col] = df[col].astype(bool)
-
-    print(f"Data standardization finished; available feature columns: {[col for col in ['sales', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature'] if col in df.columns]}")
-
-    return df
-
-def get_available_stores(file_path: str = None) -> List[Dict[str, Any]]:
-    """
-    Get the list of available stores
-
-    Args:
-        file_path: data file path
-
-    Returns:
-        List[Dict]: list of store info records
-    """
-    try:
-        df = load_multi_store_data(file_path)
-
-        if 'store_id' not in df.columns:
-            print("The data file is missing a 'store_id' column")
-            return []
-
-        # Collect store info gracefully even when some columns are missing
-        store_info = []
-
-        # Use drop_duplicates to get the unique store rows
-        stores_df = df.drop_duplicates(subset=['store_id'])
-
-        for _, row in stores_df.iterrows():
-            store_info.append({
-                'store_id': row['store_id'],
-                'store_name': row.get('store_name', f"Store {row['store_id']}"),
-                'location': row.get('store_location', 'Unknown location'),
-                'type': row.get('store_type', 'standard'),
-                'opening_date': row.get('opening_date', 'unknown'),
-            })
-
-        return store_info
-    except Exception as e:
-        print(f"Failed to get the store list: {e}")
-        return []
-
-def get_available_products(file_path: str = None,
-                           store_id: Optional[str] = None) -> List[Dict[str, Any]]:
-    """
-    Get the list of available products
-
-    Args:
-        file_path: data file path
-        store_id: store ID; None returns products across all stores
-
-    Returns:
-        List[Dict]: list of product info records
-    """
-    try:
-        df = load_multi_store_data(file_path, store_id=store_id)
-
-        # Collect the unique product info
-        product_columns = ['product_id', 'product_name']
-        if 'product_category' in df.columns:
-            product_columns.append('product_category')
-        if 'unit_price' in df.columns:
-            product_columns.append('unit_price')
-
-        products = df[product_columns].drop_duplicates()
-
-        return products.to_dict('records')
-    except Exception as e:
-        print(f"Failed to get the product list: {e}")
-        return []
-
-def get_store_product_sales_data(store_id: str,
-                                 product_id: str,
-                                 file_path: str = None) -> pd.DataFrame:
-    """
-    Get the sales data of a specific product in a specific store, for model training
-
-    Args:
-        file_path: data file path
-        store_id: store ID
-        product_id: product ID
-
-    Returns:
-        DataFrame: processed sales data containing the features the model needs
-    """
-    # Load the data
-    df = load_multi_store_data(file_path, store_id=store_id, product_id=product_id)
-
-    if len(df) == 0:
-        raise ValueError(f"No sales data found for store {store_id}, product {product_id}")
-
-    # Make sure the data is sorted by date
-    df = df.sort_values('date').copy()
-
-    # Standardization already happened inside load_multi_store_data
-    # Verify that the required columns exist
-    required_columns = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
-    missing_columns = [col for col in required_columns if col not in df.columns]
-
-    if missing_columns:
-        print(f"Warning: columns {missing_columns} are still missing after standardization")
-        raise ValueError(f"Cannot build complete feature data; missing columns: {missing_columns}")
-
-    # All columns model training needs (features + target)
-    final_columns = [
-        'date', 'sales', 'product_id', 'product_name', 'store_id', 'store_name',
-        'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature'
-    ]
-
-    # Keep only the columns that actually exist in the DataFrame
-    existing_columns = [col for col in final_columns if col in df.columns]
-
-    # Return a DataFrame restricted to those required columns
-    return df[existing_columns]
-
-def aggregate_multi_store_data(product_id: Optional[str] = None,
-                               store_id: Optional[str] = None,
-                               aggregation_method: str = 'sum',
-                               file_path: str = None) -> pd.DataFrame:
-    """
-    Aggregate sales data, either per product (global) or per store (all products)
-
-    Args:
-        file_path: data file path
-        product_id: product ID (for the global model)
-        store_id: store ID (for the store-aggregated model)
-        aggregation_method: aggregation method ('sum', 'mean', 'median')
-
-    Returns:
-        DataFrame: the aggregated sales data
-    """
-    # Load data depending on whether this is a store aggregation, a per-product aggregation, or a truly global one
-    if store_id:
-        # Store aggregation: load all data of that store
-        df = load_multi_store_data(file_path, store_id=store_id)
-        if len(df) == 0:
-            raise ValueError(f"No sales data found for store {store_id}")
-        grouping_entity = f"store {store_id}"
-    elif product_id:
-        # Product aggregation: load that product's data across all stores
-        df = load_multi_store_data(file_path, product_id=product_id)
-        if len(df) == 0:
-            raise ValueError(f"No sales data found for product {product_id}")
-        grouping_entity = f"product {product_id}"
-    else:
-        # Truly global aggregation: load everything
-        df = load_multi_store_data(file_path)
-        if len(df) == 0:
-            raise ValueError("The data file is empty; global aggregation is impossible")
-        grouping_entity = "all products"
-
-    # Aggregate by date (using the standardized column names)
-    agg_dict = {}
-    if aggregation_method == 'sum':
-        agg_dict = {
-            'sales': 'sum',  # standardized quantity column
-            'sales_amount': 'sum',
-            'price': 'mean'  # standardized price column, averaged
-        }
-    elif aggregation_method == 'mean':
-        agg_dict = {
-            'sales': 'mean',
-            'sales_amount': 'mean',
-            'price': 'mean'
-        }
-    elif aggregation_method == 'median':
-        agg_dict = {
-            'sales': 'median',
-            'sales_amount': 'median',
-            'price': 'median'
-        }
-
-    # Keep only columns that actually exist
-    available_cols = df.columns.tolist()
-    agg_dict = {k: v for k, v in agg_dict.items() if k in available_cols}
-
-    # Aggregate the data
-    aggregated_df = df.groupby('date').agg(agg_dict).reset_index()
-
-    # Take the product info from the first row (i.e. the first store)
-    product_info = df[['product_id', 'product_name', 'product_category']].iloc[0]
-    for col, val in product_info.items():
-        aggregated_df[col] = val
-
-    # Mark the store columns as global
-    aggregated_df['store_id'] = 'GLOBAL'
-    aggregated_df['store_name'] = f'All stores-{aggregation_method.upper()}'
-    aggregated_df['store_location'] = 'Global aggregation'
-    aggregated_df['store_type'] = 'global'
-
-    # Standardize the aggregated data (adds any missing feature columns)
-    aggregated_df = aggregated_df.sort_values('date').copy()
-    aggregated_df = standardize_column_names(aggregated_df)
-
-    # All columns model training needs (features + target)
-    final_columns = [
-        'date', 'sales', 'product_id', 'product_name', 'store_id', 'store_name',
-        'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature'
-    ]
-
-    # Keep only the columns that actually exist in the DataFrame
-    existing_columns = [col for col in final_columns if col in aggregated_df.columns]
-
-    # Return a DataFrame restricted to those required columns
-    return aggregated_df[existing_columns]
-
-def get_sales_statistics(file_path: str = None,
-                         store_id: Optional[str] = None,
-                         product_id: Optional[str] = None) -> Dict[str, Any]:
-    """
-    Get summary statistics of the sales data
-
-    Args:
-        file_path: data file path
-        store_id: store ID
-        product_id: product ID
-
-    Returns:
-        Dict: the statistics
-    """
-    try:
-        df = load_multi_store_data(file_path, store_id=store_id, product_id=product_id)
-
-        if len(df) == 0:
-            return {'error': 'no data'}
-
-        stats = {
-            'total_records': len(df),
-            'date_range': {
-                'start': df['date'].min().strftime('%Y-%m-%d'),
-                'end': df['date'].max().strftime('%Y-%m-%d')
-            },
-            'stores': df['store_id'].nunique(),
-            'products': df['product_id'].nunique(),
-            'total_sales_amount': float(df['sales_amount'].sum()) if 'sales_amount' in df.columns else 0,
-            'total_quantity': int(df['quantity_sold'].sum()) if 'quantity_sold' in df.columns else 0,
-            'avg_daily_sales': float(df.groupby('date')['quantity_sold'].sum().mean()) if 'quantity_sold' in df.columns else 0
-        }
-
-        return stats
-
-    except Exception as e:
-        return {'error': str(e)}
-
-# Backward-compatible helper
-def load_data(file_path=None, store_id=None):
-    """
-    Backward-compatible data loading function
-    """
-    return load_multi_store_data(file_path, store_id=store_id)
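
With this module removed, its global-aggregation path can be approximated on top of the new loader introduced below. A minimal sketch under that assumption — aggregate_product_globally is a hypothetical helper, not code from this commit, and it assumes the adapted frame exposes 'product_id', 'date', 'sales' and 'temperature' as shown in the trainer hunks:

import pandas as pd
from utils.new_data_loader import load_new_data

def aggregate_product_globally(product_id: str, method: str = 'sum') -> pd.DataFrame:
    # Rough stand-in for the deleted aggregate_multi_store_data(product_id=...) path.
    full_df = load_new_data()
    product_df = full_df[full_df['product_id'] == product_id]
    if product_df.empty:
        raise ValueError(f"No sales data available for product {product_id}")
    return (product_df
            .groupby('date')
            .agg({'sales': method, 'temperature': 'mean'})
            .reset_index()
            .sort_values('date'))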
86
server/utils/new_data_loader.py
Normal file
@@ -0,0 +1,86 @@
+import pandas as pd
+import os
+
+def load_new_data(file_path='data/old_5shops_50skus.parquet'):
+    """
+    Load and adapt the new Parquet data file, exposing a format compatible with the existing system.
+
+    Core principles:
+    1. Preserve the integrity of the new data; drop none of the original features.
+    2. Adapt the new data first, via renames and proxy columns, to stay compatible with the old code.
+
+    Args:
+        file_path (str): path to the new data file.
+
+    Returns:
+        pandas.DataFrame: the adapted DataFrame, containing all original features.
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Data file does not exist: {file_path}")
+
+    print(f"Loading new data from {file_path}...")
+    df = pd.read_parquet(file_path)
+    print("Data loaded; starting adaptation...")
+
+    # Work on a copy so the original df stays untouched
+    df_adapted = df.copy()
+
+    # --- 1. Column renames (adapting to the old code's naming) ---
+    # Step 1.1: safely drop the redundant 'date' column in the new data; 'kdrq' is authoritative
+    if 'date' in df_adapted.columns and 'kdrq' in df.columns:
+        df_adapted.drop(columns=['date'], inplace=True)
+        print("Dropped the redundant 'date' column from the new data; 'kdrq' is authoritative.")
+
+    rename_map = {
+        'subbh': 'store_id',
+        'hh': 'product_id',
+        'kdrq': 'date',  # kdrq can now safely be renamed to date
+        'net_sales_quantity': 'sales',  # map the target variable to 'sales'
+        'temperature_2m_mean': 'temperature',
+        'day_of_week': 'weekday'
+    }
+    df_adapted.rename(columns=rename_map, inplace=True)
+    print(f"Column renames done: {list(rename_map.keys())} -> {list(rename_map.values())}")
+
+    # --- 2. Dtype conversions ---
+    # Convert the 'date' column to proper datetime objects
+    df_adapted['date'] = pd.to_datetime(df_adapted['date'])
+    print("Converted the 'date' column to datetime.")
+
+    # --- 3. Key feature engineering (proxy columns) ---
+    # The existing models rely on the 'is_promotion' and 'is_weekend' features.
+    # 'is_weekend' already exists in the new data; nothing to do.
+    # 'is_promotion' does not exist in the new data; create a proxy column defaulting to 0.
+    if 'is_promotion' not in df_adapted.columns:
+        df_adapted['is_promotion'] = 0
+        print("Created proxy column 'is_promotion' with default value 0.")
+
+    # Make sure a 'month' column exists; derive it from the date if missing
+    if 'month' not in df_adapted.columns and 'date' in df_adapted.columns:
+        df_adapted['month'] = df_adapted['date'].dt.month
+        print("Derived the 'month' column from 'date'.")
+
+    print("Data adaptation finished.")
+
+    # Return the adapted DataFrame with all columns intact
+    return df_adapted
+
+if __name__ == '__main__':
+    # Run this script directly to smoke-test the loader
+    print("--- Testing the data loader ---")
+    try:
+        adapted_df = load_new_data()
+        print("\n--- Adapted data info ---")
+        adapted_df.info()
+
+        print("\n--- Checking key columns ---")
+        key_cols = [
+            'store_id', 'product_id', 'date', 'sales',
+            'temperature', 'weekday', 'is_promotion', 'month'
+        ]
+        print(adapted_df[key_cols].head())
+
+        print(f"\nTest passed! The adapted DataFrame has {len(adapted_df.columns)} columns.")
+
+    except Exception as e:
+        print(f"\nTest failed: {e}")
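
Usage sketch for the loader above, slicing the adapted frame the way the trainers now do (this assumes the module is importable as utils.new_data_loader; 'S001' and 'P001' are placeholder IDs, not values from the dataset):

from utils.new_data_loader import load_new_data

df = load_new_data()  # defaults to data/old_5shops_50skus.parquet
series = df[(df['store_id'] == 'S001') & (df['product_id'] == 'P001')].sort_values('date')
print(series[['date', 'sales', 'temperature', 'weekday', 'is_promotion']].head())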
55
temp_data_analysis.py
Normal file
@@ -0,0 +1,55 @@
+import pandas as pd
+import os
+
+def analyze_parquet_files():
+    """
+    Analyze the structural differences between the two Parquet data files.
+    """
+    data_path = 'data'
+    current_data_file = os.path.join(data_path, 'timeseries_training_data_sample_10s50p.parquet')
+    new_data_file = os.path.join(data_path, 'old_5shops_50skus.parquet')
+
+    print("="*50)
+    print("Data file difference report")
+    print("="*50)
+
+    try:
+        # --- Analyze the current data file ---
+        print(f"\n--- 1. Current data: {current_data_file} ---\n")
+        if os.path.exists(current_data_file):
+            df_current = pd.read_parquet(current_data_file)
+            print("[Columns and dtypes]:")
+            df_current.info(verbose=False)
+            print("\n[First 5 rows]:")
+            print(df_current.head())
+            print(f"\n[Total rows]: {len(df_current)}")
+            print(f"[Unique stores]: {df_current['store_id'].nunique()}")
+            print(f"[Unique products]: {df_current['product_id'].nunique()}")
+        else:
+            print(f"Error: file does not exist: {current_data_file}")
+
+        print("\n" + "-"*40 + "\n")
+
+        # --- Analyze the new data file ---
+        print(f"\n--- 2. New data: {new_data_file} ---\n")
+        if os.path.exists(new_data_file):
+            df_new = pd.read_parquet(new_data_file)
+            print("[Columns and dtypes (partial)]:")
+            df_new.info(verbose=True, max_cols=10, show_counts=True)  # show more detail
+            print("\n[All column names]:")
+            print(df_new.columns.tolist())
+            print("\n[First 5 rows (selected columns)]:")
+            # Show a few key columns
+            display_cols = ['subbh', 'hh', 'kdrq', 'net_sales_quantity', 'is_weekend', 'sales_quantity_rolling_mean_7d', 'province', 'temperature_2m_mean', 'brand_encoded']
+            print(df_new[display_cols].head())
+            print(f"\n[Total rows]: {len(df_new)}")
+            print(f"[Unique stores (subbh)]: {df_new['subbh'].nunique()}")
+            print(f"[Unique products (hh)]: {df_new['hh'].nunique()}")
+        else:
+            print(f"Error: file does not exist: {new_data_file}")
+
+    except Exception as e:
+        print(f"\nError during analysis: {e}")
+
+if __name__ == '__main__':
+    analyze_parquet_files()
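
A possible companion to analyze_parquet_files() that reports only the schema difference between the two files instead of full info() dumps — column_diff is a hypothetical helper, not part of this commit:

import pandas as pd

def column_diff(path_a: str, path_b: str) -> None:
    # Compare the column sets of two parquet files and print what each lacks.
    cols_a = set(pd.read_parquet(path_a).columns)
    cols_b = set(pd.read_parquet(path_b).columns)
    print("Only in A:", sorted(cols_a - cols_b))
    print("Only in B:", sorted(cols_b - cols_a))
    print("Shared:", len(cols_a & cols_b), "columns")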