更换数据源 (Change data source)

This commit is contained in:
parent 8e450ce64b
commit aaf2672b7f
Binary file not shown (2 files).
File diff suppressed because one or more lines are too long (6 files).
@@ -0,0 +1 @@
+{"product_id": "11020059", "product_name": "Product 11020059", "model_type": "kan", "predictions": [{"date": "2022-01-01", "predicted_sales": 0.12464457750320435}, {"date": "2022-01-02", "predicted_sales": 0.11735431849956512}, {"date": "2022-01-03", "predicted_sales": 0.11733274161815643}, {"date": "2022-01-04", "predicted_sales": 0.11753901839256287}, {"date": "2022-01-05", "predicted_sales": 0.11754211783409119}, {"date": "2022-01-06", "predicted_sales": 0.11753572523593903}, {"date": "2022-01-07", "predicted_sales": 0.11738376319408417}], "prediction_data": [{"date": "2022-01-01", "predicted_sales": 0.12464457750320435}, {"date": "2022-01-02", "predicted_sales": 0.11735431849956512}, {"date": "2022-01-03", "predicted_sales": 0.11733274161815643}, {"date": "2022-01-04", "predicted_sales": 0.11753901839256287}, {"date": "2022-01-05", "predicted_sales": 0.11754211783409119}, {"date": "2022-01-06", "predicted_sales": 0.11753572523593903}, {"date": "2022-01-07", "predicted_sales": 0.11738376319408417}], "history_data": [{"store_id": "01010108", "product_id": "11020059", "date": "2021-12-02", "sales_quantity": 0.0, "return_quantity": 0.0, "sales": 0.0, "gross_profit_total": 0.0, "transaction_count": 0, "sales_quantity_rolling_mean_7d": 0.5, "return_quantity_rolling_mean_7d": 0.0, "net_sales_quantity_rolling_mean_7d": 0.5, "sales_quantity_rolling_sum_7d": 1.0, "return_quantity_rolling_sum_7d": 0.0, "net_sales_quantity_rolling_sum_7d": 1.0, "sales_quantity_rolling_mean_15d": 0.5, "return_quantity_rolling_mean_15d": 0.0, "net_sales_quantity_rolling_mean_15d": 0.5, "sales_quantity_rolling_sum_15d": 1.0, "return_quantity_rolling_sum_15d": 0.0, "net_sales_quantity_rolling_sum_15d": 1.0, "sales_quantity_rolling_mean_30d": 0.71, "return_quantity_rolling_mean_30d": -0.14, "net_sales_quantity_rolling_mean_30d": 0.57, "sales_quantity_rolling_sum_30d": 5.0, "return_quantity_rolling_sum_30d": -1.0, "net_sales_quantity_rolling_sum_30d": 4.0, "sales_quantity_rolling_mean_90d": 0.77, "return_quantity_rolling_mean_90d": -0.09, "net_sales_quantity_rolling_mean_90d": 0.68, "sales_quantity_rolling_sum_90d": 17.0, "return_quantity_rolling_sum_90d": -2.0, "net_sales_quantity_rolling_sum_90d": 15.0, "is_weekend": false, "weekday": 3, "day_of_month": 2, "day_of_year": 336, "week_of_month": 1, "month": 12, "quarter": 4, "is_holiday": false, "first_sale_date":
File diff suppressed because one or more lines are too long (7 files).
@@ -1,525 +0,0 @@
-{
-  "product_id": "17021449",
-  "product_name": "布洛芬混悬液(美林)",
-  "model_type": "cnn_bilstm_attention",
-  "predictions": [
-    {"date": "2025-07-25", "predicted_sales": 0.8147072196006775},
-    {"date": "2025-07-26", "predicted_sales": 0.8167740106582642},
-    {"date": "2025-07-27", "predicted_sales": 0.8197348117828369},
-    {"date": "2025-07-28", "predicted_sales": 0.8219858407974243},
-    {"date": "2025-07-29", "predicted_sales": 0.8112776875495911},
-    {"date": "2025-07-30", "predicted_sales": 0.8004958629608154},
-    {"date": "2025-07-31", "predicted_sales": 0.8058184385299683}
-  ],
-  "prediction_data": [
-    {"date": "2025-07-25", "predicted_sales": 0.8147072196006775},
-    {"date": "2025-07-26", "predicted_sales": 0.8167740106582642},
-    {"date": "2025-07-27", "predicted_sales": 0.8197348117828369},
-    {"date": "2025-07-28", "predicted_sales": 0.8219858407974243},
-    {"date": "2025-07-29", "predicted_sales": 0.8112776875495911},
-    {"date": "2025-07-30", "predicted_sales": 0.8004958629608154},
-    {"date": "2025-07-31", "predicted_sales": 0.8058184385299683}
-  ],
-  "history_data": [
-    {"date": "2025-06-25", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-26", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-27", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-28", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 6, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-29", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 6, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-06-30", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 6, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-01", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-02", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-03", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-04", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-05", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-06", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-07", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-08", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-09", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-10", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-11", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-12", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-13", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-14", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-15", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-16", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-17", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-18", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 4, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-19", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 5, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-20", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 6, "month": 7, "is_holiday": false, "is_weekend": true, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-21", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 0, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-22", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 1, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-23", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 2, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0},
-    {"date": "2025-07-24", "sales": 0.0, "product_id": "17021449", "product_name": "布洛芬混悬液(美林)", "store_id": "GLOBAL", "store_name": "全部店铺-SUM", "weekday": 3, "month": 7, "is_holiday": false, "is_weekend": false, "is_promotion": false, "temperature": 20.0}
-  ],
-  "analysis": {
-    "trend": {
-      "slope": -0.0024171343871525353,
-      "trend_type": "平稳",
-      "r_squared": 0.4619268323887481,
-      "p_value": 0.0930247330579927,
-      "volatility": 0.008749220910445412,
-      "volatility_level": "低"
-    },
-    "statistics": {
-      "mean": 0.8129705531256539,
-      "median": 0.8147072196006775,
-      "min": 0.8004958629608154,
-      "max": 0.8219858407974243,
-      "std": 0.007112858962983344,
-      "q1": 0.8085480630397797,
-      "q3": 0.8182544112205505
-    },
-    "day_over_day": [0.25368512857903625, 0.36249942896524706, 0.2746045406674448, -1.3027174820243943, -1.328993112252526, 0.6649098159565847],
-    "influencing_factors": {
-      "product_id": "17021449",
-      "model_type": "cnn_bilstm_attention",
-      "feature_count": 7,
-      "important_features": ["价格", "周末", "节假日"]
-    },
-    "explanation": "cnn_bilstm_attention模型对产品17021449的预测分析:\n预测显示销量整体呈平稳趋势,销量基本保持稳定。\n预测期内销量波动性低,表明销量相对稳定,预测可信度较高。\n预测期内平均日销量为0.81个单位,最高日销量为0.82个单位,最低日销量为0.80个单位。\n\n主要影响因素包括:价格, 周末, 节假日。"
-  }
-}
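Note: the deleted file above illustrates the result-payload shape these endpoints emit: `predictions` and `prediction_data` duplicate the same series, and `history_data` carries the model's input window. A minimal consumer-side sanity check might look like the sketch below (the file name is hypothetical; only the key names come from the JSON above):

```python
import json
from datetime import date, timedelta

# Hypothetical path; the commit does not show where these result files live.
with open("prediction_result_17021449.json", encoding="utf-8") as f:
    result = json.load(f)

preds = result["predictions"]

# The file stores the same series twice; check the two copies agree.
assert preds == result["prediction_data"]

# Check the forecast dates form a contiguous daily range.
dates = [date.fromisoformat(p["date"]) for p in preds]
assert all(b - a == timedelta(days=1) for a, b in zip(dates, dates[1:]))

# Every point carries a numeric forecast.
assert all(isinstance(p["predicted_sales"], float) for p in preds)

print(f"{len(preds)} forecast days, "
      f"{len(result['history_data'])} history days, "
      f"model={result['model_type']}")
```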
File diff suppressed because one or more lines are too long (2 files).
server/api.py (174)
@@ -63,9 +63,11 @@ from core.config import (
 )
 
 # 导入多店铺数据工具
-from utils.multi_store_data_utils import (
-    get_available_stores, get_available_products, get_sales_statistics
-)
+# from utils.multi_store_data_utils import (
+#     get_available_stores, get_available_products, get_sales_statistics
+# )
+# 以上旧模块将被新的统一数据加载器替代
+from utils.new_data_loader import load_new_data
 
 # 导入数据库初始化工具
 from init_multi_store_db import get_db_connection
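`utils.new_data_loader.load_new_data` is imported throughout this commit, but its implementation is not part of the diff. A minimal sketch of what such a unified loader could look like, assuming the new source is a single flat file whose columns match what the refactored call sites actually use (`store_id`, `product_id`, `date`, `sales`, `weekday`, `month`, `is_holiday`, `is_weekend`, `is_promotion`, `temperature`, `district`):

```python
# utils/new_data_loader.py -- sketch only; the real module is not shown in this commit.
import os

import pandas as pd

from core.config import DEFAULT_DATA_PATH  # path constant already used elsewhere in this repo


def load_new_data(file_path: str = None) -> pd.DataFrame:
    """Load the unified dataset and return one tidy DataFrame.

    Call sites in this commit filter on store_id/product_id, group on 'date',
    and aggregate sales plus the calendar/weather feature columns.
    """
    file_path = file_path or DEFAULT_DATA_PATH
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"data file not found: {file_path}")

    if file_path.endswith(".parquet"):
        df = pd.read_parquet(file_path)
    elif file_path.endswith(".csv"):
        df = pd.read_csv(file_path)
    else:
        raise ValueError(f"unsupported file format: {file_path}")

    # Call sites compare and group on 'date' as datetime, so normalize here.
    df["date"] = pd.to_datetime(df["date"])
    return df
```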
@@ -202,6 +204,9 @@ class CustomJSONEncoder(json.JSONEncoder):
         # 处理日期时间类型
         elif isinstance(obj, datetime):
             return obj.isoformat()
+        # 新增:处理date对象
+        elif isinstance(obj, pd.Timestamp) or hasattr(obj, 'isoformat'):
+            return obj.isoformat()
         return super(CustomJSONEncoder, self).default(obj)
 
 # Helper function to convert numpy types to native python types for JSON serialization
@@ -515,11 +520,21 @@ def swagger_ui():
     }
 })
 def get_products():
+    """获取所有产品列表 (已重构为使用新数据源)"""
     try:
-        from utils.multi_store_data_utils import get_available_products
-        products = get_available_products()
+        df = load_new_data()
+        # 从新数据中提取唯一的产品ID
+        products_df = df[['product_id']].drop_duplicates().sort_values('product_id')
+
+        # 由于新数据没有product_name,我们创建一个兼容的格式
+        products = [
+            {'product_id': pid, 'product_name': f'产品 {pid}'}
+            for pid in products_df['product_id']
+        ]
+
         return jsonify({"status": "success", "data": products})
     except Exception as e:
+        logger.error(f"获取产品列表失败: {traceback.format_exc()}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 @app.route('/api/products/<product_id>', methods=['GET'])
@@ -570,25 +585,32 @@ def get_products():
     }
 })
 def get_product(product_id):
+    """获取单个产品详情 (已重构为使用新数据源)"""
     try:
-        from utils.multi_store_data_utils import load_multi_store_data
-        df = load_multi_store_data(product_id=product_id)
+        df = load_new_data()
+        product_df = df[df['product_id'] == product_id]
 
-        if df.empty:
+        if product_df.empty:
             return jsonify({"status": "error", "message": "产品不存在"}), 404
 
+        # 从新数据中提取信息
+        product_name = f"产品 {product_id}"  # 备用名称
+        if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+            product_name = product_df['product_name'].iloc[0]
+
         product_info = {
             "product_id": product_id,
-            "product_name": df['product_name'].iloc[0],
-            "data_points": len(df),
+            "product_name": product_name,
+            "data_points": len(product_df),
             "date_range": {
-                "start": df['date'].min().strftime('%Y-%m-%d'),
-                "end": df['date'].max().strftime('%Y-%m-%d')
+                "start": product_df['date'].min().strftime('%Y-%m-%d'),
+                "end": product_df['date'].max().strftime('%Y-%m-%d')
             }
         }
 
         return jsonify({"status": "success", "data": product_info})
     except Exception as e:
+        logger.error(f"获取产品详情失败: {traceback.format_exc()}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 @app.route('/api/products/<product_id>/sales', methods=['GET'])
@@ -644,29 +666,29 @@ def get_product(product_id):
     }
 })
 def get_product_sales(product_id):
+    """获取产品销售数据 (已重构为使用新数据源)"""
     try:
         start_date = request.args.get('start_date')
         end_date = request.args.get('end_date')
 
-        from utils.multi_store_data_utils import load_multi_store_data
-        df = load_multi_store_data(
-            product_id=product_id,
-            start_date=start_date,
-            end_date=end_date
-        )
-
-        if df.empty:
-            return jsonify({"status": "error", "message": "产品不存在或无数据"}), 404
-
-        # 确保数据按日期排序
-        df = df.sort_values('date')
-
-        # 转换日期为字符串以便JSON序列化
-        df['date'] = df['date'].dt.strftime('%Y-%m-%d')
-
-        sales_data = df.to_dict('records')
+        df = load_new_data()
+        df_product = df[df['product_id'] == product_id]
+
+        if start_date:
+            df_product = df_product[df_product['date'] >= pd.to_datetime(start_date)]
+        if end_date:
+            df_product = df_product[df_product['date'] <= pd.to_datetime(end_date)]
+
+        if df_product.empty:
+            return jsonify({"status": "error", "message": "产品不存在或在指定日期范围内无数据"}), 404
+
+        df_product = df_product.sort_values('date')
+        df_product['date'] = df_product['date'].dt.strftime('%Y-%m-%d')
+
+        sales_data = df_product.to_dict('records')
         return jsonify({"status": "success", "data": sales_data})
     except Exception as e:
+        logger.error(f"获取产品销售数据失败: {traceback.format_exc()}")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 @app.route('/api/data/upload', methods=['POST'])
@@ -1500,7 +1522,9 @@ def predict():
             traceback.print_exc()
             # 不应阻止向用户返回结果,因此只打印警告
 
-        return jsonify(response_data)
+        # 在返回前,使用我们的辅助函数对整个响应进行一次深度清洗
+        cleaned_response_data = convert_numpy_types_for_json(response_data)
+        return jsonify(cleaned_response_data)
     except Exception as e:
         print(f"预测失败: {str(e)}")
         import traceback
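The deep-clean helper `convert_numpy_types_for_json` is called here but its body is not in this hunk (only its introductory comment appears earlier in api.py). A typical recursive implementation of that pattern, offered as a sketch rather than the repository's actual code:

```python
import numpy as np
import pandas as pd


def convert_numpy_types_for_json(obj):
    """Recursively replace numpy/pandas scalars so json.dumps succeeds."""
    if isinstance(obj, dict):
        return {k: convert_numpy_types_for_json(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_numpy_types_for_json(v) for v in obj]
    if isinstance(obj, np.integer):
        return int(obj)          # e.g. np.int64 -> int
    if isinstance(obj, np.floating):
        return float(obj)        # e.g. np.float32 -> float
    if isinstance(obj, np.ndarray):
        return obj.tolist()      # arrays become plain lists
    if isinstance(obj, pd.Timestamp):
        return obj.isoformat()   # timestamps become ISO strings
    return obj
```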
@@ -2550,32 +2574,24 @@ def get_latest_model_id(model_type, product_id):
 
 # 获取产品名称的辅助函数
 def get_product_name(product_id):
-    """根据产品ID获取产品名称"""
+    """根据产品ID获取产品名称 (已重构)"""
     try:
-        # 从Excel文件中查找产品名称
-        from utils.multi_store_data_utils import load_multi_store_data
-        df = load_multi_store_data()
-        product_df = df[df['product_id'] == product_id]
-        if not product_df.empty:
-            return product_df['product_name'].iloc[0]
-
-        return None
+        # 注意:新数据源中没有 'product_name'。此函数现在返回一个占位符。
+        # 在未来的迭代中,可能需要关联一个产品信息表。
+        return f"产品 {product_id}"
     except Exception as e:
-        print(f"获取产品名称失败: {str(e)}")
-        return None
-# 获取店铺名称的辅助函数
+        logger.warning(f"获取产品名称时出现问题: {e}")
+        return product_id
+
 def get_store_name(store_id):
-    """根据店铺ID获取店铺名称"""
+    """根据店铺ID获取店铺名称 (已重构)"""
     try:
-        from utils.multi_store_data_utils import get_available_stores
-        stores = get_available_stores()
-        for store in stores:
-            if store['store_id'] == store_id:
-                return store['store_name']
-        return None
+        # 注意:新数据源中没有 'store_name'。此函数现在返回一个占位符。
+        # 在未来的迭代中,可能需要关联一个店铺信息表。
+        return f"店铺 {store_id}"
     except Exception as e:
-        print(f"获取店铺名称失败: {str(e)}")
-        return None
+        logger.warning(f"获取店铺名称时出现问题: {e}")
+        return store_id
 
 
 # run_prediction 函数已被移除,因为其逻辑已完全整合到 /api/prediction 路由处理函数中
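Both helpers now return placeholders because the new source carries no name columns, and the in-diff comments suggest joining a separate info table later. One way that future join could look, assuming a hypothetical `product_info.csv` lookup file with `product_id` and `product_name` columns (neither the file nor `_name_by_id` exists in this commit):

```python
import pandas as pd

# Hypothetical lookup table; not part of this commit.
_product_info = pd.read_csv("product_info.csv", dtype={"product_id": str})
_name_by_id = dict(zip(_product_info["product_id"], _product_info["product_name"]))


def get_product_name(product_id: str) -> str:
    """Return the real name when the lookup table knows it, else the placeholder."""
    return _name_by_id.get(product_id, f"产品 {product_id}")
```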
@@ -3837,23 +3853,38 @@ def update_train_task_with_websocket():
 
 @app.route('/api/stores', methods=['GET'])
 def get_stores():
-    """
-    获取所有店铺列表
-    """
+    """获取所有店铺列表 (已重构为使用新数据源并填充信息)"""
     try:
-        from utils.multi_store_data_utils import get_available_stores
-        stores = get_available_stores()
+        df = load_new_data()
+
+        # 从新数据中提取唯一的店铺信息
+        # 修正:只选择数据文件中实际存在的列
+        # 根据之前的分析,新数据有 'district' 列,但没有 'city' 和 'province'
+        stores_df = df[['store_id', 'district']].drop_duplicates('store_id')
+
+        stores_data = []
+        for _, row in stores_df.iterrows():
+            # 构建位置信息
+            location = row['district'] if pd.notna(row['district']) else "Unknown Location"
+
+            stores_data.append({
+                "store_id": row['store_id'],
+                "store_name": f"店铺 {row['store_id']}",  # 使用ID作为临时名称
+                "location": location,
+                "type": "标准药店",  # 填充默认值
+                "size": "120㎡",  # 填充默认值
+                "opening_date": "2023-01-01",  # 填充默认值
+                "status": "营业中"  # 填充默认值
+            })
+
         return jsonify({
             "status": "success",
-            "data": stores,
-            "count": len(stores)
+            "data": stores_data,
+            "count": len(stores_data)
         })
     except Exception as e:
-        return jsonify({
-            "status": "error",
-            "message": f"获取店铺列表失败: {str(e)}"
-        }), 500
+        logger.error(f"获取店铺列表失败: {traceback.format_exc()}")
+        return jsonify({"status": "error", "message": f"获取店铺列表失败: {str(e)}"}), 500
 
 @app.route('/api/stores/<store_id>', methods=['GET'])
 def get_store(store_id):
@@ -4043,11 +4074,20 @@ def delete_store(store_id):
 
 @app.route('/api/stores/<store_id>/products', methods=['GET'])
 def get_store_products(store_id):
-    """
-    获取店铺的产品列表
-    """
+    """获取店铺的产品列表 (已重构为使用新数据源)"""
     try:
-        products = get_available_products(store_id=store_id)
+        df = load_new_data()
+        store_df = df[df['store_id'] == store_id]
+
+        if store_df.empty:
+            return jsonify({"status": "success", "data": [], "count": 0})
+
+        products_df = store_df[['product_id']].drop_duplicates().sort_values('product_id')
+
+        products = [
+            {'product_id': pid, 'product_name': f'产品 {pid}'}
+            for pid in products_df['product_id']
+        ]
+
         return jsonify({
             "status": "success",
@@ -4055,10 +4095,8 @@ def get_store_products(store_id):
             "count": len(products)
         })
     except Exception as e:
-        return jsonify({
-            "status": "error",
-            "message": f"获取店铺产品列表失败: {str(e)}"
-        }), 500
+        logger.error(f"获取店铺产品列表失败: {traceback.format_exc()}")
+        return jsonify({"status": "error", "message": f"获取店铺产品列表失败: {str(e)}"}), 500
 
 @app.route('/api/stores/<store_id>/statistics', methods=['GET'])
 def get_store_statistics(store_id):
@@ -20,11 +20,13 @@ from datetime import datetime
 # 上述导入已不再需要,因为我们现在通过模型注册表动态获取训练器
 from predictors.model_predictor import load_model_and_predict
 from utils.data_utils import prepare_data, prepare_sequences
-from utils.multi_store_data_utils import (
-    load_multi_store_data,
-    get_store_product_sales_data,
-    aggregate_multi_store_data
-)
+# from utils.multi_store_data_utils import (
+#     load_multi_store_data,
+#     get_store_product_sales_data,
+#     aggregate_multi_store_data
+# )
+# 以上旧模块已被新的统一数据加载器替代
+from utils.new_data_loader import load_new_data
 from analysis.metrics import evaluate_model
 from core.config import DEVICE, DEFAULT_MODEL_DIR, DEFAULT_DATA_PATH
 
@@ -53,13 +55,10 @@ class PharmacyPredictor:
 
         print(f"使用设备: {self.device}")
 
-        # 尝试加载多店铺数据
-        try:
-            self.data = load_multi_store_data(data_path)
-            print(f"已加载多店铺数据,来源: {data_path}")
-        except Exception as e:
-            print(f"加载数据失败: {e}")
-            self.data = None
+        # 重构:不再预加载整个数据集到内存
+        # self.data 将在需要时动态加载
+        self.data = None
+        print("PharmacyPredictor 已初始化,将在需要时动态加载数据。")
 
     def train_model(self, product_id, model_type='transformer', epochs=100, batch_size=32,
                     learning_rate=0.001, sequence_length=30, forecast_horizon=7,
@@ -104,76 +103,54 @@ class PharmacyPredictor:
             except Exception as e:
                 print(f"进度回调失败: {e}", flush=True)
 
-        if self.data is None:
-            log_message("没有可用的数据,请先加载或生成数据", 'error')
+        # --- 数据加载与筛选重构 ---
+        # 统一使用新的数据加载器,替换掉所有旧的、分散的加载逻辑
+        log_message("正在使用新的统一数据加载器...", 'info')
+        try:
+            full_df = load_new_data()
+        except Exception as e:
+            log_message(f"使用新数据加载器失败: {e}", 'error')
             return None
 
-        # 根据训练模式准备数据
         if training_mode == 'product':
-            # 按产品训练:使用所有店铺的该产品数据
-            product_data = self.data[self.data['product_id'] == product_id].copy()
+            product_data = full_df[full_df['product_id'] == product_id].copy()
             if product_data.empty:
                 log_message(f"找不到产品 {product_id} 的数据", 'error')
                 return None
-            log_message(f"按产品训练模式: 产品 {product_id}, 数据量: {len(product_data)}")
+            log_message(f"按产品训练模式: 产品 {product_id}, 数据量: {len(product_data)}", 'info')
 
         elif training_mode == 'store':
-            # 按店铺训练
             if not store_id:
                 log_message("店铺训练模式需要指定 store_id", 'error')
                 return None
 
-            # 如果product_id是'unknown',则表示为店铺所有商品训练一个聚合模型
-            if product_id == 'unknown':
-                try:
-                    # 使用新的聚合函数,按店铺聚合
-                    product_data = aggregate_multi_store_data(
-                        store_id=store_id,
-                        aggregation_method=aggregation_method,
-                        file_path=self.data_path
-                    )
-                    log_message(f"按店铺聚合训练: 店铺 {store_id}, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}")
-                    # 将product_id设置为'store_{store_id}',与API查找逻辑保持一致
-                    product_id = f"store_{store_id}"
-                except Exception as e:
-                    log_message(f"聚合店铺 {store_id} 数据失败: {e}", 'error')
-                    return None
-            else:
+            # 筛选出该店铺的所有数据
+            store_df = full_df[full_df['store_id'] == store_id].copy()
+
+            # 判断是为单个产品训练还是为整个店铺聚合训练
+            if product_id and product_id != 'unknown' and product_id != 'all_products':
                 # 为店铺的单个特定产品训练
-                try:
-                    product_data = get_store_product_sales_data(
-                        store_id=store_id,
-                        product_id=product_id,
-                        file_path=self.data_path
-                    )
-                    log_message(f"按店铺-产品训练: 店铺 {store_id}, 产品 {product_id}, 数据量: {len(product_data)}")
-                except Exception as e:
-                    log_message(f"获取店铺产品数据失败: {e}", 'error')
-                    return None
+                product_data = store_df[store_df['product_id'] == product_id].copy()
+                log_message(f"按店铺-产品训练: 店铺 {store_id}, 产品 {product_id}, 数据量: {len(product_data)}", 'info')
+            else:
+                # 为整个店铺聚合训练
+                log_message(f"按店铺聚合训练: 店铺 {store_id} (所有药品)", 'info')
+                product_data = store_df.groupby('date').agg({
+                    'sales': 'sum',
+                    'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+                    'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+                }).reset_index()
+                log_message(f"聚合后数据量: {len(product_data)}", 'info')
 
         elif training_mode == 'global':
-            # 全局训练:聚合所有店铺的产品数据
-            try:
-                # 如果product_id是'unknown',则表示为全局所有商品训练一个聚合模型
-                if product_id == 'unknown':
-                    product_data = aggregate_multi_store_data(
-                        product_id=None,  # 传递None以触发真正的全局聚合
-                        aggregation_method=aggregation_method,
-                        file_path=self.data_path
-                    )
-                    log_message(f"全局训练模式: 所有产品, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}")
-                    # 将product_id设置为一个有意义的标识符
-                    product_id = 'all_products'
-                else:
-                    product_data = aggregate_multi_store_data(
-                        product_id=product_id,
-                        aggregation_method=aggregation_method,
-                        file_path=self.data_path
-                    )
-                    log_message(f"全局训练模式: 产品 {product_id}, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}")
-            except Exception as e:
-                log_message(f"聚合全局数据失败: {e}", 'error')
-                return None
+            product_data = full_df[full_df['product_id'] == product_id].copy()
+            product_data = product_data.groupby('date').agg({
+                'sales': aggregation_method,
+                'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+                'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            log_message(f"全局训练模式: 产品 {product_id}, 聚合方法 {aggregation_method}, 数据量: {len(product_data)}", 'info')
         else:
             log_message(f"不支持的训练模式: {training_mode}", 'error')
             return None
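The store-level branch above collapses per-product rows into one daily series with a single `groupby('date').agg(...)`; the same pattern recurs in the trainer and predictor hunks below. A small self-contained demonstration of that exact aggregation on synthetic rows:

```python
import pandas as pd

rows = pd.DataFrame({
    "date": pd.to_datetime(["2025-07-01"] * 2 + ["2025-07-02"] * 2),
    "product_id": ["P1", "P2", "P1", "P2"],
    "sales": [3.0, 5.0, 2.0, 4.0],
    "weekday": [1, 1, 2, 2],
    "month": [7, 7, 7, 7],
    "is_holiday": [False] * 4,
    "is_weekend": [False] * 4,
    "is_promotion": [False] * 4,
    "temperature": [19.5, 20.5, 21.0, 21.0],
})

daily = rows.groupby("date").agg({
    "sales": "sum",             # total store sales per day
    "weekday": "first", "month": "first", "is_holiday": "first",
    "is_weekend": "first", "is_promotion": "first",
    "temperature": "mean",      # average across the rows of that day
}).reset_index()

print(daily)  # two rows: 2025-07-01 sales=8.0, 2025-07-02 sales=6.0
```

Taking `'first'` for the calendar flags is safe here because they are constant within a day; `temperature` is averaged instead.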
@@ -23,7 +23,7 @@ import xgboost as xgb
 
 from analysis.trend_analysis import analyze_prediction_result
 from utils.visualization import plot_prediction_results
-from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+from utils.new_data_loader import load_new_data
 from core.config import DEVICE, get_model_file_path, DEFAULT_DATA_PATH
 from models.model_registry import get_predictor, register_predictor
 
@@ -96,19 +96,40 @@ def load_model_and_predict(model_path: str, product_id: str, model_type: str, st
     if not os.path.exists(model_path):
         raise FileNotFoundError(f"模型文件 {model_path} 不存在")
 
-    # --- 数据加载部分保持不变 ---
-    from utils.multi_store_data_utils import aggregate_multi_store_data
+    # --- 数据加载重构 ---
+    # 统一使用新的数据加载器,确保与训练时的数据源和处理逻辑完全一致
+    print("正在使用新的统一数据加载器进行预测...")
+    full_df = load_new_data()
+
     if training_mode == 'store' and store_id:
-        from utils.multi_store_data_utils import load_multi_store_data
-        store_df_for_name = load_multi_store_data(store_id=store_id)
-        product_name = store_df_for_name['store_name'].iloc[0] if not store_df_for_name.empty else f"店铺 {store_id}"
-        product_df = aggregate_multi_store_data(store_id=store_id, aggregation_method='sum', file_path=DEFAULT_DATA_PATH)
+        store_df = full_df[full_df['store_id'] == store_id].copy()
+        # 判断是为单个产品预测还是为整个店铺聚合预测
+        if product_id and product_id != 'unknown' and product_id != 'all_products':
+            product_df = store_df[store_df['product_id'] == product_id].copy()
+            product_name = f"店铺 {store_id} - 产品 {product_id}"
+        else:
+            # 为整个店铺的聚合销售额进行预测
+            product_df = store_df.groupby('date').agg({
+                'sales': 'sum',
+                'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+                'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            product_name = f"店铺 {store_id} (所有药品聚合)"
     elif training_mode == 'global':
-        product_df = aggregate_multi_store_data(aggregation_method='sum', file_path=DEFAULT_DATA_PATH)
-        product_name = "全局销售数据"
-    else:
-        product_df = aggregate_multi_store_data(product_id=product_id, aggregation_method='sum', file_path=DEFAULT_DATA_PATH)
-        product_name = product_df['product_name'].iloc[0] if not product_df.empty else product_id
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        product_df = product_df.groupby('date').agg({
+            'sales': 'sum',
+            'weekday': 'first', 'month': 'first', 'is_holiday': 'first',
+            'is_weekend': 'first', 'is_promotion': 'first', 'temperature': 'mean'
+        }).reset_index()
+        product_name = f"全局聚合 - 产品 {product_id}"
+    else:  # 默认 'product' 模式
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        # 兼容性处理:新数据可能没有 product_name 列
+        if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+            product_name = product_df['product_name'].iloc[0]
+        else:
+            product_name = f"Product {product_id}"
 
     if product_df.empty:
         raise ValueError(f"产品 {product_id} 或店铺 {store_id} 没有销售数据")
@@ -17,6 +17,7 @@ from tqdm import tqdm
 from models.kan_model import KANForecaster
 from models.optimized_kan_forecaster import OptimizedKANForecaster
 from utils.data_utils import create_dataset, PharmacyDataset
+from utils.new_data_loader import load_new_data
 from utils.visualization import plot_loss_curve
 from analysis.metrics import evaluate_model
 from core.config import DEVICE, DEFAULT_MODEL_DIR, LOOK_BACK, FORECAST_HORIZON
@@ -35,45 +36,44 @@ def train_product_model_with_kan(product_id, model_identifier, product_df=None,
         model: 训练好的模型
         metrics: 模型评估指标
     """
-    # 如果没有传入product_df,则根据训练模式加载数据
-    if product_df is None:
-        from utils.multi_store_data_utils import load_multi_store_data, get_store_product_sales_data, aggregate_multi_store_data
-        try:
-            if training_mode == 'store' and store_id:
-                # 加载特定店铺的数据
-                product_df = get_store_product_sales_data(
-                    store_id,
-                    product_id,
-                    'pharmacy_sales_multi_store.csv'
-                )
-                training_scope = f"店铺 {store_id}"
-            elif training_mode == 'global':
-                # 聚合所有店铺的数据
-                product_df = aggregate_multi_store_data(
-                    product_id,
-                    aggregation_method=aggregation_method,
-                    file_path='pharmacy_sales_multi_store.csv'
-                )
-                training_scope = f"全局聚合({aggregation_method})"
-            else:
-                # 默认:加载所有店铺的产品数据
-                product_df = load_multi_store_data('pharmacy_sales_multi_store.csv', product_id=product_id)
-                training_scope = "所有店铺"
-        except Exception as e:
-            print(f"多店铺数据加载失败: {e}")
-            # 后备方案:尝试原始数据
-            df = pd.read_excel('pharmacy_sales.xlsx')
-            product_df = df[df['product_id'] == product_id].sort_values('date')
-            training_scope = "原始数据"
-    else:
-        # 如果传入了product_df,直接使用
-        if training_mode == 'store' and store_id:
-            training_scope = f"店铺 {store_id}"
-        elif training_mode == 'global':
-            training_scope = f"全局聚合({aggregation_method})"
-        else:
-            training_scope = "所有店铺"
+    # --- 数据加载与筛选重构 ---
+    # 统一使用新的数据加载器,替换掉所有旧的、分散的加载逻辑
+    print("正在使用新的统一数据加载器...")
+    full_df = load_new_data()  # 加载完整的、适配后的新数据
+
+    if training_mode == 'store' and store_id:
+        store_df = full_df[full_df['store_id'] == store_id].copy()
+        if product_id and product_id != 'unknown' and product_id != 'all_products':
+            product_df = store_df[store_df['product_id'] == product_id].copy()
+            training_scope = f"店铺 {store_id} - 产品 {product_id}"
+        else:
+            product_df = store_df.groupby('date').agg({
+                'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                'is_holiday': 'first', 'is_weekend': 'first',
+                'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            training_scope = f"店铺 {store_id} (所有药品聚合)"
+    elif training_mode == 'global':
+        # 筛选特定产品在所有店铺的聚合数据
+        # 注意:新数据已经是按 (store_id, product_id, date) 展开的,聚合逻辑可能需要重新审视
+        # 此处暂时只筛选产品ID
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        # 按日期对同一产品在不同店铺的销售额求和
+        product_df = product_df.groupby('date').agg({
+            'sales': 'sum',
+            # 保留其他需要的特征,例如取第一个非空值或平均值
+            'weekday': 'first',
+            'month': 'first',
+            'is_holiday': 'first',
+            'is_weekend': 'first',
+            'is_promotion': 'first',
+            'temperature': 'mean'
+        }).reset_index()
+        training_scope = f"全局聚合({aggregation_method})"
+    else:  # 默认 'product' 模式
+        # 筛选特定产品的数据(可能跨越多个店铺,但此处不聚合)
+        product_df = full_df[full_df['product_id'] == product_id].copy()
+        training_scope = f"所有店铺中的产品 {product_id}"
 
     if product_df.empty:
         raise ValueError(f"产品 {product_id} 没有可用的销售数据")
@@ -95,7 +95,11 @@ def train_product_model_with_kan(product_id, model_identifier, product_df=None,
         raise ValueError(error_msg)
 
     product_df = product_df.sort_values('date')
-    product_name = product_df['product_name'].iloc[0]
+    # 兼容性处理:新数据可能没有 product_name 列
+    if 'product_name' in product_df.columns:
+        product_name = product_df['product_name'].iloc[0]
+    else:
+        product_name = f"Product {product_id}"  # 使用 product_id 作为备用名称
 
     model_type = "优化版KAN" if use_optimized else "KAN"
     print(f"使用{model_type}模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型")
@@ -16,7 +16,8 @@ from tqdm import tqdm
 
 from models.mlstm_model import MLSTMTransformer as MatrixLSTM
 from utils.data_utils import create_dataset, PharmacyDataset
-from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+# from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+from utils.new_data_loader import load_new_data
 from utils.visualization import plot_loss_curve
 from analysis.metrics import evaluate_model
 from core.config import (
@@ -124,13 +125,43 @@ def train_product_model_with_mlstm(
     except Exception as e:
         print(f"[mLSTM] 任务 {task_id}: 进度管理器初始化失败: {e}", flush=True)
 
-    # 数据现在由调用方传入,不再在此处加载
-    if training_mode == 'store' and store_id:
-        training_scope = f"店铺 {store_id}"
-    elif training_mode == 'global':
-        training_scope = f"全局聚合({aggregation_method})"
+    # --- 数据加载与筛选重构 ---
+    # 统一使用新的数据加载器
+    if product_df is None:
+        print("正在使用新的统一数据加载器...")
+        full_df = load_new_data()
+
+        if training_mode == 'store' and store_id:
+            store_df = full_df[full_df['store_id'] == store_id].copy()
+            if product_id and product_id != 'unknown' and product_id != 'all_products':
+                product_df = store_df[store_df['product_id'] == product_id].copy()
+                training_scope = f"店铺 {store_id} - 产品 {product_id}"
+            else:
+                product_df = store_df.groupby('date').agg({
+                    'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                    'is_holiday': 'first', 'is_weekend': 'first',
+                    'is_promotion': 'first', 'temperature': 'mean'
+                }).reset_index()
+                training_scope = f"店铺 {store_id} (所有药品聚合)"
+        elif training_mode == 'global':
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            product_df = product_df.groupby('date').agg({
+                'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                'is_holiday': 'first', 'is_weekend': 'first',
+                'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            training_scope = f"全局聚合({aggregation_method})"
+        else:  # 默认 'product' 模式
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            training_scope = f"所有店铺中的产品 {product_id}"
     else:
-        training_scope = "所有店铺"
+        # 如果传入了product_df,直接使用
+        if training_mode == 'store' and store_id:
+            training_scope = f"店铺 {store_id}"
+        elif training_mode == 'global':
+            training_scope = f"全局聚合({aggregation_method})"
+        else:
+            training_scope = "所有店铺"
 
     # 数据量检查
     min_required_samples = sequence_length + forecast_horizon
@@ -149,7 +180,11 @@ def train_product_model_with_mlstm(
         emit_progress(f"训练失败:数据不足 ({len(product_df)}/{min_required_samples} 天)")
         raise ValueError(error_msg)
 
-    product_name = product_df['product_name'].iloc[0]
+    # 兼容性处理:新数据可能没有 product_name 列
+    if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+        product_name = product_df['product_name'].iloc[0]
+    else:
+        product_name = f"产品 {product_id}"
 
     print(f"[mLSTM] 使用mLSTM模型训练产品 '{product_name}' (ID: {product_id}) 的销售预测模型", flush=True)
     print(f"[mLSTM] 训练范围: {training_scope}", flush=True)
@@ -17,7 +17,8 @@ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
 
 from models.transformer_model import TimeSeriesTransformer
 from utils.data_utils import create_dataset, PharmacyDataset
-from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+# from utils.multi_store_data_utils import get_store_product_sales_data, aggregate_multi_store_data
+from utils.new_data_loader import load_new_data
 from utils.visualization import plot_loss_curve
 from analysis.metrics import evaluate_model
 from core.config import (
@@ -81,15 +82,42 @@ def train_product_model_with_transformer(
         def finish_training(self, *args, **kwargs): pass
     progress_manager = DummyProgressManager()
 
+    # --- Data loading and filtering refactor ---
     if product_df is None:
-        from utils.multi_store_data_utils import aggregate_multi_store_data
-        product_df = aggregate_multi_store_data(
-            product_id=product_id,
-            aggregation_method=aggregation_method
-        )
-        training_scope = f"Global aggregation ({aggregation_method})"
+        print("Using the new unified data loader...")
+        full_df = load_new_data()
+
+        if training_mode == 'store' and store_id:
+            store_df = full_df[full_df['store_id'] == store_id].copy()
+            if product_id and product_id != 'unknown' and product_id != 'all_products':
+                product_df = store_df[store_df['product_id'] == product_id].copy()
+                training_scope = f"Store {store_id} - Product {product_id}"
+            else:
+                product_df = store_df.groupby('date').agg({
+                    'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                    'is_holiday': 'first', 'is_weekend': 'first',
+                    'is_promotion': 'first', 'temperature': 'mean'
+                }).reset_index()
+                training_scope = f"Store {store_id} (all products aggregated)"
+        elif training_mode == 'global':
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            product_df = product_df.groupby('date').agg({
+                'sales': 'sum', 'weekday': 'first', 'month': 'first',
+                'is_holiday': 'first', 'is_weekend': 'first',
+                'is_promotion': 'first', 'temperature': 'mean'
+            }).reset_index()
+            training_scope = f"Global aggregation ({aggregation_method})"
+        else:  # default 'product' mode
+            product_df = full_df[full_df['product_id'] == product_id].copy()
+            training_scope = f"Product {product_id} across all stores"
     else:
-        training_scope = "All stores"
+        # If a product_df was passed in, use it directly
+        if training_mode == 'store' and store_id:
+            training_scope = f"Store {store_id}"
+        elif training_mode == 'global':
+            training_scope = f"Global aggregation ({aggregation_method})"
+        else:
+            training_scope = "All stores"
 
     if product_df.empty:
         raise ValueError(f"Product {product_id} has no sales data available")
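
Aside: the 'store' and 'global' branches in this hunk apply the same pandas groupby/agg spec. A minimal, self-contained sketch of what that spec produces, on a synthetic frame (all IDs and values below are illustrative, not taken from the dataset):

import pandas as pd

# Synthetic frame with the columns the trainer expects after loading.
df = pd.DataFrame({
    'date': pd.to_datetime(['2022-01-01', '2022-01-01', '2022-01-02']),
    'store_id': ['S1', 'S2', 'S1'],
    'product_id': ['P1', 'P1', 'P1'],
    'sales': [3.0, 2.0, 4.0],
    'weekday': [5, 5, 6],
    'month': [1, 1, 1],
    'is_holiday': [False, False, False],
    'is_weekend': [True, True, True],
    'is_promotion': [0, 0, 0],
    'temperature': [20.0, 22.0, 21.0],
})

# The same agg spec as above: sum the target, carry calendar flags
# through with 'first', and average the temperature signal.
agg_df = df.groupby('date').agg({
    'sales': 'sum', 'weekday': 'first', 'month': 'first',
    'is_holiday': 'first', 'is_weekend': 'first',
    'is_promotion': 'first', 'temperature': 'mean'
}).reset_index()

print(agg_df)  # one row per date: sales 5.0 then 4.0, temperature 21.0 both days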
@@ -106,7 +134,11 @@ def train_product_model_with_transformer(
         raise ValueError(error_msg)
 
     product_df = product_df.sort_values('date')
-    product_name = product_df['product_name'].iloc[0]
+    # Compatibility: the new data may not carry a product_name column
+    if 'product_name' in product_df.columns and not product_df['product_name'].empty:
+        product_name = product_df['product_name'].iloc[0]
+    else:
+        product_name = f"Product {product_id}"
 
     print(f"[Transformer] Training a sales forecast model for product '{product_name}' (ID: {product_id})", flush=True)
     print(f"[Device] Using device: {DEVICE}", flush=True)
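
The same fallback pattern appears in both the mLSTM and Transformer trainers; it generalizes to a tiny helper. A sketch only — first_or_default is illustrative and not part of this commit:

import pandas as pd

def first_or_default(df: pd.DataFrame, column: str, default):
    # Mirrors the compatibility branch above: take the first value of a column
    # when it exists and is non-empty, otherwise fall back to a default.
    if column in df.columns and not df[column].empty:
        return df[column].iloc[0]
    return default

# e.g. product_name = first_or_default(product_df, 'product_name', f"Product {product_id}")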
server/utils/multi_store_data_utils.py
@@ -1,424 +0,0 @@
-"""
-Multi-store sales forecasting system - data processing utilities
-Supports loading, filtering and processing of multi-store data
-"""
-
-import pandas as pd
-import numpy as np
-import os
-from datetime import datetime, timedelta
-from typing import Optional, List, Tuple, Dict, Any
-from core.config import DEFAULT_DATA_PATH
-
-def load_multi_store_data(file_path: str = None,
-                          store_id: Optional[str] = None,
-                          product_id: Optional[str] = None,
-                          start_date: Optional[str] = None,
-                          end_date: Optional[str] = None) -> pd.DataFrame:
-    """
-    Load multi-store sales data, with optional filtering by store, product and date range
-
-    Args:
-        file_path: data file path (.csv, .xlsx, .parquet supported); when None, the default path from config is used
-        store_id: store ID; None returns data for all stores
-        product_id: product ID; None returns data for all products
-        start_date: start date (YYYY-MM-DD)
-        end_date: end date (YYYY-MM-DD)
-
-    Returns:
-        DataFrame: the filtered sales data
-    """
-
-    # Fall back to the default path from the config file when none is given
-    if file_path is None:
-        file_path = DEFAULT_DATA_PATH
-
-    if not os.path.exists(file_path):
-        raise FileNotFoundError(f"Data file does not exist: {file_path}")
-
-    try:
-        if file_path.endswith('.csv'):
-            df = pd.read_csv(file_path)
-        elif file_path.endswith('.xlsx'):
-            df = pd.read_excel(file_path)
-        elif file_path.endswith('.parquet'):
-            df = pd.read_parquet(file_path)
-        else:
-            raise ValueError(f"Unsupported file format: {file_path}")
-
-        print(f"Successfully loaded data file: {file_path}")
-    except Exception as e:
-        print(f"Failed to load file {file_path}: {e}")
-        raise
-
-    # Filter by store
-    if store_id:
-        df = df[df['store_id'] == store_id].copy()
-        print(f"Filtered by store: {store_id}, remaining records: {len(df)}")
-
-    # Filter by product
-    if product_id:
-        df = df[df['product_id'] == product_id].copy()
-        print(f"Filtered by product: {product_id}, remaining records: {len(df)}")
-
-    # Standardize column names and dtypes
-    df = standardize_column_names(df)
-
-    # Apply the date-range filter after standardization
-    if start_date:
-        try:
-            start_date_dt = pd.to_datetime(start_date)
-            # Make sure the comparison happens between datetime objects
-            if 'date' in df.columns:
-                df = df[df['date'] >= start_date_dt].copy()
-                print(f"Start-date filter: {start_date_dt}, remaining records: {len(df)}")
-        except (ValueError, TypeError):
-            print(f"Warning: invalid start date format '{start_date}', ignored.")
-
-    if end_date:
-        try:
-            end_date_dt = pd.to_datetime(end_date)
-            # Make sure the comparison happens between datetime objects
-            if 'date' in df.columns:
-                df = df[df['date'] <= end_date_dt].copy()
-                print(f"End-date filter: {end_date_dt}, remaining records: {len(df)}")
-        except (ValueError, TypeError):
-            print(f"Warning: invalid end date format '{end_date}', ignored.")
-
-    if len(df) == 0:
-        print("Warning: no data left after filtering")
-
-    return df
-
-def standardize_column_names(df: pd.DataFrame) -> pd.DataFrame:
-    """
-    Standardize column names to the format expected by the training code and the API
-
-    Args:
-        df: the raw DataFrame
-
-    Returns:
-        DataFrame: the DataFrame with standardized column names
-    """
-    df = df.copy()
-
-    # Define the rename map and apply it
-    rename_map = {
-        'sales_quantity': 'sales',  # fix: match the original column name
-        'temperature_2m_mean': 'temperature',  # new: handle the temperature column
-        'dayofweek': 'weekday'  # fix: match the original column name
-    }
-    df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
-
-    # Make sure the date column has datetime dtype
-    if 'date' in df.columns:
-        df['date'] = pd.to_datetime(df['date'], errors='coerce')
-        df.dropna(subset=['date'], inplace=True)  # drop rows whose date could not be parsed
-    else:
-        # Without a date column we cannot continue; return an empty DataFrame
-        return pd.DataFrame()
-
-    # Computing sales_amount:
-    # with no price column, the sales_amount computation would need to change or go away;
-    # it is commented out here because the raw data already carries sales_amount
-    # if 'sales_amount' not in df.columns and 'sales' in df.columns and 'price' in df.columns:
-    #     # first make sure sales and price are numeric
-    #     df['sales'] = pd.to_numeric(df['sales'], errors='coerce')
-    #     df['price'] = pd.to_numeric(df['price'], errors='coerce')
-    #     df['sales_amount'] = df['sales'] * df['price']
-
-    # Create missing feature columns
-    if 'weekday' not in df.columns:
-        df['weekday'] = df['date'].dt.dayofweek
-
-    if 'month' not in df.columns:
-        df['month'] = df['date'].dt.month
-
-    # Add missing metadata columns
-    meta_columns = {
-        'store_name': 'Unknown Store',
-        'store_location': 'Unknown Location',
-        'store_type': 'Unknown',
-        'product_name': 'Unknown Product',
-        'product_category': 'Unknown Category'
-    }
-    for col, default in meta_columns.items():
-        if col not in df.columns:
-            df[col] = default
-
-    # Add missing boolean feature columns
-    default_features = {
-        'is_holiday': False,
-        'is_weekend': None,
-        'is_promotion': False,
-        'temperature': 20.0
-    }
-
-    for feature, default_value in default_features.items():
-        if feature not in df.columns:
-            if feature == 'is_weekend':
-                df['is_weekend'] = df['weekday'].isin([5, 6])
-            else:
-                df[feature] = default_value
-
-    # Make sure numeric dtypes are correct
-    numeric_columns = ['sales', 'sales_amount', 'weekday', 'month', 'temperature']
-    for col in numeric_columns:
-        if col in df.columns:
-            df[col] = pd.to_numeric(df[col], errors='coerce')
-
-    # Make sure boolean dtypes are correct
-    boolean_columns = ['is_holiday', 'is_weekend', 'is_promotion']
-    for col in boolean_columns:
-        if col in df.columns:
-            df[col] = df[col].astype(bool)
-
-    print(f"Data standardization finished; available feature columns: {[col for col in ['sales', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature'] if col in df.columns]}")
-
-    return df
-
-def get_available_stores(file_path: str = None) -> List[Dict[str, Any]]:
-    """
-    Get the list of available stores
-
-    Args:
-        file_path: data file path
-
-    Returns:
-        List[Dict]: list of store info records
-    """
-    try:
-        df = load_multi_store_data(file_path)
-
-        if 'store_id' not in df.columns:
-            print("The data file is missing a 'store_id' column")
-            return []
-
-        # Collect store info gracefully even when some columns are missing
-        store_info = []
-
-        # Use drop_duplicates to get the unique store rows
-        stores_df = df.drop_duplicates(subset=['store_id'])
-
-        for _, row in stores_df.iterrows():
-            store_info.append({
-                'store_id': row['store_id'],
-                'store_name': row.get('store_name', f"Store {row['store_id']}"),
-                'location': row.get('store_location', 'Unknown location'),
-                'type': row.get('store_type', 'standard'),
-                'opening_date': row.get('opening_date', 'unknown'),
-            })
-
-        return store_info
-    except Exception as e:
-        print(f"Failed to get the store list: {e}")
-        return []
-
-def get_available_products(file_path: str = None,
-                           store_id: Optional[str] = None) -> List[Dict[str, Any]]:
-    """
-    Get the list of available products
-
-    Args:
-        file_path: data file path
-        store_id: store ID; None returns products across all stores
-
-    Returns:
-        List[Dict]: list of product info records
-    """
-    try:
-        df = load_multi_store_data(file_path, store_id=store_id)
-
-        # Collect the unique product info
-        product_columns = ['product_id', 'product_name']
-        if 'product_category' in df.columns:
-            product_columns.append('product_category')
-        if 'unit_price' in df.columns:
-            product_columns.append('unit_price')
-
-        products = df[product_columns].drop_duplicates()
-
-        return products.to_dict('records')
-    except Exception as e:
-        print(f"Failed to get the product list: {e}")
-        return []
-
-def get_store_product_sales_data(store_id: str,
-                                 product_id: str,
-                                 file_path: str = None) -> pd.DataFrame:
-    """
-    Get the sales data of a specific product in a specific store, for model training
-
-    Args:
-        file_path: data file path
-        store_id: store ID
-        product_id: product ID
-
-    Returns:
-        DataFrame: processed sales data containing the features the model needs
-    """
-    # Load the data
-    df = load_multi_store_data(file_path, store_id=store_id, product_id=product_id)
-
-    if len(df) == 0:
-        raise ValueError(f"No sales data found for store {store_id}, product {product_id}")
-
-    # Make sure the data is sorted by date
-    df = df.sort_values('date').copy()
-
-    # Standardization already happened inside load_multi_store_data
-    # Verify that the required columns exist
-    required_columns = ['sales', 'price', 'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature']
-    missing_columns = [col for col in required_columns if col not in df.columns]
-
-    if missing_columns:
-        print(f"Warning: columns {missing_columns} are still missing after standardization")
-        raise ValueError(f"Cannot build complete feature data; missing columns: {missing_columns}")
-
-    # All columns model training needs (features + target)
-    final_columns = [
-        'date', 'sales', 'product_id', 'product_name', 'store_id', 'store_name',
-        'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature'
-    ]
-
-    # Keep only the columns that actually exist in the DataFrame
-    existing_columns = [col for col in final_columns if col in df.columns]
-
-    # Return a DataFrame restricted to those required columns
-    return df[existing_columns]
-
-def aggregate_multi_store_data(product_id: Optional[str] = None,
-                               store_id: Optional[str] = None,
-                               aggregation_method: str = 'sum',
-                               file_path: str = None) -> pd.DataFrame:
-    """
-    Aggregate sales data, either per product (global) or per store (all products)
-
-    Args:
-        file_path: data file path
-        product_id: product ID (for the global model)
-        store_id: store ID (for the store-aggregated model)
-        aggregation_method: aggregation method ('sum', 'mean', 'median')
-
-    Returns:
-        DataFrame: the aggregated sales data
-    """
-    # Load data depending on whether this is a store aggregation, a per-product aggregation, or a truly global one
-    if store_id:
-        # Store aggregation: load all data of that store
-        df = load_multi_store_data(file_path, store_id=store_id)
-        if len(df) == 0:
-            raise ValueError(f"No sales data found for store {store_id}")
-        grouping_entity = f"store {store_id}"
-    elif product_id:
-        # Product aggregation: load that product's data across all stores
-        df = load_multi_store_data(file_path, product_id=product_id)
-        if len(df) == 0:
-            raise ValueError(f"No sales data found for product {product_id}")
-        grouping_entity = f"product {product_id}"
-    else:
-        # Truly global aggregation: load everything
-        df = load_multi_store_data(file_path)
-        if len(df) == 0:
-            raise ValueError("The data file is empty; global aggregation is impossible")
-        grouping_entity = "all products"
-
-    # Aggregate by date (using the standardized column names)
-    agg_dict = {}
-    if aggregation_method == 'sum':
-        agg_dict = {
-            'sales': 'sum',  # standardized quantity column
-            'sales_amount': 'sum',
-            'price': 'mean'  # standardized price column, averaged
-        }
-    elif aggregation_method == 'mean':
-        agg_dict = {
-            'sales': 'mean',
-            'sales_amount': 'mean',
-            'price': 'mean'
-        }
-    elif aggregation_method == 'median':
-        agg_dict = {
-            'sales': 'median',
-            'sales_amount': 'median',
-            'price': 'median'
-        }
-
-    # Keep only columns that actually exist
-    available_cols = df.columns.tolist()
-    agg_dict = {k: v for k, v in agg_dict.items() if k in available_cols}
-
-    # Aggregate the data
-    aggregated_df = df.groupby('date').agg(agg_dict).reset_index()
-
-    # Take the product info from the first row (i.e. the first store)
-    product_info = df[['product_id', 'product_name', 'product_category']].iloc[0]
-    for col, val in product_info.items():
-        aggregated_df[col] = val
-
-    # Mark the store columns as global
-    aggregated_df['store_id'] = 'GLOBAL'
-    aggregated_df['store_name'] = f'All stores-{aggregation_method.upper()}'
-    aggregated_df['store_location'] = 'Global aggregation'
-    aggregated_df['store_type'] = 'global'
-
-    # Standardize the aggregated data (adds any missing feature columns)
-    aggregated_df = aggregated_df.sort_values('date').copy()
-    aggregated_df = standardize_column_names(aggregated_df)
-
-    # All columns model training needs (features + target)
-    final_columns = [
-        'date', 'sales', 'product_id', 'product_name', 'store_id', 'store_name',
-        'weekday', 'month', 'is_holiday', 'is_weekend', 'is_promotion', 'temperature'
-    ]
-
-    # Keep only the columns that actually exist in the DataFrame
-    existing_columns = [col for col in final_columns if col in aggregated_df.columns]
-
-    # Return a DataFrame restricted to those required columns
-    return aggregated_df[existing_columns]
-
-def get_sales_statistics(file_path: str = None,
-                         store_id: Optional[str] = None,
-                         product_id: Optional[str] = None) -> Dict[str, Any]:
-    """
-    Get summary statistics of the sales data
-
-    Args:
-        file_path: data file path
-        store_id: store ID
-        product_id: product ID
-
-    Returns:
-        Dict: the statistics
-    """
-    try:
-        df = load_multi_store_data(file_path, store_id=store_id, product_id=product_id)
-
-        if len(df) == 0:
-            return {'error': 'no data'}
-
-        stats = {
-            'total_records': len(df),
-            'date_range': {
-                'start': df['date'].min().strftime('%Y-%m-%d'),
-                'end': df['date'].max().strftime('%Y-%m-%d')
-            },
-            'stores': df['store_id'].nunique(),
-            'products': df['product_id'].nunique(),
-            'total_sales_amount': float(df['sales_amount'].sum()) if 'sales_amount' in df.columns else 0,
-            'total_quantity': int(df['quantity_sold'].sum()) if 'quantity_sold' in df.columns else 0,
-            'avg_daily_sales': float(df.groupby('date')['quantity_sold'].sum().mean()) if 'quantity_sold' in df.columns else 0
-        }
-
-        return stats
-
-    except Exception as e:
-        return {'error': str(e)}
-
-# Backward-compatible helper
-def load_data(file_path=None, store_id=None):
-    """
-    Backward-compatible data loading function
-    """
-    return load_multi_store_data(file_path, store_id=store_id)
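
With this module removed, its global-aggregation path can be approximated on top of the new loader introduced below. A minimal sketch under that assumption — aggregate_product_globally is a hypothetical helper, not code from this commit, and it assumes the adapted frame exposes 'product_id', 'date', 'sales' and 'temperature' as shown in the trainer hunks:

import pandas as pd
from utils.new_data_loader import load_new_data

def aggregate_product_globally(product_id: str, method: str = 'sum') -> pd.DataFrame:
    # Rough stand-in for the deleted aggregate_multi_store_data(product_id=...) path.
    full_df = load_new_data()
    product_df = full_df[full_df['product_id'] == product_id]
    if product_df.empty:
        raise ValueError(f"No sales data available for product {product_id}")
    return (product_df
            .groupby('date')
            .agg({'sales': method, 'temperature': 'mean'})
            .reset_index()
            .sort_values('date'))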
86
server/utils/new_data_loader.py
Normal file
@@ -0,0 +1,86 @@
+import pandas as pd
+import os
+
+def load_new_data(file_path='data/old_5shops_50skus.parquet'):
+    """
+    Load and adapt the new Parquet data file, exposing a format compatible with the existing system.
+
+    Core principles:
+    1. Preserve the integrity of the new data; drop none of the original features.
+    2. Adapt the new data first, via renames and proxy columns, to stay compatible with the old code.
+
+    Args:
+        file_path (str): path to the new data file.
+
+    Returns:
+        pandas.DataFrame: the adapted DataFrame, containing all original features.
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Data file does not exist: {file_path}")
+
+    print(f"Loading new data from {file_path}...")
+    df = pd.read_parquet(file_path)
+    print("Data loaded; starting adaptation...")
+
+    # Work on a copy so the original df stays untouched
+    df_adapted = df.copy()
+
+    # --- 1. Column renames (adapting to the old code's naming) ---
+    # Step 1.1: safely drop the redundant 'date' column in the new data; 'kdrq' is authoritative
+    if 'date' in df_adapted.columns and 'kdrq' in df.columns:
+        df_adapted.drop(columns=['date'], inplace=True)
+        print("Dropped the redundant 'date' column from the new data; 'kdrq' is authoritative.")
+
+    rename_map = {
+        'subbh': 'store_id',
+        'hh': 'product_id',
+        'kdrq': 'date',  # kdrq can now safely be renamed to date
+        'net_sales_quantity': 'sales',  # map the target variable to 'sales'
+        'temperature_2m_mean': 'temperature',
+        'day_of_week': 'weekday'
+    }
+    df_adapted.rename(columns=rename_map, inplace=True)
+    print(f"Column renames done: {list(rename_map.keys())} -> {list(rename_map.values())}")
+
+    # --- 2. Dtype conversions ---
+    # Convert the 'date' column to proper datetime objects
+    df_adapted['date'] = pd.to_datetime(df_adapted['date'])
+    print("Converted the 'date' column to datetime.")
+
+    # --- 3. Key feature engineering (proxy columns) ---
+    # The existing models rely on the 'is_promotion' and 'is_weekend' features.
+    # 'is_weekend' already exists in the new data; nothing to do.
+    # 'is_promotion' does not exist in the new data; create a proxy column defaulting to 0.
+    if 'is_promotion' not in df_adapted.columns:
+        df_adapted['is_promotion'] = 0
+        print("Created proxy column 'is_promotion' with default value 0.")
+
+    # Make sure a 'month' column exists; derive it from the date if missing
+    if 'month' not in df_adapted.columns and 'date' in df_adapted.columns:
+        df_adapted['month'] = df_adapted['date'].dt.month
+        print("Derived the 'month' column from 'date'.")
+
+    print("Data adaptation finished.")
+
+    # Return the adapted DataFrame with all columns intact
+    return df_adapted
+
+if __name__ == '__main__':
+    # Run this script directly to smoke-test the loader
+    print("--- Testing the data loader ---")
+    try:
+        adapted_df = load_new_data()
+        print("\n--- Adapted data info ---")
+        adapted_df.info()
+
+        print("\n--- Checking key columns ---")
+        key_cols = [
+            'store_id', 'product_id', 'date', 'sales',
+            'temperature', 'weekday', 'is_promotion', 'month'
+        ]
+        print(adapted_df[key_cols].head())
+
+        print(f"\nTest passed! The adapted DataFrame has {len(adapted_df.columns)} columns.")
+
+    except Exception as e:
+        print(f"\nTest failed: {e}")
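
Usage sketch for the loader above, slicing the adapted frame the way the trainers now do (this assumes the module is importable as utils.new_data_loader; 'S001' and 'P001' are placeholder IDs, not values from the dataset):

from utils.new_data_loader import load_new_data

df = load_new_data()  # defaults to data/old_5shops_50skus.parquet
series = df[(df['store_id'] == 'S001') & (df['product_id'] == 'P001')].sort_values('date')
print(series[['date', 'sales', 'temperature', 'weekday', 'is_promotion']].head())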
55
temp_data_analysis.py
Normal file
@@ -0,0 +1,55 @@
+import pandas as pd
+import os
+
+def analyze_parquet_files():
+    """
+    Analyze the structural differences between the two Parquet data files.
+    """
+    data_path = 'data'
+    current_data_file = os.path.join(data_path, 'timeseries_training_data_sample_10s50p.parquet')
+    new_data_file = os.path.join(data_path, 'old_5shops_50skus.parquet')
+
+    print("="*50)
+    print("Data file difference report")
+    print("="*50)
+
+    try:
+        # --- Analyze the current data file ---
+        print(f"\n--- 1. Current data: {current_data_file} ---\n")
+        if os.path.exists(current_data_file):
+            df_current = pd.read_parquet(current_data_file)
+            print("[Columns and dtypes]:")
+            df_current.info(verbose=False)
+            print("\n[First 5 rows]:")
+            print(df_current.head())
+            print(f"\n[Total rows]: {len(df_current)}")
+            print(f"[Unique stores]: {df_current['store_id'].nunique()}")
+            print(f"[Unique products]: {df_current['product_id'].nunique()}")
+        else:
+            print(f"Error: file does not exist: {current_data_file}")
+
+        print("\n" + "-"*40 + "\n")
+
+        # --- Analyze the new data file ---
+        print(f"\n--- 2. New data: {new_data_file} ---\n")
+        if os.path.exists(new_data_file):
+            df_new = pd.read_parquet(new_data_file)
+            print("[Columns and dtypes (partial)]:")
+            df_new.info(verbose=True, max_cols=10, show_counts=True)  # show more detail
+            print("\n[All column names]:")
+            print(df_new.columns.tolist())
+            print("\n[First 5 rows (selected columns)]:")
+            # Show a few key columns
+            display_cols = ['subbh', 'hh', 'kdrq', 'net_sales_quantity', 'is_weekend', 'sales_quantity_rolling_mean_7d', 'province', 'temperature_2m_mean', 'brand_encoded']
+            print(df_new[display_cols].head())
+            print(f"\n[Total rows]: {len(df_new)}")
+            print(f"[Unique stores (subbh)]: {df_new['subbh'].nunique()}")
+            print(f"[Unique products (hh)]: {df_new['hh'].nunique()}")
+        else:
+            print(f"Error: file does not exist: {new_data_file}")
+
+    except Exception as e:
+        print(f"\nError during analysis: {e}")
+
+if __name__ == '__main__':
+    analyze_parquet_files()
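
A possible companion to analyze_parquet_files() that reports only the schema difference between the two files instead of full info() dumps — column_diff is a hypothetical helper, not part of this commit:

import pandas as pd

def column_diff(path_a: str, path_b: str) -> None:
    # Compare the column sets of two parquet files and print what each lacks.
    cols_a = set(pd.read_parquet(path_a).columns)
    cols_b = set(pd.read_parquet(path_b).columns)
    print("Only in A:", sorted(cols_a - cols_b))
    print("Only in B:", sorted(cols_b - cols_a))
    print("Shared:", len(cols_a & cols_b), "columns")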