数据源更换

2025-07-15 09:40:37 +08:00
12 changed files with 122 additions and 46 deletions
--- a/server/api.py
+++ b/server/api.py
@@ -160,6 +160,7 @@ def train_store_model(store_id, model_type, epochs=50, product_scope='all', prod
            
            # 读取店铺所有数据，找到第一个有数据的药品
            try:
+                from utils.multi_store_data_utils import load_multi_store_data
                from utils.multi_store_data_utils import load_multi_store_data
                df = load_multi_store_data()
                store_products = df[df['store_id'] == store_id]['product_id'].unique()
@@ -209,6 +210,7 @@ def train_global_model(model_type, epochs=50, training_scope='all_stores_all_pro
        
        # 读取数据
        from utils.multi_store_data_utils import load_multi_store_data
+        from utils.multi_store_data_utils import load_multi_store_data
        df = load_multi_store_data()
        
        # 根据训练范围过滤数据
@@ -687,6 +689,7 @@ def get_products():
 })
 def get_product(product_id):
    try:
+        from utils.multi_store_data_utils import load_multi_store_data
        from utils.multi_store_data_utils import load_multi_store_data
        df = load_multi_store_data(product_id=product_id)
        
@@ -1715,6 +1718,7 @@ def compare_predictions():
        
        # 获取产品名称
        from utils.multi_store_data_utils import load_multi_store_data
+        from utils.multi_store_data_utils import load_multi_store_data
        df = load_multi_store_data()
        product_df = df[df['product_id'] == product_id]
        
@@ -1871,6 +1875,7 @@ def analyze_prediction():
        
        # 获取产品特征数据
        from utils.multi_store_data_utils import load_multi_store_data
+        from utils.multi_store_data_utils import load_multi_store_data
        df = load_multi_store_data()
        product_df = df[df['product_id'] == product_id].sort_values('date')
        
@@ -2693,6 +2698,7 @@ def get_product_name(product_id):
    try:
        # 从Excel文件中查找产品名称
        from utils.multi_store_data_utils import load_multi_store_data
+        from utils.multi_store_data_utils import load_multi_store_data
        df = load_multi_store_data()
        product_df = df[df['product_id'] == product_id]
        if not product_df.empty:
@@ -2755,6 +2761,7 @@ def run_prediction(model_type, product_id, model_id, future_days, start_date, ve
            try:
                # 读取原始数据
                from utils.multi_store_data_utils import load_multi_store_data
+                from utils.multi_store_data_utils import load_multi_store_data
                df = load_multi_store_data()
                product_df = df[df['product_id'] == product_id].copy()
                
@@ -4288,6 +4295,7 @@ def get_global_training_stats():
        
        # 读取数据
        from utils.multi_store_data_utils import load_multi_store_data
+        from utils.multi_store_data_utils import load_multi_store_data
        df = load_multi_store_data()
        
        # 根据训练范围过滤数据
--- a/server/models/__pycache__/__init__.cpython-313.pyc
+++ b/server/models/__pycache__/__init__.cpython-313.pyc
--- a/server/models/__pycache__/data_utils.cpython-313.pyc
+++ b/server/models/__pycache__/data_utils.cpython-313.pyc
--- a/server/models/__pycache__/kan_model.cpython-313.pyc
+++ b/server/models/__pycache__/kan_model.cpython-313.pyc
--- a/server/models/__pycache__/mlstm_model.cpython-313.pyc
+++ b/server/models/__pycache__/mlstm_model.cpython-313.pyc
--- a/server/models/__pycache__/model_manager.cpython-313.pyc
+++ b/server/models/__pycache__/model_manager.cpython-313.pyc
--- a/server/models/__pycache__/optimized_kan_forecaster.cpython-313.pyc
+++ b/server/models/__pycache__/optimized_kan_forecaster.cpython-313.pyc
--- a/server/models/__pycache__/slstm_model.cpython-313.pyc
+++ b/server/models/__pycache__/slstm_model.cpython-313.pyc
--- a/server/models/__pycache__/transformer_model.cpython-313.pyc
+++ b/server/models/__pycache__/transformer_model.cpython-313.pyc
--- a/server/models/__pycache__/utils.cpython-313.pyc
+++ b/server/models/__pycache__/utils.cpython-313.pyc
--- a/server/utils/multi_store_data_utils.py
+++ b/server/utils/multi_store_data_utils.py
@@ -9,62 +9,67 @@ import os
 from datetime import datetime, timedelta
 from typing import Optional, List, Tuple, Dict, Any

-def load_multi_store_data(file_path: str = 'data/timeseries_training_data_sample_10s50p.parquet',
+def load_multi_store_data(file_path: Optional[str] = None,
                         store_id: Optional[str] = None,
                         product_id: Optional[str] = None,
                         start_date: Optional[str] = None,
                         end_date: Optional[str] = None) -> pd.DataFrame:
    """
-    加载多店铺销售数据，支持按店铺、产品、时间范围过滤
-    
+    加载多店铺销售数据，支持按店铺、产品、时间范围过滤。
+    该函数使用健壮的路径解析，并支持多种备用数据文件。
+
    参数:
-    file_path: 数据文件路径 (支持 .csv, .xlsx, .parquet)
-    store_id: 店铺ID，为None时返回所有店铺数据
-    product_id: 产品ID，为None时返回所有产品数据
-    start_date: 开始日期 (YYYY-MM-DD)
-    end_date: 结束日期 (YYYY-MM-DD)
-    
+    file_path: (可选) 数据文件的具体路径。如果为None，将使用默认的备用文件列表。
+    store_id: 店铺ID，为None时返回所有店铺数据。
+    product_id: 产品ID，为None时返回所有产品数据。
+    start_date: 开始日期 (YYYY-MM-DD)。
+    end_date: 结束日期 (YYYY-MM-DD)。
+
    返回:
-    DataFrame: 过滤后的销售数据
+    DataFrame: 过滤后的销售数据。
    """
-    
-    # 尝试多个可能的文件路径
-    # 获取当前脚本所在的目录
+    # 获取当前脚本所在的目录，并构造项目根目录的绝对路径
    current_dir = os.path.dirname(os.path.abspath(__file__))
-    # 假设项目根目录是 server/utils 的上两级目录
    project_root = os.path.abspath(os.path.join(current_dir, '..', '..'))
-    
-    possible_paths = [
-        file_path, # 相对路径 (如果从根目录运行)
-        os.path.join(project_root, file_path), # 基于项目根目录的绝对路径
-        os.path.join('..', file_path), # 相对路径 (如果从 server 目录运行)
-        os.path.join('server', file_path) # 相对路径 (如果从根目录运行，但路径错误)
+
+    # 定义备用数据文件列表，相对于项目根目录
+    default_files = [
+        'data/timeseries_training_data_sample_10s50p.parquet',
+        'data/pharmacy_sales_multi_store.csv',
+        'data/pharmacy_sales.xlsx'
    ]
-    
+
+    # 如果用户提供了file_path，优先使用它
+    if file_path:
+        possible_paths = [file_path]
+    else:
+        # 否则，使用默认的备用文件列表
+        possible_paths = [os.path.join(project_root, f) for f in default_files]
+
    df = None
    loaded_path = None
    for path in possible_paths:
        try:
-            if not os.path.exists(path):
-                continue
-
-            if path.endswith('.csv'):
-                df = pd.read_csv(path)
-            elif path.endswith('.xlsx'):
-                df = pd.read_excel(path)
-            elif path.endswith('.parquet'):
-                df = pd.read_parquet(path)
-            
-            if df is not None:
-                loaded_path = path
-                print(f"成功加载数据文件: {loaded_path}")
-                break
+            if os.path.exists(path):
+                if path.endswith('.csv'):
+                    df = pd.read_csv(path)
+                elif path.endswith('.xlsx'):
+                    df = pd.read_excel(path)
+                elif path.endswith('.parquet'):
+                    df = pd.read_parquet(path)
+                
+                if df is not None:
+                    loaded_path = path
+                    print(f"成功加载数据文件: {loaded_path}")
+                    break
        except Exception as e:
-            print(f"加载文件 {path} 失败: {e}")
+            print(f"尝试加载文件 {path} 失败: {e}")
            continue
    
    if df is None:
-        raise FileNotFoundError(f"无法找到或加载数据文件，尝试的路径: {possible_paths}")
+        error_message = f"无法在预设路径中找到任何有效的数据文件。尝试的路径: {possible_paths}"
+        print(error_message)
+        raise FileNotFoundError(error_message)
    
    # 按店铺过滤
    if store_id:
--- a/start_api_debug.py
+++ b/start_api_debug.py
@@ -6,21 +6,79 @@ import subprocess
 import sys
 import os

+def kill_process_on_port(port):
+    """查找并终止占用指定端口的进程"""
+    if os.name == 'nt':  # Windows
+        try:
+            # 查找占用端口的PID
+            command = f"netstat -aon | findstr :{port}"
+            result = subprocess.check_output(command, shell=True, text=True, stderr=subprocess.DEVNULL)
+            
+            if not result:
+                print(f"端口 {port} 未被占用。")
+                return
+
+            for line in result.strip().split('\n'):
+                parts = line.strip().split()
+                if len(parts) >= 5 and parts[3] == 'LISTENING':
+                    pid = parts[4]
+                    print(f"端口 {port} 被PID {pid} 占用，正在终止...")
+                    # 强制终止进程
+                    kill_command = f"taskkill /F /PID {pid}"
+                    subprocess.run(kill_command, shell=True, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                    print(f"成功终止PID {pid}。")
+        except subprocess.CalledProcessError:
+            # findstr没找到匹配项时会返回错误码1，这是正常情况
+            print(f"端口 {port} 未被占用。")
+        except Exception as e:
+            print(f"终止进程时出错: {e}")
+    else:  # Linux / macOS
+        try:
+            command = f"lsof -t -i:{port}"
+            result = subprocess.check_output(command, shell=True, text=True, stderr=subprocess.DEVNULL)
+            pids = result.strip().split('\n')
+            for pid in pids:
+                if pid:
+                    print(f"端口 {port} 被PID {pid} 占用，正在终止...")
+                    kill_command = f"kill -9 {pid}"
+                    subprocess.run(kill_command, shell=True, check=True)
+                    print(f"成功终止PID {pid}。")
+        except subprocess.CalledProcessError:
+            print(f"端口 {port} 未被占用。")
+        except Exception as e:
+            print(f"终止进程时出错: {e}")
+
 def start_api_debug():
-    """启动API服务器（调试模式）"""
-    print("启动API服务器（调试模式）...")
+    """启动API服务器（调试模式），并在启动前清理端口"""
+    port = 5000
+    print(f"准备启动API服务器，将首先清理端口 {port}...")
+    print("="*60)
+    
+    # 杀死可能存在的旧进程
+    kill_process_on_port(port)
+    
+    print("\n端口清理完成，准备启动新服务...")
    print("="*60)
    
    # 切换到正确的目录
-    os.chdir(os.path.dirname(__file__))
+    # 脚本的当前目录
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    if os.path.basename(script_dir).lower() == 'server':
+        # 如果在server目录下，切换到上级目录
+        os.chdir(os.path.dirname(script_dir))
+    else:
+        # 否则，假定在项目根目录
+        os.chdir(script_dir)
+
+    print(f"当前工作目录: {os.getcwd()}")
    
    # 启动命令
    cmd = [
-        sys.executable, 
-        "./server/api.py", 
+        sys.executable,
+        "./server/api.py",
        "--debug",
        "--host", "0.0.0.0",
-        "--port", "5000"
+        "--port", str(port)
    ]
    
    print(f"执行命令: {' '.join(cmd)}")
@@ -28,11 +86,16 @@ def start_api_debug():
    
    try:
        # 直接运行，输出会实时显示
-        result = subprocess.run(cmd)
-        print(f"API服务器退出，退出码: {result.returncode}")
+        # 使用 Popen 以便更好地控制子进程
+        process = subprocess.Popen(cmd)
+        process.wait() # 等待进程结束
+        print(f"API服务器退出，退出码: {process.returncode}")
        
    except KeyboardInterrupt:
-        print("\n收到中断信号，停止API服务器")
+        print("\n收到中断信号，停止API服务器...")
+        process.terminate() # 确保子进程被终止
+        process.wait()
+        print("服务器已停止。")
    except Exception as e:
        print(f"启动API服务器失败: {e}")