不会再改算法了, 完备

master
wangchunlin 2 years ago
parent e903ba49d0
commit 36b93fcbf0

@ -45,7 +45,7 @@ def classify_features(features_data_list):
返回:
dict: 包含分类结果和模型文件信息的字典
"""
response = requests.post("http://127.0.0.1:3397/evaluate/", json=features_data_list)
response = requests.post("http://p2p1.melulu.top:3397/evaluate/", json=features_data_list)
if response.status_code == 200:
results = response.json()
print("Precision:", results["classification_result"]["precision"])

@ -87,7 +87,7 @@ if __name__ == "__main__":
features_data_list = [features_data4, features_data2, features_data3, features_data1]
# 发送 POST 请求
response = requests.post("http://127.0.0.1:3397/inference/", json=features_data_list)
response = requests.post("http://p2p1.melulu.top:3397/inference/", json=features_data_list)
if response.status_code == 200:
# 获取分类结果列表

@ -8,11 +8,11 @@ batch_size: 16
learning_rate: 0.001
nc: 4
#data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
data_train: train
data_train: train_val
early_stop_patience: 50
gamma: 0.98
step_size: 10
experiments_count: 5
experiments_count: 1
#---检测和推理配置---#
# 检测和推理使用模型路径
@ -46,3 +46,79 @@ feature_weights:
- 0.08
- 0.12
#---网络结构---#
# MLP configuration
mlp:
input_dim: 10 # Number of input features
layers:
- output_dim: 32
activation: relu
- output_dim: 128
activation: relu
- output_dim: 32
activation: relu
output_dim: 4 # Number of classes
# Transformer configuration
transformer:
d_model: 32 # Reduced embedding dimension
nhead: 4 # Reduced number of attention heads
num_encoder_layers: 2 # Reduced number of encoder layers
num_decoder_layers: 2 # Reduced number of decoder layers
dim_feedforward: 128 # Reduced feedforward network dimension
dropout: 0.1 # Dropout probability
input_dim: 10 # Number of input features
output_dim: 4 # Number of classes
#---训练配置备份---#
# MLP good train param 1
# #---训练配置---#
# n_epochs: 150
# batch_size: 16
# learning_rate: 0.001
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1
# MLP good train param 2
# #---训练配置---#
# n_epochs: 300
# batch_size: 8
# learning_rate: 0.0005
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1
# Transformer good train param 1
# #---训练配置---#
# n_epochs: 150
# batch_size: 64
# learning_rate: 0.001
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1
# Transformer good train param 2
# #---训练配置---#
# n_epochs: 300
# batch_size: 8
# learning_rate: 0.0005
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1

@ -1,31 +1,34 @@
import os
import sys
root_path = os.getcwd()
sys.path.append(root_path)
import time
import datetime
import signal
import logging
import uvicorn
import pandas as pd
import yaml
import numpy as np
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List
from utils.common import train_detect, evaluate_model, inference_model
import atexit
from fastapi.middleware.cors import CORSMiddleware
import logging
import matplotlib.pyplot as plt
import argparse
import numpy as np
import yaml
import threading
import pickle
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from utils.feature_process import create_feature_df, apply_feature_weights, Features
from utils.common import MLModel
app = FastAPI()
# 控制是否打印的宏定义
PRINT_LOG = True
def log_print(message):
    """Record *message* on the root logger and, when PRINT_LOG is enabled, echo it to stdout."""
    logging.info(message)
    if not PRINT_LOG:
        return
    print(message)
# 保证日志写到文件
def flush_log():
    """Flush every handler attached to the root logger so buffered records reach the log file."""
    root_logger = logging.getLogger()
    for h in root_logger.handlers:
        h.flush()
# 定义fastapi返回类 inference
class PredictionResult(BaseModel):
    """Response schema returned by the /inference/ endpoint: a list of predicted class labels."""
    predictions: list
@ -46,14 +49,16 @@ app.add_middleware(
allow_headers=["*"],
)
# 定义接口
# 初始化配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
# 定义训练接口
@app.post("/train/")
async def classify_features(request: Request, features_list: List[Features]):
async def train_model(request: Request, features_list: List[Features]):
# 遍历每个特征对象,并将其添加到 all_features 中
all_features = create_feature_df(features_list)
# 读取 YAML 配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
with open(config_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
feature_names = config['feature_names']
@ -70,54 +75,53 @@ async def classify_features(request: Request, features_list: List[Features]):
# 训练前设置
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_feature_label_weighted_{now}.xlsx"))
data_path = os.path.abspath(os.path.join(static_dir, f"train_feature_label_weighted_{now}.xlsx"))
config['data_path'] = data_path
feature_label_weighted.to_excel(data_path, index=False)
# 添加模型保存路径
model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_model_{now}.pth"))
model_path = os.path.abspath(os.path.join(static_dir, f"train_model_{now}.pth"))
config['model_path'] = model_path
# 配置日志
log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_log_{now}.log"))
log_path = os.path.abspath(os.path.join(static_dir, f"train_log_{now}.log"))
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# 配置训练和验证结果图片路径
train_process_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_progress_img_{now}.png"))
train_process_path = os.path.abspath(os.path.join(static_dir, f"train_progress_img_{now}.png"))
config['train_process_path'] = train_process_path
evaluate_result_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_result_img_{now}.png"))
evaluate_result_path = os.path.abspath(os.path.join(static_dir, f"evaluate_result_img_{now}.png"))
config['evaluate_result_path'] = evaluate_result_path
print("config: ", config)
logging.info("config: ", config)
log_print("config: " + str(config))
# 开始训练
# 初始化 MLModel 实例
ml_model = MLModel(config)
list_avg_f1 = []
list_wrong_percentage = []
list_precision = []
list_recall = []
list_f1 = []
train_times = 1 if config['data_train']==r'all' else config["experiments_count"]
for i in range(train_times):
avg_f1, wrong_percentage, precision, recall, f1 = train_detect(config)
train_times = 1 if config['data_train'] == 'all' else config["experiments_count"]
for _ in range(train_times):
avg_f1, wrong_percentage, precision, recall, f1 = ml_model.train_detect()
list_avg_f1.append(avg_f1)
list_wrong_percentage.append(wrong_percentage)
list_precision.append(precision)
list_recall.append(recall)
list_f1.append(f1)
logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
log_print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
log_print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
log_print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
log_print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
end_time = time.time() # 记录结束时间
# 训练结束
print("预测耗时:", end_time - start_time, "") # 打印执行时间
log_print("预测耗时: " + str(end_time - start_time) + "") # 打印执行时间
# 保证日志写到文件
atexit.register(flush_log)
# 返回分类结果和模型文件下载 URLstatic不是程序执行路径而是app.mount的静态文件夹
model_file_url = f"{request.base_url}train_api/train_model_{now}.pth"
@ -139,14 +143,13 @@ async def classify_features(request: Request, features_list: List[Features]):
}
}
# 定义接口
# 定义验证接口
@app.post("/evaluate/")
async def classify_features(request: Request, features_list: List[Features]):
async def evaluate_model(request: Request, features_list: List[Features]):
# 遍历每个特征对象,并将其添加到 all_features 中
all_features = create_feature_df(features_list)
# 读取 YAML 配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
with open(config_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
feature_names = config['feature_names']
@ -163,50 +166,35 @@ async def classify_features(request: Request, features_list: List[Features]):
# 训练前设置
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_feature_label_weighted_{now}.xlsx"))
data_path = os.path.abspath(os.path.join(static_dir, f"evaluate_feature_label_weighted_{now}.xlsx"))
config['data_path'] = data_path
feature_label_weighted.to_excel(data_path, index=False)
# 配置验证结果图片路径
evaluate_result_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_result_img_{now}.png"))
evaluate_result_path = os.path.abspath(os.path.join(static_dir, f"evaluate_result_img_{now}.png"))
config['evaluate_result_path'] = evaluate_result_path
# 配置日志
log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_log_{now}.log"))
log_path = os.path.abspath(os.path.join(static_dir, f"evaluate_log_{now}.log"))
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# 开始验证
list_avg_f1 = []
list_wrong_percentage = []
list_precision = []
list_recall = []
list_f1 = []
# 特征和标签
X = feature_label_weighted[config['feature_names']].values
y = feature_label_weighted[config['label_name']].values
print("config: ", config)
logging.info("config: ", config)
avg_f1, wrong_percentage, precision, recall, f1 = evaluate_model(config["model_path"], X, y, config)
list_avg_f1.append(avg_f1)
list_wrong_percentage.append(wrong_percentage)
list_precision.append(precision)
list_recall.append(recall)
list_f1.append(f1)
# 初始化 MLModel 实例
ml_model = MLModel(config)
# 加载模型
ml_model.load_model()
logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
avg_f1, wrong_percentage, precision, recall, f1 = ml_model.evaluate_model(X, y)
end_time = time.time() # 记录结束时间
# 训练结束
print("预测耗时:", end_time - start_time, "") # 打印执行时间
log_print("预测耗时: " + str(end_time - start_time) + "") # 打印执行时间
# 保证日志写到文件
atexit.register(flush_log)
# 返回分类结果和模型文件下载 URLstatic不是程序执行路径而是app.mount的静态文件夹
log_file_url = f"{request.base_url}evaluate_api/evaluate_log_{now}.log"
@ -226,14 +214,13 @@ async def classify_features(request: Request, features_list: List[Features]):
}
}
# 定义接口
# 定义推理接口
@app.post("/inference/")
async def classify_features(request: Request, features_list: List[Features]):
async def inference_model(request: Request, features_list: List[Features]):
# 遍历每个特征对象,并将其添加到 all_features 中
all_features = create_feature_df(features_list)
# 读取 YAML 配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
with open(config_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
feature_names = config['feature_names']
@ -250,23 +237,32 @@ async def classify_features(request: Request, features_list: List[Features]):
# 训练前设置
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"inference_feature_label_weighted_{now}.xlsx"))
data_path = os.path.abspath(os.path.join(static_dir, f"inference_feature_label_weighted_{now}.xlsx"))
config['data_path'] = data_path
feature_label_weighted.to_excel(data_path, index=False)
# 配置日志
log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"inference_log_{now}.log"))
log_path = os.path.abspath(os.path.join(static_dir, f"inference_log_{now}.log"))
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# 特征和标签
X = feature_label_weighted[config['feature_names']].values
y = feature_label_weighted[config['label_name']].values
predictions = inference_model(config["model_path"], X, y, config)
# 初始化 MLModel 实例
ml_model = MLModel(config)
# 加载模型
ml_model.load_model()
predictions = ml_model.inference_model(X)
end_time = time.time() # 记录结束时间
print("预测耗时:", end_time - start_time, "") # 打印执行时间
log_print("预测耗时: " + str(end_time - start_time) + " ") # 打印执行时间
print("预测结果:", predictions)
log_print("预测结果: " + str(predictions))
# 保证日志写到文件
atexit.register(flush_log)
# 返回预测结果
return PredictionResult(predictions=predictions)
@ -299,7 +295,5 @@ if __name__ == "__main__":
# 同级目录下的static文件夹
app.mount("/train_api", StaticFiles(directory=static_dir_train), name="static_dir_train")
app.mount("/evaluate_api", StaticFiles(directory=static_dir_evaluate), name="static_dir_evaluate")
app.mount("/inference_api", StaticFiles(directory=static_dir_evaluate), name="static_dir_inference")
app.mount("/inference_api", StaticFiles(directory=static_dir_inference), name="static_dir_inference")
uvicorn.run(app, host="0.0.0.0", port=3397, reload=False)
## train evl 功能OK了 差infer就可以了还有就是做一个模型上传机制目前为止最好的模型就是model下面那个

@ -10,33 +10,36 @@ from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
import logging
import matplotlib.pyplot as plt
import argparse
# 控制是否打印的宏定义
PRINT_LOG = True
class MLP(nn.Module):
def __init__(self, config):
super(MLP, self).__init__()
self.model = nn.Sequential(
nn.Linear(len(config['feature_names']), 32),
nn.ReLU(),
nn.Linear(32, 128),
nn.ReLU(),
nn.Linear(128, 32),
nn.ReLU(),
nn.Linear(32, config['nc']),
)
def log_print(message):
    """Write *message* to the log; additionally print it when the PRINT_LOG flag is set."""
    logging.info(message)
    if PRINT_LOG:
        # Mirror the log record on stdout for interactive runs.
        print(message)
def forward(self, x):
return self.model(x)
class MLModel:
def __init__(self, model_config):
self.config = model_config
self.model = None
def create_model(self):
self.model = MLP(self.config).to(self.config['device'])
# self.model = TransformerModel(self.config).to(self.config['device'])
def load_model(self):
self.create_model()
self.model.load_state_dict(torch.load(self.config['model_path'], map_location=self.config['device']))
def load_and_split_data(config):
def load_and_split_data(self):
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
file_path = os.path.join(parent_dir, config['data_path'])
file_path = os.path.join(parent_dir, self.config['data_path'])
data = pd.read_excel(file_path)
X = data[config['feature_names']].values
y = data[config['label_name']].values
X = data[self.config['feature_names']].values
y = data[self.config['label_name']].values
skf_outer = StratifiedKFold(n_splits=5, shuffle=True)
train_index_outer, test_index_outer = next(skf_outer.split(X, y))
@ -50,62 +53,42 @@ def load_and_split_data(config):
return X, y, X_train_val, y_train_val, X_train, y_train, X_val, y_val, X_infer, y_infer
def save_model(model_path, best_model):
torch.save(best_model, model_path)
def evaluate_model(model_path, X_infer, y_infer, config):
# 如果传入的是模型文件路径,则从该路径加载模型
if isinstance(model_path, str):
model = MLP(config).to(config['device'])
model.load_state_dict(torch.load(model_path, map_location=config['device'])) # 加载训练好的模型参数
else:
model = model_path
# infer_data = pd.DataFrame(X_infer, columns=config['feature_names'])
# infer_data[config['label_name']] = y_infer
# infer_data.to_excel(os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")), config['infer_path']), index=False)
model.eval()
def save_model(self, model_path):
torch.save(self.model.state_dict(), model_path)
def evaluate_model(self, X_infer, y_infer):
self.model.eval()
with torch.no_grad():
outputs = model(torch.from_numpy(X_infer).float().to(config['device']))
outputs = self.model(torch.from_numpy(X_infer).float().to(self.config['device']))
_, predictions = torch.max(outputs, 1)
wrong_indices = np.where(y_infer != predictions.cpu().numpy())[0]
wrong_count = len(wrong_indices)
precision = precision_score(y_infer, predictions.cpu().numpy(), average=None)
recall = recall_score(y_infer, predictions.cpu().numpy(), average=None)
f1 = f1_score(y_infer, predictions.cpu().numpy(), average=None)
wrong_count = len(np.where(y_infer != predictions.cpu().numpy())[0])
total_count = len(y_infer)
wrong_percentage = (wrong_count / total_count) * 100
print("Infer Result: ")
logging.info("Infer Result: ")
print("预测错误数量:", wrong_count)
print("预测错误占总数量的百分比:", wrong_percentage, "%")
print("总数量:", total_count)
log_print("Evaluate Result: ")
logging.info(f"Prediction errors: {wrong_count}")
logging.info(f"Prediction error percentage: {wrong_percentage:.2f}%")
logging.info(f"Total samples: {total_count}")
precision = precision_score(y_infer, predictions.cpu().numpy(), average=None)
recall = recall_score(y_infer, predictions.cpu().numpy(), average=None)
f1 = f1_score(y_infer, predictions.cpu().numpy(), average=None)
log_print(f"Prediction errors: {wrong_count}")
log_print(f"Prediction error percentage: {wrong_percentage:.2f}%")
log_print(f"Total samples: {total_count}")
avg_precision = np.mean(precision)
avg_recall = np.mean(recall)
avg_f1 = np.mean(f1)
for i in range(len(precision)):
print(f"Class {i} Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
log_print(f"Class {i} Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
print("精确率:", precision)
print("召回率:", recall)
print("F1得分:", f1)
print("平均精确率:", avg_precision)
print("平均召回率:", avg_recall)
print("平均F1得分:", avg_f1)
print("Infer Result End: ")
logging.info("Infer Result End: ")
log_print("精确率:" + str(precision))
log_print("召回率:" + str(recall))
log_print("F1得分:" + str(f1))
log_print("平均精确率:" + str(avg_precision))
log_print("平均召回率:" + str(avg_recall))
log_print("平均F1得分:" + str(avg_f1))
log_print("Evaluate Result End: ")
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
ax1.bar(np.arange(len(precision)), precision)
@ -116,120 +99,40 @@ def evaluate_model(model_path, X_infer, y_infer, config):
ax3.set_title('F1 Score')
# 保存图片
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
evaluate_result_path = os.path.join(parent_dir, config['evaluate_result_path'])
evaluate_result_path = os.path.join(parent_dir, self.config['evaluate_result_path'])
plt.savefig(evaluate_result_path)
return avg_f1, wrong_percentage, precision, recall, f1
def inference_model(model_path, X_infer, y_infer, config):
# 如果传入的是模型文件路径,则从该路径加载模型
if isinstance(model_path, str):
model = MLP(config).to(config['device'])
model.load_state_dict(torch.load(model_path, map_location=config['device'])) # 加载训练好的模型参数
else:
model = model_path
# infer_data = pd.DataFrame(X_infer, columns=config['feature_names'])
# infer_data[config['label_name']] = y_infer
# infer_data.to_excel(os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")), config['infer_path']), index=False)
model.eval()
return np.mean(f1), wrong_percentage, precision, recall, f1
# 推理
def inference_model(self, X_infer):
self.model.eval()
with torch.no_grad():
outputs = model(torch.from_numpy(X_infer).float().to(config['device']))
outputs = self.model(torch.from_numpy(X_infer).float().to(self.config['device']))
# 获取预测结果
_, predictions = torch.max(outputs, 1)
# 实际类别从1开始程序类别从0开始
predictions += 1
# 打印预测结果
# print("预测结果:", predictions.cpu().numpy())
# 返回预测结果
return predictions.cpu().numpy().tolist()
def train_detect(config):
X, y, X_train_val, y_train_val, X_train, y_train, X_val, y_val, X_infer, y_infer = load_and_split_data(config)
if config['data_train'] == r'train_val':
train_dataset = TensorDataset(torch.from_numpy(X_train_val).float().to(config['device']), torch.from_numpy(y_train_val).long().to(config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_infer).float().to(config['device']), torch.from_numpy(y_infer).long().to(config['device']))
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train_val), y=y_train_val), dtype=torch.float32).to(config['device'])
logging.info(f"Class weights: {class_weights}")
elif config['data_train'] == r'train':
train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(config['device']), torch.from_numpy(y_train).long().to(config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(config['device']), torch.from_numpy(y_val).long().to(config['device']))
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train), y=y_train), dtype=torch.float32).to(config['device'])
logging.info(f"Class weights: {class_weights}")
elif config['data_train'] == r'all':
train_dataset = TensorDataset(torch.from_numpy(X).float().to(config['device']), torch.from_numpy(y).long().to(config['device']))
val_dataset = TensorDataset(torch.from_numpy(X).float().to(config['device']), torch.from_numpy(y).long().to(config['device']))
X_infer = X
y_infer = y
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y), y=y), dtype=torch.float32).to(config['device'])
logging.info(f"Class weights: {class_weights}")
else:
print("Error: Set data_train first in yaml!")
logging.error("Error: Set data_train first in yaml!")
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=config['batch_size'])
model = MLP(config).to(config['device'])
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, config['step_size'], config['gamma'])
best_val_f1, best_val_recall, best_val_precision, best_epoch, best_model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, config)
# 保存模型
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
model_path = os.path.join(parent_dir, config['model_path'])
save_model(model_path, best_model)
logging.info(f"Best Validation F1 Score (Macro): {best_val_f1:.4f}")
logging.info(f"Best Validation Recall (Macro): {best_val_recall:.4f}")
logging.info(f"Best Validation Precision (Macro): {best_val_precision:.4f}")
logging.info(f"Best Epoch: {best_epoch + 1}")
print(f"Best Validation F1 Score (Macro): {best_val_f1:.4f}")
print(f"Best Validation Recall (Macro): {best_val_recall:.4f}")
print(f"Best Validation Precision (Macro): {best_val_precision:.4f}")
print(f"Best Epoch: {best_epoch + 1}")
avg_f1, wrong_percentage, precision, recall, f1 = evaluate_model(model, X_infer, y_infer, config)
return avg_f1, wrong_percentage, precision, recall, f1
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, config):
n_epochs = config['n_epochs']
def train_model(self, train_loader, val_loader, criterion, optimizer, scheduler):
n_epochs = self.config['n_epochs']
best_val_f1 = 0.0
best_val_recall = 0.0
best_val_precision = 0.0
best_epoch = -1
best_model = None
patience = config['early_stop_patience']
patience = self.config['early_stop_patience']
trigger_times = 0
train_loss_history = []
train_acc_history = []
val_loss_history = []
val_acc_history = []
val_f1_history = []
val_precision_history = []
val_recall_history = []
train_loss_history, train_acc_history, val_loss_history, val_acc_history, val_f1_history, val_precision_history, val_recall_history = [[] for _ in range(7)]
plt.rcParams['figure.max_open_warning'] = 50
for epoch in range(n_epochs):
# 训练阶段
model.train()
# Training phase
self.model.train()
train_loss, train_acc = 0, 0
for inputs, targets in train_loader:
optimizer.zero_grad()
outputs = model(inputs)
outputs = self.model(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
@ -243,12 +146,12 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
# 更新学习率
scheduler.step()
# 验证阶段
model.eval()
# Validation phase
val_loss, val_acc, all_preds, all_targets = 0, 0, [], []
self.model.eval()
with torch.no_grad():
for inputs, targets in val_loader:
outputs = model(inputs)
outputs = self.model(inputs)
loss = criterion(outputs, targets)
val_loss += loss.item() * inputs.size(0)
_, preds = torch.max(outputs, 1)
@ -263,8 +166,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
class_recalls_m = recall_score(all_targets, all_preds, average='macro')
class_f1_scores_m = f1_score(all_targets, all_preds, average='macro')
logging.info(f'Epoch {epoch+1:0{3}d} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_acc:.4f} | Validation Mean Precision: {class_precisions_m:.4f} | Validation Mean Recall: {class_recalls_m:.4f} | Validation Mean F1_score: {class_f1_scores_m:.4f}')
print(f'Epoch {epoch+1:0{3}d} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_acc:.4f} | Validation Mean Precision: {class_precisions_m:.4f} | Validation Mean Recall: {class_recalls_m:.4f} | Validation Mean F1_score: {class_f1_scores_m:.4f}')
log_print(f'Epoch {epoch+1:0{3}d} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_acc:.4f} | Validation Mean Precision: {class_precisions_m:.4f} | Validation Mean Recall: {class_recalls_m:.4f} | Validation Mean F1_score: {class_f1_scores_m:.4f}')
train_loss_history.append(train_loss)
train_acc_history.append(train_acc)
@ -292,7 +194,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
ax3.legend()
# 保存图片
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
train_process_path = os.path.join(parent_dir, config['train_process_path'])
train_process_path = os.path.join(parent_dir, self.config['train_process_path'])
plt.savefig(train_process_path)
if class_f1_scores_m > best_val_f1:
@ -300,49 +202,112 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
best_val_recall = class_recalls_m
best_val_precision = class_precisions_m
best_epoch = epoch
best_model = model.state_dict()
best_model = self.model.state_dict()
trigger_times = 0
else:
trigger_times += 1
if trigger_times >= patience:
logging.info(f'Early stopping at epoch {epoch} | Best epoch : {best_epoch + 1}')
print(f'Early stopping at epoch {epoch} | Best epoch : {best_epoch + 1}')
log_print(f'Early stopping at epoch {epoch} | Best epoch : {best_epoch + 1}')
break
return best_val_f1, best_val_recall, best_val_precision, best_epoch, best_model
# NOTE(review): the region below is a corrupted paste of a git-diff view.
# The OLD script-style entry point (a module-level `__main__` block that
# loads a YAML config and drives a free function `train_detect(config)`) is
# interleaved with the NEW class-method implementation `train_detect(self)`.
# It is not valid Python as-is; the fragment markers below indicate which
# variant each chunk appears to belong to. Reconstruct against version
# control before editing further.
# -- OLD __main__ fragment: argument parsing and config load ------------------
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default='config.yaml', help='Path to the configuration file')
args = parser.parse_args()
with open(args.config, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
# -- NEW method fragment: data selection per config['data_train'] -------------
# 'train_val': train on train+val, validate on the held-out inference split;
# 'train':     train on train, validate on val;
# 'all':       train/validate/infer on the full dataset.
def train_detect(self):
X, y, X_train_val, y_train_val, X_train, y_train, X_val, y_val, X_infer, y_infer = self.load_and_split_data()
if self.config['data_train'] == 'train_val':
train_dataset = TensorDataset(torch.from_numpy(X_train_val).float().to(self.config['device']), torch.from_numpy(y_train_val).long().to(self.config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_infer).float().to(self.config['device']), torch.from_numpy(y_infer).long().to(self.config['device']))
# Balanced class weights compensate for label imbalance in the loss.
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train_val), y=y_train_val), dtype=torch.float32).to(self.config['device'])
elif self.config['data_train'] == 'train':
train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(self.config['device']), torch.from_numpy(y_train).long().to(self.config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(self.config['device']), torch.from_numpy(y_val).long().to(self.config['device']))
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train), y=y_train), dtype=torch.float32).to(self.config['device'])
elif self.config['data_train'] == 'all':
train_dataset = TensorDataset(torch.from_numpy(X).float().to(self.config['device']), torch.from_numpy(y).long().to(self.config['device']))
val_dataset = TensorDataset(torch.from_numpy(X).float().to(self.config['device']), torch.from_numpy(y).long().to(self.config['device']))
X_infer = X
y_infer = y
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y), y=y), dtype=torch.float32).to(self.config['device'])
else:
# Unknown data_train value: fail fast rather than train on the wrong split.
logging.error("Error: Set data_train first in yaml!")
raise ValueError("Error: Set data_train first in yaml!")
log_print(f"Class weights: {class_weights}")
# -- OLD __main__ fragment: logging setup and repeated-experiment loop --------
# Configure logging
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
log_path = os.path.join(parent_dir, config['log_path'])
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
list_avg_f1 = []
list_wrong_percentage = []
list_precision = []
list_recall = []
list_f1 = []
# 'all' trains on the full set, so one run suffices; otherwise average
# metrics over config["experiments_count"] independent runs.
train_times = 1 if config['data_train']==r'all' else config["experiments_count"]
for i in range(train_times):
avg_f1, wrong_percentage, precision, recall, f1 = train_detect(config)
list_avg_f1.append(avg_f1)
list_wrong_percentage.append(wrong_percentage)
list_precision.append(precision)
list_recall.append(recall)
list_f1.append(f1)
# Per-class averages across runs, plus the overall mean of each metric.
logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
# -- NEW method fragment: training loop, checkpointing, final evaluation ------
train_loader = DataLoader(train_dataset, batch_size=self.config['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=self.config['batch_size'])
self.create_model()
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(self.model.parameters(), lr=self.config['learning_rate'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, self.config['step_size'], self.config['gamma'])
best_val_f1, best_val_recall, best_val_precision, best_epoch, best_model = self.train_model(train_loader, val_loader, criterion, optimizer, scheduler)
# Save the best model
self.save_model(self.config['model_path'])
log_print(f"Best Validation F1 Score (Macro): {best_val_f1:.4f}")
log_print(f"Best Validation Recall (Macro): {best_val_recall:.4f}")
log_print(f"Best Validation Precision (Macro): {best_val_precision:.4f}")
log_print(f"Best Epoch: {best_epoch + 1}")
avg_f1, wrong_percentage, precision, recall, f1 = self.evaluate_model(X_infer, y_infer)
return avg_f1, wrong_percentage, precision, recall, f1
# class MLP(nn.Module):
# def __init__(self, config):
# super(MLP, self).__init__()
# self.model = nn.Sequential(
# nn.Linear(len(config['feature_names']), 32),
# nn.ReLU(),
# nn.Linear(32, 128),
# nn.ReLU(),
# nn.Linear(128, 32),
# nn.ReLU(),
# nn.Linear(32, config['nc']),
# )
# def forward(self, x):
# return self.model(x)
# 20260605
class MLP(nn.Module):
    """Configurable multi-layer perceptron classifier.

    The architecture is read from ``config['mlp']``: an ``input_dim``-wide
    input is passed through each entry of ``layers`` (an ``nn.Linear``
    optionally followed by a ReLU when that entry's ``activation`` is
    ``'relu'``), then projected to ``output_dim`` raw logits.
    """

    def __init__(self, config):
        super().__init__()
        mlp_cfg = config['mlp']
        modules = []
        width = mlp_cfg['input_dim']
        for spec in mlp_cfg['layers']:
            out_width = spec['output_dim']
            modules.append(nn.Linear(width, out_width))
            # Only 'relu' is recognized; any other value adds no activation.
            if spec.get('activation', None) == 'relu':
                modules.append(nn.ReLU())
            width = out_width
        # Final head emits raw logits (no activation).
        modules.append(nn.Linear(width, mlp_cfg['output_dim']))
        self.model = nn.Sequential(*modules)

    def forward(self, x):
        """Return raw class logits for input batch ``x``."""
        return self.model(x)
class TransformerModel(nn.Module):
    """Transformer-based classifier over flat feature vectors.

    Each input row (shape ``(batch, input_dim)``) is embedded to ``d_model``
    and treated as an independent sequence of length 1; the transformer
    output is projected to ``output_dim`` raw class logits.
    """

    def __init__(self, config):
        super(TransformerModel, self).__init__()
        tf_cfg = config['transformer']
        self.embedding = nn.Linear(tf_cfg['input_dim'], tf_cfg['d_model'])
        # batch_first=True so the (batch, seq, d_model) layout produced in
        # forward() is interpreted correctly. Without it nn.Transformer
        # expects (seq, batch, d_model), so the unsqueeze(1) below made the
        # *batch* axis act as the sequence axis: samples in a batch attended
        # to each other and predictions depended on batch composition.
        self.transformer = nn.Transformer(
            d_model=tf_cfg['d_model'],
            nhead=tf_cfg['nhead'],
            num_encoder_layers=tf_cfg['num_encoder_layers'],
            num_decoder_layers=tf_cfg['num_decoder_layers'],
            dim_feedforward=tf_cfg['dim_feedforward'],
            dropout=tf_cfg['dropout'],
            batch_first=True,
        )
        self.fc = nn.Linear(tf_cfg['d_model'], tf_cfg['output_dim'])

    def forward(self, x):
        """Return raw class logits for ``x`` of shape (batch, input_dim)."""
        x = self.embedding(x).unsqueeze(1)  # (batch, 1, d_model): one-token sequence
        transformer_output = self.transformer(x, x)
        return self.fc(transformer_output.squeeze(1))  # (batch, output_dim)

# (removed web-UI residue from the pasted diff view: "Loading… / Cancel / Save")