# psy/train_local.py

import os
import time
import datetime
import pandas as pd
from typing import List
from common import train_detect
import logging
import numpy as np
import yaml
from feature_process import create_feature_df, apply_feature_weights, Features, process_features_list
from data_process import preprocess_data, convert_to_list

def column_means(rows: List) -> List:
    """Average equally long metric rows position by position.

    Args:
        rows: One indexable sequence of metric values per training run.
            Positions are paired with ``zip``, so all rows are expected to
            have the same length.

    Returns:
        A list whose i-th entry is the mean of the i-th values of all rows.
    """
    return [sum(column) / len(rows) for column in zip(*rows)]


def build_result_lines(
    list_avg_f1: List,
    list_wrong_percentage: List,
    list_precision: List,
    list_recall: List,
    list_f1: List,
) -> List[str]:
    """Format the summary of all training runs as four report lines.

    Args:
        list_avg_f1: Average-F1 value of every run.
        list_wrong_percentage: Wrong-percentage value of every run.
        list_precision: Precision values of every run (one sequence per run).
        list_recall: Recall values of every run (one sequence per run).
        list_f1: F1 values of every run (one sequence per run).

    Returns:
        The four "Result: ..." lines, in the order they are reported.

    Raises:
        ValueError: If no run was recorded, since the averages would
            otherwise fail with an uninformative ZeroDivisionError.
    """
    if not list_avg_f1:
        raise ValueError(
            "no training runs were recorded; check 'experiments_count' in the config"
        )

    mean_avg_f1 = sum(list_avg_f1) / len(list_avg_f1)
    mean_wrong_percentage = sum(list_wrong_percentage) / len(list_wrong_percentage)
    lines = [
        f"Result: Avg F1: {mean_avg_f1:.4f} "
        f"Avg Wrong Percentage: {mean_wrong_percentage:.2f}%"
    ]
    # Each metric is reported as position-wise means plus the overall mean.
    for label, runs in (
        ("Precision", list_precision),
        ("Recall", list_recall),
        ("F1", list_f1),
    ):
        lines.append(f"Result: Avg {label}: {column_means(runs)} | {np.mean(runs)}")
    return lines


def main() -> None:
    """Prepare the weighted feature table, run the training and report results.

    Steps: read the source spreadsheets, preprocess them into features, apply
    the feature weights from ``train_local.yaml``, export the weighted table,
    call ``train_detect`` once or ``experiments_count`` times, then log and
    print the averaged metrics together with the elapsed time.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Read the raw data tables (paths are relative to the working directory).
    df_src = pd.read_excel("data/data_src.xlsx")
    df_leave = pd.read_excel("data_processed/Leave_Record_RES.xlsx")
    df_dropout_warning = pd.read_excel("data_processed/Dropout_Warning_RES.xlsx")

    # Data preprocessing.
    df = preprocess_data(df_src, df_leave, df_dropout_warning)

    # Convert the frame into a list of records.
    features_data_list = convert_to_list(df)

    processed_features_list: List[Features] = process_features_list(features_data_list)

    # Feature preprocessing.
    all_features = create_feature_df(processed_features_list)

    # Read the YAML configuration that sits next to this script.
    # The encoding is explicit so the result does not depend on the locale.
    # NOTE(review): FullLoader can build more than plain data types;
    # yaml.safe_load would be safer if the config holds only plain
    # scalars/lists/maps -- confirm before switching.
    config_path = os.path.join(script_dir, "train_local.yaml")
    with open(config_path, "r", encoding="utf-8") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    feature_names = config['feature_names']
    feature_weights = config['feature_weights']

    # Apply the feature weights.
    feature_label_weighted = apply_feature_weights(all_features, feature_names, feature_weights)

    start_time = time.time()  # start of the timed section

    # Create the output folder (sibling of this script) for the exported
    # data, the model file and the log.
    static_dir = os.path.join(script_dir, "static_local")
    os.makedirs(static_dir, exist_ok=True)

    # Pre-training setup: export the weighted table under a timestamped name.
    # static_dir is already absolute, so joining onto it is sufficient.
    now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    data_path = os.path.join(static_dir, f"all_features_label_{now}.xlsx")
    config['data_path'] = data_path
    feature_label_weighted.to_excel(data_path, index=False)

    # Add the model save path.
    config['model_path'] = os.path.join(static_dir, f"model_{now}.pth")

    # Configure logging to a per-run log file.
    log_path = os.path.join(static_dir, f"train_{now}.log")
    logging.basicConfig(
        filename=log_path,
        level=logging.INFO,
        format='%(asctime)s %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )

    # Start training: a single run when training on all data, otherwise
    # repeat the experiment as often as configured.
    list_avg_f1 = []
    list_wrong_percentage = []
    list_precision = []
    list_recall = []
    list_f1 = []
    train_times = 1 if config['data_train'] == 'all' else config["experiments_count"]
    for _ in range(train_times):
        print(config)
        avg_f1, wrong_percentage, precision, recall, f1 = train_detect(config)
        list_avg_f1.append(avg_f1)
        list_wrong_percentage.append(wrong_percentage)
        list_precision.append(precision)
        list_recall.append(recall)
        list_f1.append(f1)

    # Build the summary once, then send it to the log file and to stdout.
    result_lines = build_result_lines(
        list_avg_f1, list_wrong_percentage, list_precision, list_recall, list_f1
    )
    for line in result_lines:
        logging.info(line)
    for line in result_lines:
        print(line)

    end_time = time.time()  # end of the timed section
    # Training finished: print the elapsed time (message text kept as-is).
    print("预测耗时:", end_time - start_time, "秒")


if __name__ == "__main__":
    main()