You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
psy/train_local.py

87 lines
4.2 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import time
import datetime
import pandas as pd
from typing import List
from common import train_detect
import logging
import numpy as np
import yaml
from feature_process import create_feature_df, apply_feature_weights, Features, process_features_list
from data_process import preprocess_data, convert_to_list
def column_means(rows: List) -> List:
    """Return the position-wise mean over a list of equally long rows.

    Args:
        rows: One sequence of numbers per training run. All rows are expected
            to have the same length.

    Returns:
        A list whose ``i``-th entry is the mean of ``row[i]`` over all rows.
    """
    run_count = len(rows)
    return [sum(column) / run_count for column in zip(*rows)]


def format_result_lines(
    list_avg_f1: List,
    list_wrong_percentage: List,
    list_precision: List,
    list_recall: List,
    list_f1: List,
) -> List[str]:
    """Build the four summary lines reported after training.

    Args:
        list_avg_f1: One average-F1 value per run.
        list_wrong_percentage: One wrong-percentage value per run.
        list_precision: One row of precision values per run.
        list_recall: One row of recall values per run.
        list_f1: One row of F1 values per run.

    Returns:
        The summary lines, in the order they are logged and printed.

    Raises:
        ZeroDivisionError: If the lists are empty (no run was recorded).
    """
    mean_avg_f1 = sum(list_avg_f1) / len(list_avg_f1)
    mean_wrong = sum(list_wrong_percentage) / len(list_wrong_percentage)
    return [
        f"Result: Avg F1: {mean_avg_f1:.4f} Avg Wrong Percentage: {mean_wrong:.2f}%",
        f"Result: Avg Precision: {column_means(list_precision)} | {np.mean(list_precision)}",
        f"Result: Avg Recall: {column_means(list_recall)} | {np.mean(list_recall)}",
        f"Result: Avg F1: {column_means(list_f1)} | {np.mean(list_f1)}",
    ]


def main() -> None:
    """Prepare the feature table, run the configured trainings and report metrics.

    Reads the source spreadsheets, builds the weighted feature table, stores it
    together with the model path and log file under ``static_local`` next to
    this script, calls ``train_detect`` once per experiment and finally logs
    and prints the metrics averaged over all runs.
    """
    # Load the raw tables (these paths are relative to the working directory).
    df_src = pd.read_excel("data/data_src.xlsx")
    df_leave = pd.read_excel("data_processed/Leave_Record_RES.xlsx")
    df_dropout_warning = pd.read_excel("data_processed/Dropout_Warning_RES.xlsx")

    # Data preprocessing.
    df = preprocess_data(df_src, df_leave, df_dropout_warning)

    # Convert the frame into a list of feature records.
    features_data_list = convert_to_list(df)
    processed_features_list: List[Features] = process_features_list(features_data_list)

    # Feature preprocessing.
    all_features = create_feature_df(processed_features_list)

    # Read the YAML configuration stored next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, "train_local.yaml")
    # YAML streams are Unicode; decode explicitly instead of relying on the
    # platform's locale encoding.
    with open(config_path, "r", encoding="utf-8") as f:
        # NOTE(review): FullLoader can construct more than plain data types.
        # If the config only holds scalars/lists/dicts, prefer yaml.safe_load.
        config = yaml.load(f, Loader=yaml.FullLoader)
    feature_names = config['feature_names']
    feature_weights = config['feature_weights']

    # Apply the feature weights.
    feature_label_weighted = apply_feature_weights(all_features, feature_names, feature_weights)

    start_time = time.time()  # Record the start time.

    # Directory for the model, data and log files, placed beside this script.
    static_dir = os.path.join(script_dir, "static_local")
    os.makedirs(static_dir, exist_ok=True)

    # Pre-training setup: every artifact of this run shares one timestamp.
    now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    data_path = os.path.join(static_dir, f"all_features_label_{now}.xlsx")
    config['data_path'] = data_path
    feature_label_weighted.to_excel(data_path, index=False)

    # Add the path the model will be saved to.
    model_path = os.path.join(static_dir, f"model_{now}.pth")
    config['model_path'] = model_path

    # Configure logging.
    log_path = os.path.join(static_dir, f"train_{now}.log")
    logging.basicConfig(
        filename=log_path,
        level=logging.INFO,
        format='%(asctime)s %(levelname)s: %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )

    # Start training.
    list_avg_f1 = []
    list_wrong_percentage = []
    list_precision = []
    list_recall = []
    list_f1 = []
    # A single run when training on all data, otherwise the configured count.
    train_times = 1 if config['data_train'] == 'all' else config["experiments_count"]
    for _ in range(train_times):
        print(config)
        avg_f1, wrong_percentage, precision, recall, f1 = train_detect(config)
        list_avg_f1.append(avg_f1)
        list_wrong_percentage.append(wrong_percentage)
        list_precision.append(precision)
        list_recall.append(recall)
        list_f1.append(f1)

    if list_avg_f1:
        # Build the summary once and send it to both the log file and stdout.
        result_lines = format_result_lines(
            list_avg_f1, list_wrong_percentage, list_precision, list_recall, list_f1
        )
        for line in result_lines:
            logging.info(line)
        for line in result_lines:
            print(line)
    else:
        # Zero runs configured: averaging would divide by zero.
        logging.warning("No training runs were executed; no results to report.")

    end_time = time.time()  # Record the end time.
    # Training finished.
    print("预测耗时:", end_time - start_time, "")  # Print the elapsed time.


if __name__ == "__main__":
    main()