import os
import time
import datetime
import pandas as pd
from typing import List
from common import evaluate_model
import logging
import numpy as np
import yaml
from feature_process import create_feature_df, apply_feature_weights, Features, process_features_list
from data_process import preprocess_data, convert_to_list


def _mean(values):
    """Return the arithmetic mean of a non-empty sequence of numbers."""
    return sum(values) / len(values)


def _positionwise_mean(runs):
    """Average a list of equally long score sequences position by position.

    ``runs`` holds one indexable score sequence per evaluation run
    (presumably one score per class -- TODO confirm against ``evaluate_model``).
    """
    return [sum(column) / len(runs) for column in zip(*runs)]


def _build_summary_lines(avg_f1_runs, wrong_pct_runs, precision_runs, recall_runs, f1_runs):
    """Build the four result lines that are written to both the log and stdout."""
    return [
        f"Result: Avg F1: {_mean(avg_f1_runs):.4f} Avg Wrong Percentage: {_mean(wrong_pct_runs):.2f}%",
        f"Result: Avg Precision: {_positionwise_mean(precision_runs)} | {np.mean(precision_runs)}",
        f"Result: Avg Recall: {_positionwise_mean(recall_runs)} | {np.mean(recall_runs)}",
        f"Result: Avg F1: {_positionwise_mean(f1_runs)} | {np.mean(f1_runs)}",
    ]


def main():
    """Evaluate a saved model on the locally prepared dataset and report metrics.

    Steps: load the source spreadsheets, preprocess them into a weighted
    feature/label table, save that table next to this script, run
    ``evaluate_model`` once, then write the averaged metrics to a timestamped
    log file and to stdout.
    """
    base_dir = os.path.dirname(__file__)

    # Load the raw data tables.
    df_src = pd.read_excel("data/data_src.xlsx")
    df_leave = pd.read_excel("data_processed/Leave_Record_RES.xlsx")
    df_dropout_warning = pd.read_excel("data_processed/Dropout_Warning_RES.xlsx")

    # Data preprocessing.
    df = preprocess_data(df_src, df_leave, df_dropout_warning)

    # Convert the frame into a list of records, then into feature objects.
    features_data_list = convert_to_list(df)
    processed_features_list: List[Features] = process_features_list(features_data_list)

    # Feature preprocessing.
    all_features = create_feature_df(processed_features_list)

    # Read the YAML configuration that lives next to this script.
    config_path = os.path.abspath(os.path.join(base_dir, "train_local.yaml"))
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(config_path, 'r', encoding="utf-8") as f:
        # NOTE(review): FullLoader is kept to preserve behavior; prefer
        # yaml.safe_load if this file could ever come from an untrusted source.
        config = yaml.load(f, Loader=yaml.FullLoader)
    feature_names = config['feature_names']
    feature_weights = config['feature_weights']

    # Apply the configured feature weights.
    feature_label_weighted = apply_feature_weights(all_features, feature_names, feature_weights)

    # Timer starts here, so it excludes data loading and preprocessing above.
    start_time = time.time()

    # Output directory for the saved dataset and the log, sibling of this file.
    static_dir = os.path.abspath(os.path.join(base_dir, "evaluate_local"))
    os.makedirs(static_dir, exist_ok=True)

    # Pre-evaluation setup: one timestamp shared by the dataset and log names.
    now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # static_dir is already absolute, so joining it directly yields the same
    # path the original computed.
    data_path = os.path.join(static_dir, f"all_features_label_{now}.xlsx")
    config['data_path'] = data_path
    feature_label_weighted.to_excel(data_path, index=False)

    # Configure logging to a timestamped file in the output directory.
    log_path = os.path.join(static_dir, f"evaluate_{now}.log")
    logging.basicConfig(filename=log_path, level=logging.INFO,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')

    # Features and labels.
    X = feature_label_weighted[config['feature_names']].values
    y = feature_label_weighted[config['label_name']].values
    print(config)

    # Start evaluation. Results are kept in lists so the averaging below works
    # for any number of runs; currently exactly one run is performed.
    avg_f1, wrong_percentage, precision, recall, f1 = evaluate_model(config["model_path"], X, y, config)
    list_avg_f1 = [avg_f1]
    list_wrong_percentage = [wrong_percentage]
    list_precision = [precision]
    list_recall = [recall]
    list_f1 = [f1]

    # Build each message once and emit it to both sinks, so the log and the
    # console can never disagree.
    summary_lines = _build_summary_lines(
        list_avg_f1, list_wrong_percentage, list_precision, list_recall, list_f1
    )
    for line in summary_lines:
        logging.info(line)
    for line in summary_lines:
        print(line)

    end_time = time.time()
    # Print the elapsed time since start_time.
    print("预测耗时:", end_time - start_time, "秒")


if __name__ == "__main__":
    main()