|
|
import os
|
|
|
import time
|
|
|
import datetime
|
|
|
import pandas as pd
|
|
|
from typing import List
|
|
|
from common import evaluate_model
|
|
|
import logging
|
|
|
import numpy as np
|
|
|
import yaml
|
|
|
from feature_process import create_feature_df, apply_feature_weights, Features, process_features_list
|
|
|
from data_process import preprocess_data, convert_to_list
|
|
|
|
|
|
def build_result_lines(list_avg_f1, list_wrong_percentage, list_precision, list_recall, list_f1):
    """Format the aggregated evaluation metrics as four report lines.

    Each argument holds one entry per evaluation run. ``list_avg_f1`` and
    ``list_wrong_percentage`` hold scalars; ``list_precision``,
    ``list_recall`` and ``list_f1`` hold indexable sequences of equal length
    (presumably one value per class -- confirm against ``evaluate_model``).

    Returns:
        A list of four strings: the averaged scalar metrics, followed by the
        position-wise mean and the overall ``np.mean`` of precision, recall
        and F1.

    Raises:
        ValueError: if any of the metric lists is empty.
    """
    all_metrics = (list_avg_f1, list_wrong_percentage, list_precision, list_recall, list_f1)
    if any(len(values) == 0 for values in all_metrics):
        raise ValueError("at least one evaluation run is required")

    def mean_by_position(rows):
        # Average the i-th entry of every run, for each position i.
        return [sum(row[i] for row in rows) / len(rows) for i in range(len(rows[0]))]

    mean_avg_f1 = sum(list_avg_f1) / len(list_avg_f1)
    mean_wrong_percentage = sum(list_wrong_percentage) / len(list_wrong_percentage)
    return [
        f"Result: Avg F1: {mean_avg_f1:.4f} Avg Wrong Percentage: {mean_wrong_percentage:.2f}%",
        f"Result: Avg Precision: {mean_by_position(list_precision)} | {np.mean(list_precision)}",
        f"Result: Avg Recall: {mean_by_position(list_recall)} | {np.mean(list_recall)}",
        f"Result: Avg F1: {mean_by_position(list_f1)} | {np.mean(list_f1)}",
    ]


if __name__ == "__main__":
    # Read the raw source tables.
    df_src = pd.read_excel("data/data_src.xlsx")
    df_leave = pd.read_excel("data_processed/Leave_Record_RES.xlsx")
    df_dropout_warning = pd.read_excel("data_processed/Dropout_Warning_RES.xlsx")

    # Data preprocessing.
    df = preprocess_data(df_src, df_leave, df_dropout_warning)

    # Convert the frame into a list of records, then into Features objects.
    features_data_list = convert_to_list(df)
    processed_features_list: List[Features] = process_features_list(features_data_list)

    # Feature preprocessing.
    all_features = create_feature_df(processed_features_list)

    # Read the YAML configuration that sits next to this script.
    config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "train_local.yaml"))
    # Explicit UTF-8 so parsing does not depend on the platform default encoding.
    with open(config_path, 'r', encoding="utf-8") as f:
        # NOTE(review): FullLoader can construct more than plain data types;
        # if this config only holds scalars/lists/dicts, yaml.safe_load is the
        # safer choice. Left unchanged to avoid altering how the file is parsed.
        config = yaml.load(f, Loader=yaml.FullLoader)
    feature_names = config['feature_names']
    feature_weights = config['feature_weights']

    # Apply the configured feature weights.
    feature_label_weighted = apply_feature_weights(all_features, feature_names, feature_weights)

    start_time = time.time()  # record the start time

    # Output directory for the exported data and the log, next to this script.
    static_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "evaluate_local"))
    os.makedirs(static_dir, exist_ok=True)

    # Export the weighted features; the timestamp keeps runs from overwriting each other.
    now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # static_dir is already absolute, so it is joined directly.
    data_path = os.path.join(static_dir, f"all_features_label_{now}.xlsx")
    config['data_path'] = data_path
    feature_label_weighted.to_excel(data_path, index=False)

    # Configure logging to a per-run file.
    log_path = os.path.join(static_dir, f"evaluate_{now}.log")
    logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

    # Features and labels.
    X = feature_label_weighted[feature_names].values
    y = feature_label_weighted[config['label_name']].values

    print(config)

    # Run the evaluation; metrics are kept in per-run lists (a single run here).
    avg_f1, wrong_percentage, precision, recall, f1 = evaluate_model(config["model_path"], X, y, config)
    list_avg_f1 = [avg_f1]
    list_wrong_percentage = [wrong_percentage]
    list_precision = [precision]
    list_recall = [recall]
    list_f1 = [f1]

    # Build the report once, then write it to the log and to stdout.
    result_lines = build_result_lines(list_avg_f1, list_wrong_percentage, list_precision, list_recall, list_f1)
    for line in result_lines:
        logging.info(line)
    for line in result_lines:
        print(line)

    end_time = time.time()  # record the end time

    # Evaluation finished; report the elapsed time.
    print("预测耗时:", end_time - start_time, "秒")