不会再改算法了, 完备

master
wangchunlin 2 years ago
parent e903ba49d0
commit 36b93fcbf0

@ -45,7 +45,7 @@ def classify_features(features_data_list):
返回:
dict: 包含分类结果和模型文件信息的字典
"""
response = requests.post("http://127.0.0.1:3397/evaluate/", json=features_data_list)
response = requests.post("http://p2p1.melulu.top:3397/evaluate/", json=features_data_list)
if response.status_code == 200:
results = response.json()
print("Precision:", results["classification_result"]["precision"])

@ -87,7 +87,7 @@ if __name__ == "__main__":
features_data_list = [features_data4, features_data2, features_data3, features_data1]
# 发送 POST 请求
response = requests.post("http://127.0.0.1:3397/inference/", json=features_data_list)
response = requests.post("http://p2p1.melulu.top:3397/inference/", json=features_data_list)
if response.status_code == 200:
# 获取分类结果列表

@ -8,11 +8,11 @@ batch_size: 16
learning_rate: 0.001
nc: 4
#data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
data_train: train
data_train: train_val
early_stop_patience: 50
gamma: 0.98
step_size: 10
experiments_count: 5
experiments_count: 1
#---检测和推理配置---#
# 检测和推理使用模型路径
@ -46,3 +46,79 @@ feature_weights:
- 0.08
- 0.12
#---网络结构---#
# MLP configuration
mlp:
input_dim: 10 # Number of input features
layers:
- output_dim: 32
activation: relu
- output_dim: 128
activation: relu
- output_dim: 32
activation: relu
output_dim: 4 # Number of classes
# Transformer configuration
transformer:
d_model: 32 # Reduced embedding dimension
nhead: 4 # Reduced number of attention heads
num_encoder_layers: 2 # Reduced number of encoder layers
num_decoder_layers: 2 # Reduced number of decoder layers
dim_feedforward: 128 # Reduced feedforward network dimension
dropout: 0.1 # Dropout probability
input_dim: 10 # Number of input features
output_dim: 4 # Number of classes
#---训练配置备份---#
# MLP good train param 1
# #---训练配置---#
# n_epochs: 150
# batch_size: 16
# learning_rate: 0.001
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1
# MLP good train param 2
# #---训练配置---#
# n_epochs: 300
# batch_size: 8
# learning_rate: 0.0005
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1
# Transformer good train param 1
# #---训练配置---#
# n_epochs: 150
# batch_size: 64
# learning_rate: 0.001
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1
# Transformer good train param 2
# #---训练配置---#
# n_epochs: 300
# batch_size: 8
# learning_rate: 0.0005
# nc: 4
# #data_train: train_val # train: 只用train训练val做验证 infer做测试train_val: 用train和val做训练infer做验证 infer做测试all: 全部训练全部验证全部测试数据先1/5作为infer剩下的再1/5作为val剩下的4/5作为训练
# data_train: train_val
# early_stop_patience: 50
# gamma: 0.98
# step_size: 10
# experiments_count: 1

@ -1,31 +1,34 @@
import os
import sys
root_path = os.getcwd()
sys.path.append(root_path)
import time
import datetime
import signal
import logging
import uvicorn
import pandas as pd
import yaml
import numpy as np
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List
from utils.common import train_detect, evaluate_model, inference_model
import atexit
from fastapi.middleware.cors import CORSMiddleware
import logging
import matplotlib.pyplot as plt
import argparse
import numpy as np
import yaml
import threading
import pickle
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from utils.feature_process import create_feature_df, apply_feature_weights, Features
from utils.common import MLModel
app = FastAPI()
# 控制是否打印的宏定义
PRINT_LOG = True
def log_print(message):
    """Record *message* on the root logger and, when PRINT_LOG is enabled, echo it to stdout."""
    logging.info(message)
    if not PRINT_LOG:
        return
    print(message)
# 保证日志写到文件
def flush_log():
    """Flush every handler attached to the root logger so buffered records reach the log file."""
    root_logger = logging.getLogger()
    for h in root_logger.handlers:
        h.flush()
# 定义fastapi返回类 inference
class PredictionResult(BaseModel):
    """Response schema returned by the /inference/ endpoint: a list of predicted class labels."""
    predictions: list
@ -46,14 +49,16 @@ app.add_middleware(
allow_headers=["*"],
)
# 定义接口
# 初始化配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
# 定义训练接口
@app.post("/train/")
async def classify_features(request: Request, features_list: List[Features]):
async def train_model(request: Request, features_list: List[Features]):
# 遍历每个特征对象,并将其添加到 all_features 中
all_features = create_feature_df(features_list)
# 读取 YAML 配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
with open(config_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
feature_names = config['feature_names']
@ -70,54 +75,53 @@ async def classify_features(request: Request, features_list: List[Features]):
# 训练前设置
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_feature_label_weighted_{now}.xlsx"))
data_path = os.path.abspath(os.path.join(static_dir, f"train_feature_label_weighted_{now}.xlsx"))
config['data_path'] = data_path
feature_label_weighted.to_excel(data_path, index=False)
# 添加模型保存路径
model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_model_{now}.pth"))
model_path = os.path.abspath(os.path.join(static_dir, f"train_model_{now}.pth"))
config['model_path'] = model_path
# 配置日志
log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_log_{now}.log"))
log_path = os.path.abspath(os.path.join(static_dir, f"train_log_{now}.log"))
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# 配置训练和验证结果图片路径
train_process_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_progress_img_{now}.png"))
train_process_path = os.path.abspath(os.path.join(static_dir, f"train_progress_img_{now}.png"))
config['train_process_path'] = train_process_path
evaluate_result_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_result_img_{now}.png"))
evaluate_result_path = os.path.abspath(os.path.join(static_dir, f"evaluate_result_img_{now}.png"))
config['evaluate_result_path'] = evaluate_result_path
print("config: ", config)
logging.info("config: ", config)
log_print("config: " + str(config))
# 开始训练
# 初始化 MLModel 实例
ml_model = MLModel(config)
list_avg_f1 = []
list_wrong_percentage = []
list_precision = []
list_recall = []
list_f1 = []
train_times = 1 if config['data_train']==r'all' else config["experiments_count"]
for i in range(train_times):
avg_f1, wrong_percentage, precision, recall, f1 = train_detect(config)
train_times = 1 if config['data_train'] == 'all' else config["experiments_count"]
for _ in range(train_times):
avg_f1, wrong_percentage, precision, recall, f1 = ml_model.train_detect()
list_avg_f1.append(avg_f1)
list_wrong_percentage.append(wrong_percentage)
list_precision.append(precision)
list_recall.append(recall)
list_f1.append(f1)
logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
log_print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
log_print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
log_print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
log_print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
end_time = time.time() # 记录结束时间
# 训练结束
print("预测耗时:", end_time - start_time, "") # 打印执行时间
log_print("预测耗时: " + str(end_time - start_time) + "") # 打印执行时间
# 保证日志写到文件
atexit.register(flush_log)
# 返回分类结果和模型文件下载 URLstatic不是程序执行路径而是app.mount的静态文件夹
model_file_url = f"{request.base_url}train_api/train_model_{now}.pth"
@ -139,14 +143,13 @@ async def classify_features(request: Request, features_list: List[Features]):
}
}
# 定义接口
# 定义验证接口
@app.post("/evaluate/")
async def classify_features(request: Request, features_list: List[Features]):
async def evaluate_model(request: Request, features_list: List[Features]):
# 遍历每个特征对象,并将其添加到 all_features 中
all_features = create_feature_df(features_list)
# 读取 YAML 配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
with open(config_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
feature_names = config['feature_names']
@ -163,50 +166,35 @@ async def classify_features(request: Request, features_list: List[Features]):
# 训练前设置
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_feature_label_weighted_{now}.xlsx"))
data_path = os.path.abspath(os.path.join(static_dir, f"evaluate_feature_label_weighted_{now}.xlsx"))
config['data_path'] = data_path
feature_label_weighted.to_excel(data_path, index=False)
# 配置验证结果图片路径
evaluate_result_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_result_img_{now}.png"))
evaluate_result_path = os.path.abspath(os.path.join(static_dir, f"evaluate_result_img_{now}.png"))
config['evaluate_result_path'] = evaluate_result_path
# 配置日志
log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_log_{now}.log"))
log_path = os.path.abspath(os.path.join(static_dir, f"evaluate_log_{now}.log"))
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# 开始验证
list_avg_f1 = []
list_wrong_percentage = []
list_precision = []
list_recall = []
list_f1 = []
# 特征和标签
X = feature_label_weighted[config['feature_names']].values
y = feature_label_weighted[config['label_name']].values
print("config: ", config)
logging.info("config: ", config)
avg_f1, wrong_percentage, precision, recall, f1 = evaluate_model(config["model_path"], X, y, config)
list_avg_f1.append(avg_f1)
list_wrong_percentage.append(wrong_percentage)
list_precision.append(precision)
list_recall.append(recall)
list_f1.append(f1)
# 初始化 MLModel 实例
ml_model = MLModel(config)
# 加载模型
ml_model.load_model()
logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
avg_f1, wrong_percentage, precision, recall, f1 = ml_model.evaluate_model(X, y)
end_time = time.time() # 记录结束时间
# 训练结束
print("预测耗时:", end_time - start_time, "") # 打印执行时间
log_print("预测耗时: " + str(end_time - start_time) + "") # 打印执行时间
# 保证日志写到文件
atexit.register(flush_log)
# 返回分类结果和模型文件下载 URLstatic不是程序执行路径而是app.mount的静态文件夹
log_file_url = f"{request.base_url}evaluate_api/evaluate_log_{now}.log"
@ -226,14 +214,13 @@ async def classify_features(request: Request, features_list: List[Features]):
}
}
# 定义接口
# 定义推理接口
@app.post("/inference/")
async def classify_features(request: Request, features_list: List[Features]):
async def inference_model(request: Request, features_list: List[Features]):
# 遍历每个特征对象,并将其添加到 all_features 中
all_features = create_feature_df(features_list)
# 读取 YAML 配置文件
config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml"))
with open(config_path, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
feature_names = config['feature_names']
@ -250,23 +237,32 @@ async def classify_features(request: Request, features_list: List[Features]):
# 训练前设置
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"inference_feature_label_weighted_{now}.xlsx"))
data_path = os.path.abspath(os.path.join(static_dir, f"inference_feature_label_weighted_{now}.xlsx"))
config['data_path'] = data_path
feature_label_weighted.to_excel(data_path, index=False)
# 配置日志
log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"inference_log_{now}.log"))
log_path = os.path.abspath(os.path.join(static_dir, f"inference_log_{now}.log"))
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
# 特征和标签
X = feature_label_weighted[config['feature_names']].values
y = feature_label_weighted[config['label_name']].values
predictions = inference_model(config["model_path"], X, y, config)
# 初始化 MLModel 实例
ml_model = MLModel(config)
# 加载模型
ml_model.load_model()
predictions = ml_model.inference_model(X)
end_time = time.time() # 记录结束时间
print("预测耗时:", end_time - start_time, "") # 打印执行时间
log_print("预测耗时: " + str(end_time - start_time) + " ") # 打印执行时间
print("预测结果:", predictions)
log_print("预测结果: " + str(predictions))
# 保证日志写到文件
atexit.register(flush_log)
# 返回预测结果
return PredictionResult(predictions=predictions)
@ -299,7 +295,5 @@ if __name__ == "__main__":
# 同级目录下的static文件夹
app.mount("/train_api", StaticFiles(directory=static_dir_train), name="static_dir_train")
app.mount("/evaluate_api", StaticFiles(directory=static_dir_evaluate), name="static_dir_evaluate")
app.mount("/inference_api", StaticFiles(directory=static_dir_evaluate), name="static_dir_inference")
app.mount("/inference_api", StaticFiles(directory=static_dir_inference), name="static_dir_inference")
uvicorn.run(app, host="0.0.0.0", port=3397, reload=False)
## train evl 功能OK了 差infer就可以了还有就是做一个模型上传机制目前为止最好的模型就是model下面那个

@ -10,33 +10,36 @@ from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
import logging
import matplotlib.pyplot as plt
import argparse
# 控制是否打印的宏定义
PRINT_LOG = True
class MLP(nn.Module):
def __init__(self, config):
super(MLP, self).__init__()
self.model = nn.Sequential(
nn.Linear(len(config['feature_names']), 32),
nn.ReLU(),
nn.Linear(32, 128),
nn.ReLU(),
nn.Linear(128, 32),
nn.ReLU(),
nn.Linear(32, config['nc']),
)
def log_print(message):
    """Write *message* to the log; additionally print it when the PRINT_LOG flag is set."""
    logging.info(message)
    if PRINT_LOG:
        # Mirror the log record on stdout for interactive runs.
        print(message)
def forward(self, x):
return self.model(x)
class MLModel:
def __init__(self, model_config):
self.config = model_config
self.model = None
def create_model(self):
self.model = MLP(self.config).to(self.config['device'])
# self.model = TransformerModel(self.config).to(self.config['device'])
def load_model(self):
self.create_model()
self.model.load_state_dict(torch.load(self.config['model_path'], map_location=self.config['device']))
def load_and_split_data(config):
def load_and_split_data(self):
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
file_path = os.path.join(parent_dir, config['data_path'])
file_path = os.path.join(parent_dir, self.config['data_path'])
data = pd.read_excel(file_path)
X = data[config['feature_names']].values
y = data[config['label_name']].values
X = data[self.config['feature_names']].values
y = data[self.config['label_name']].values
skf_outer = StratifiedKFold(n_splits=5, shuffle=True)
train_index_outer, test_index_outer = next(skf_outer.split(X, y))
@ -50,62 +53,42 @@ def load_and_split_data(config):
return X, y, X_train_val, y_train_val, X_train, y_train, X_val, y_val, X_infer, y_infer
def save_model(model_path, best_model):
torch.save(best_model, model_path)
def evaluate_model(model_path, X_infer, y_infer, config):
# 如果传入的是模型文件路径,则从该路径加载模型
if isinstance(model_path, str):
model = MLP(config).to(config['device'])
model.load_state_dict(torch.load(model_path, map_location=config['device'])) # 加载训练好的模型参数
else:
model = model_path
# infer_data = pd.DataFrame(X_infer, columns=config['feature_names'])
# infer_data[config['label_name']] = y_infer
# infer_data.to_excel(os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")), config['infer_path']), index=False)
model.eval()
def save_model(self, model_path):
torch.save(self.model.state_dict(), model_path)
def evaluate_model(self, X_infer, y_infer):
self.model.eval()
with torch.no_grad():
outputs = model(torch.from_numpy(X_infer).float().to(config['device']))
outputs = self.model(torch.from_numpy(X_infer).float().to(self.config['device']))
_, predictions = torch.max(outputs, 1)
wrong_indices = np.where(y_infer != predictions.cpu().numpy())[0]
wrong_count = len(wrong_indices)
precision = precision_score(y_infer, predictions.cpu().numpy(), average=None)
recall = recall_score(y_infer, predictions.cpu().numpy(), average=None)
f1 = f1_score(y_infer, predictions.cpu().numpy(), average=None)
wrong_count = len(np.where(y_infer != predictions.cpu().numpy())[0])
total_count = len(y_infer)
wrong_percentage = (wrong_count / total_count) * 100
print("Infer Result: ")
logging.info("Infer Result: ")
print("预测错误数量:", wrong_count)
print("预测错误占总数量的百分比:", wrong_percentage, "%")
print("总数量:", total_count)
log_print("Evaluate Result: ")
logging.info(f"Prediction errors: {wrong_count}")
logging.info(f"Prediction error percentage: {wrong_percentage:.2f}%")
logging.info(f"Total samples: {total_count}")
precision = precision_score(y_infer, predictions.cpu().numpy(), average=None)
recall = recall_score(y_infer, predictions.cpu().numpy(), average=None)
f1 = f1_score(y_infer, predictions.cpu().numpy(), average=None)
log_print(f"Prediction errors: {wrong_count}")
log_print(f"Prediction error percentage: {wrong_percentage:.2f}%")
log_print(f"Total samples: {total_count}")
avg_precision = np.mean(precision)
avg_recall = np.mean(recall)
avg_f1 = np.mean(f1)
for i in range(len(precision)):
print(f"Class {i} Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
log_print(f"Class {i} Precision: {precision[i]:.4f}, Recall: {recall[i]:.4f}, F1: {f1[i]:.4f}")
print("精确率:", precision)
print("召回率:", recall)
print("F1得分:", f1)
print("平均精确率:", avg_precision)
print("平均召回率:", avg_recall)
print("平均F1得分:", avg_f1)
print("Infer Result End: ")
logging.info("Infer Result End: ")
log_print("精确率:" + str(precision))
log_print("召回率:" + str(recall))
log_print("F1得分:" + str(f1))
log_print("平均精确率:" + str(avg_precision))
log_print("平均召回率:" + str(avg_recall))
log_print("平均F1得分:" + str(avg_f1))
log_print("Evaluate Result End: ")
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
ax1.bar(np.arange(len(precision)), precision)
@ -116,120 +99,40 @@ def evaluate_model(model_path, X_infer, y_infer, config):
ax3.set_title('F1 Score')
# 保存图片
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
evaluate_result_path = os.path.join(parent_dir, config['evaluate_result_path'])
evaluate_result_path = os.path.join(parent_dir, self.config['evaluate_result_path'])
plt.savefig(evaluate_result_path)
return avg_f1, wrong_percentage, precision, recall, f1
def inference_model(model_path, X_infer, y_infer, config):
# 如果传入的是模型文件路径,则从该路径加载模型
if isinstance(model_path, str):
model = MLP(config).to(config['device'])
model.load_state_dict(torch.load(model_path, map_location=config['device'])) # 加载训练好的模型参数
else:
model = model_path
# infer_data = pd.DataFrame(X_infer, columns=config['feature_names'])
# infer_data[config['label_name']] = y_infer
# infer_data.to_excel(os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")), config['infer_path']), index=False)
model.eval()
return np.mean(f1), wrong_percentage, precision, recall, f1
# 推理
def inference_model(self, X_infer):
self.model.eval()
with torch.no_grad():
outputs = model(torch.from_numpy(X_infer).float().to(config['device']))
outputs = self.model(torch.from_numpy(X_infer).float().to(self.config['device']))
# 获取预测结果
_, predictions = torch.max(outputs, 1)
# 实际类别从1开始程序类别从0开始
predictions += 1
# 打印预测结果
# print("预测结果:", predictions.cpu().numpy())
# 返回预测结果
return predictions.cpu().numpy().tolist()
def train_detect(config):
X, y, X_train_val, y_train_val, X_train, y_train, X_val, y_val, X_infer, y_infer = load_and_split_data(config)
if config['data_train'] == r'train_val':
train_dataset = TensorDataset(torch.from_numpy(X_train_val).float().to(config['device']), torch.from_numpy(y_train_val).long().to(config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_infer).float().to(config['device']), torch.from_numpy(y_infer).long().to(config['device']))
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train_val), y=y_train_val), dtype=torch.float32).to(config['device'])
logging.info(f"Class weights: {class_weights}")
elif config['data_train'] == r'train':
train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(config['device']), torch.from_numpy(y_train).long().to(config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(config['device']), torch.from_numpy(y_val).long().to(config['device']))
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train), y=y_train), dtype=torch.float32).to(config['device'])
logging.info(f"Class weights: {class_weights}")
elif config['data_train'] == r'all':
train_dataset = TensorDataset(torch.from_numpy(X).float().to(config['device']), torch.from_numpy(y).long().to(config['device']))
val_dataset = TensorDataset(torch.from_numpy(X).float().to(config['device']), torch.from_numpy(y).long().to(config['device']))
X_infer = X
y_infer = y
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y), y=y), dtype=torch.float32).to(config['device'])
logging.info(f"Class weights: {class_weights}")
else:
print("Error: Set data_train first in yaml!")
logging.error("Error: Set data_train first in yaml!")
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=config['batch_size'])
model = MLP(config).to(config['device'])
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, config['step_size'], config['gamma'])
best_val_f1, best_val_recall, best_val_precision, best_epoch, best_model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, config)
# 保存模型
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
model_path = os.path.join(parent_dir, config['model_path'])
save_model(model_path, best_model)
logging.info(f"Best Validation F1 Score (Macro): {best_val_f1:.4f}")
logging.info(f"Best Validation Recall (Macro): {best_val_recall:.4f}")
logging.info(f"Best Validation Precision (Macro): {best_val_precision:.4f}")
logging.info(f"Best Epoch: {best_epoch + 1}")
print(f"Best Validation F1 Score (Macro): {best_val_f1:.4f}")
print(f"Best Validation Recall (Macro): {best_val_recall:.4f}")
print(f"Best Validation Precision (Macro): {best_val_precision:.4f}")
print(f"Best Epoch: {best_epoch + 1}")
avg_f1, wrong_percentage, precision, recall, f1 = evaluate_model(model, X_infer, y_infer, config)
return avg_f1, wrong_percentage, precision, recall, f1
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, config):
n_epochs = config['n_epochs']
def train_model(self, train_loader, val_loader, criterion, optimizer, scheduler):
n_epochs = self.config['n_epochs']
best_val_f1 = 0.0
best_val_recall = 0.0
best_val_precision = 0.0
best_epoch = -1
best_model = None
patience = config['early_stop_patience']
patience = self.config['early_stop_patience']
trigger_times = 0
train_loss_history = []
train_acc_history = []
val_loss_history = []
val_acc_history = []
val_f1_history = []
val_precision_history = []
val_recall_history = []
train_loss_history, train_acc_history, val_loss_history, val_acc_history, val_f1_history, val_precision_history, val_recall_history = [[] for _ in range(7)]
plt.rcParams['figure.max_open_warning'] = 50
for epoch in range(n_epochs):
# 训练阶段
model.train()
# Training phase
self.model.train()
train_loss, train_acc = 0, 0
for inputs, targets in train_loader:
optimizer.zero_grad()
outputs = model(inputs)
outputs = self.model(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
@ -243,12 +146,12 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
# 更新学习率
scheduler.step()
# 验证阶段
model.eval()
# Validation phase
val_loss, val_acc, all_preds, all_targets = 0, 0, [], []
self.model.eval()
with torch.no_grad():
for inputs, targets in val_loader:
outputs = model(inputs)
outputs = self.model(inputs)
loss = criterion(outputs, targets)
val_loss += loss.item() * inputs.size(0)
_, preds = torch.max(outputs, 1)
@ -263,8 +166,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
class_recalls_m = recall_score(all_targets, all_preds, average='macro')
class_f1_scores_m = f1_score(all_targets, all_preds, average='macro')
logging.info(f'Epoch {epoch+1:0{3}d} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_acc:.4f} | Validation Mean Precision: {class_precisions_m:.4f} | Validation Mean Recall: {class_recalls_m:.4f} | Validation Mean F1_score: {class_f1_scores_m:.4f}')
print(f'Epoch {epoch+1:0{3}d} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_acc:.4f} | Validation Mean Precision: {class_precisions_m:.4f} | Validation Mean Recall: {class_recalls_m:.4f} | Validation Mean F1_score: {class_f1_scores_m:.4f}')
log_print(f'Epoch {epoch+1:0{3}d} | Train Loss: {train_loss:.4f} | Train Accuracy: {train_acc:.4f} | Validation Loss: {val_loss:.4f} | Validation Accuracy: {val_acc:.4f} | Validation Mean Precision: {class_precisions_m:.4f} | Validation Mean Recall: {class_recalls_m:.4f} | Validation Mean F1_score: {class_f1_scores_m:.4f}')
train_loss_history.append(train_loss)
train_acc_history.append(train_acc)
@ -292,7 +194,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
ax3.legend()
# 保存图片
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
train_process_path = os.path.join(parent_dir, config['train_process_path'])
train_process_path = os.path.join(parent_dir, self.config['train_process_path'])
plt.savefig(train_process_path)
if class_f1_scores_m > best_val_f1:
@ -300,49 +202,112 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler
best_val_recall = class_recalls_m
best_val_precision = class_precisions_m
best_epoch = epoch
best_model = model.state_dict()
best_model = self.model.state_dict()
trigger_times = 0
else:
trigger_times += 1
if trigger_times >= patience:
logging.info(f'Early stopping at epoch {epoch} | Best epoch : {best_epoch + 1}')
print(f'Early stopping at epoch {epoch} | Best epoch : {best_epoch + 1}')
log_print(f'Early stopping at epoch {epoch} | Best epoch : {best_epoch + 1}')
break
return best_val_f1, best_val_recall, best_val_precision, best_epoch, best_model
# NOTE(review): the region below is a corrupted paste of a git-diff view.
# The OLD script-style entry point (a module-level `__main__` block that
# loads a YAML config and drives a free function `train_detect(config)`) is
# interleaved with the NEW class-method implementation `train_detect(self)`.
# It is not valid Python as-is; the fragment markers below indicate which
# variant each chunk appears to belong to. Reconstruct against version
# control before editing further.
# -- OLD __main__ fragment: argument parsing and config load ------------------
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default='config.yaml', help='Path to the configuration file')
args = parser.parse_args()
with open(args.config, 'r') as f:
config = yaml.load(f, Loader=yaml.FullLoader)
# -- NEW method fragment: data selection per config['data_train'] -------------
# 'train_val': train on train+val, validate on the held-out inference split;
# 'train':     train on train, validate on val;
# 'all':       train/validate/infer on the full dataset.
def train_detect(self):
X, y, X_train_val, y_train_val, X_train, y_train, X_val, y_val, X_infer, y_infer = self.load_and_split_data()
if self.config['data_train'] == 'train_val':
train_dataset = TensorDataset(torch.from_numpy(X_train_val).float().to(self.config['device']), torch.from_numpy(y_train_val).long().to(self.config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_infer).float().to(self.config['device']), torch.from_numpy(y_infer).long().to(self.config['device']))
# Balanced class weights compensate for label imbalance in the loss.
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train_val), y=y_train_val), dtype=torch.float32).to(self.config['device'])
elif self.config['data_train'] == 'train':
train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(self.config['device']), torch.from_numpy(y_train).long().to(self.config['device']))
val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(self.config['device']), torch.from_numpy(y_val).long().to(self.config['device']))
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y_train), y=y_train), dtype=torch.float32).to(self.config['device'])
elif self.config['data_train'] == 'all':
train_dataset = TensorDataset(torch.from_numpy(X).float().to(self.config['device']), torch.from_numpy(y).long().to(self.config['device']))
val_dataset = TensorDataset(torch.from_numpy(X).float().to(self.config['device']), torch.from_numpy(y).long().to(self.config['device']))
X_infer = X
y_infer = y
class_weights = torch.tensor(compute_class_weight('balanced', classes=np.unique(y), y=y), dtype=torch.float32).to(self.config['device'])
else:
# Unknown data_train value: fail fast rather than train on the wrong split.
logging.error("Error: Set data_train first in yaml!")
raise ValueError("Error: Set data_train first in yaml!")
log_print(f"Class weights: {class_weights}")
# -- OLD __main__ fragment: logging setup and repeated-experiment loop --------
# Configure logging
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
log_path = os.path.join(parent_dir, config['log_path'])
logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
list_avg_f1 = []
list_wrong_percentage = []
list_precision = []
list_recall = []
list_f1 = []
# 'all' trains on the full set, so one run suffices; otherwise average
# metrics over config["experiments_count"] independent runs.
train_times = 1 if config['data_train']==r'all' else config["experiments_count"]
for i in range(train_times):
avg_f1, wrong_percentage, precision, recall, f1 = train_detect(config)
list_avg_f1.append(avg_f1)
list_wrong_percentage.append(wrong_percentage)
list_precision.append(precision)
list_recall.append(recall)
list_f1.append(f1)
# Per-class averages across runs, plus the overall mean of each metric.
logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%")
print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}")
print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}")
print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}")
# -- NEW method fragment: training loop, checkpointing, final evaluation ------
train_loader = DataLoader(train_dataset, batch_size=self.config['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=self.config['batch_size'])
self.create_model()
criterion = nn.CrossEntropyLoss(weight=class_weights)
optimizer = torch.optim.Adam(self.model.parameters(), lr=self.config['learning_rate'])
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, self.config['step_size'], self.config['gamma'])
best_val_f1, best_val_recall, best_val_precision, best_epoch, best_model = self.train_model(train_loader, val_loader, criterion, optimizer, scheduler)
# Save the best model
self.save_model(self.config['model_path'])
log_print(f"Best Validation F1 Score (Macro): {best_val_f1:.4f}")
log_print(f"Best Validation Recall (Macro): {best_val_recall:.4f}")
log_print(f"Best Validation Precision (Macro): {best_val_precision:.4f}")
log_print(f"Best Epoch: {best_epoch + 1}")
avg_f1, wrong_percentage, precision, recall, f1 = self.evaluate_model(X_infer, y_infer)
return avg_f1, wrong_percentage, precision, recall, f1
# class MLP(nn.Module):
# def __init__(self, config):
# super(MLP, self).__init__()
# self.model = nn.Sequential(
# nn.Linear(len(config['feature_names']), 32),
# nn.ReLU(),
# nn.Linear(32, 128),
# nn.ReLU(),
# nn.Linear(128, 32),
# nn.ReLU(),
# nn.Linear(32, config['nc']),
# )
# def forward(self, x):
# return self.model(x)
# 20260605
class MLP(nn.Module):
    """Configurable multi-layer perceptron classifier.

    The architecture is read from ``config['mlp']``: an ``input_dim``-wide
    input is passed through each entry of ``layers`` (an ``nn.Linear``
    optionally followed by a ReLU when that entry's ``activation`` is
    ``'relu'``), then projected to ``output_dim`` raw logits.
    """

    def __init__(self, config):
        super().__init__()
        mlp_cfg = config['mlp']
        modules = []
        width = mlp_cfg['input_dim']
        for spec in mlp_cfg['layers']:
            out_width = spec['output_dim']
            modules.append(nn.Linear(width, out_width))
            # Only 'relu' is recognized; any other value adds no activation.
            if spec.get('activation', None) == 'relu':
                modules.append(nn.ReLU())
            width = out_width
        # Final head emits raw logits (no activation).
        modules.append(nn.Linear(width, mlp_cfg['output_dim']))
        self.model = nn.Sequential(*modules)

    def forward(self, x):
        """Return raw class logits for input batch ``x``."""
        return self.model(x)
class TransformerModel(nn.Module):
    """Transformer-based classifier over flat feature vectors.

    Each input row (shape ``(batch, input_dim)``) is embedded to ``d_model``
    and treated as an independent sequence of length 1; the transformer
    output is projected to ``output_dim`` raw class logits.
    """

    def __init__(self, config):
        super(TransformerModel, self).__init__()
        tf_cfg = config['transformer']
        self.embedding = nn.Linear(tf_cfg['input_dim'], tf_cfg['d_model'])
        # batch_first=True so the (batch, seq, d_model) layout produced in
        # forward() is interpreted correctly. Without it nn.Transformer
        # expects (seq, batch, d_model), so the unsqueeze(1) below made the
        # *batch* axis act as the sequence axis: samples in a batch attended
        # to each other and predictions depended on batch composition.
        self.transformer = nn.Transformer(
            d_model=tf_cfg['d_model'],
            nhead=tf_cfg['nhead'],
            num_encoder_layers=tf_cfg['num_encoder_layers'],
            num_decoder_layers=tf_cfg['num_decoder_layers'],
            dim_feedforward=tf_cfg['dim_feedforward'],
            dropout=tf_cfg['dropout'],
            batch_first=True,
        )
        self.fc = nn.Linear(tf_cfg['d_model'], tf_cfg['output_dim'])

    def forward(self, x):
        """Return raw class logits for ``x`` of shape (batch, input_dim)."""
        x = self.embedding(x).unsqueeze(1)  # (batch, 1, d_model): one-token sequence
        transformer_output = self.transformer(x, x)
        return self.fc(transformer_output.squeeze(1))  # (batch, output_dim)

# (removed web-UI residue from the pasted diff view: "Loading… / Cancel / Save")