import os import sys root_path = os.getcwd() sys.path.append(root_path) import time import datetime import signal import uvicorn import pandas as pd from fastapi import FastAPI, Request from pydantic import BaseModel from typing import List from utils.common import train_detect, evaluate_model, inference_model from fastapi.middleware.cors import CORSMiddleware import logging import matplotlib.pyplot as plt import argparse import numpy as np import yaml import threading import pickle from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles from utils.feature_process import create_feature_df, apply_feature_weights, Features app = FastAPI() # 定义fastapi返回类 inference class PredictionResult(BaseModel): predictions: list # 定义fastapi返回类 class ClassificationResult(BaseModel): precision: list recall: list f1: list wrong_percentage: float # 允许所有域名的跨域请求 app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], allow_headers=["*"], ) # 定义接口 @app.post("/train/") async def classify_features(request: Request, features_list: List[Features]): # 遍历每个特征对象,并将其添加到 all_features 中 all_features = create_feature_df(features_list) # 读取 YAML 配置文件 config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml")) with open(config_path, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) feature_names = config['feature_names'] feature_weights = config['feature_weights'] # 应用特征权重 feature_label_weighted = apply_feature_weights(all_features, feature_names, feature_weights) start_time = time.time() # 记录开始时间 # 创建静态文件存放文件夹 static_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "train_api")) # 设置模型文件和配置文件的存放目录,和本py同级 os.makedirs(static_dir, exist_ok=True) # 训练前设置 now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_feature_label_weighted_{now}.xlsx")) config['data_path'] = data_path feature_label_weighted.to_excel(data_path, index=False) # 添加模型保存路径 model_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_model_{now}.pth")) config['model_path'] = model_path # 配置日志 log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_log_{now}.log")) logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') # 配置训练和验证结果图片路径 train_process_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"train_progress_img_{now}.png")) config['train_process_path'] = train_process_path evaluate_result_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_result_img_{now}.png")) config['evaluate_result_path'] = evaluate_result_path print("config: ", config) logging.info("config: ", config) # 开始训练 list_avg_f1 = [] list_wrong_percentage = [] list_precision = [] list_recall = [] list_f1 = [] train_times = 1 if config['data_train']==r'all' else config["experiments_count"] for i in range(train_times): avg_f1, wrong_percentage, precision, recall, f1 = train_detect(config) list_avg_f1.append(avg_f1) list_wrong_percentage.append(wrong_percentage) list_precision.append(precision) list_recall.append(recall) list_f1.append(f1) logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%") logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}") logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}") logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}") print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%") print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}") print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}") print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}") end_time = time.time() # 记录结束时间 # 训练结束 print("预测耗时:", end_time - start_time, "秒") # 打印执行时间 # 返回分类结果和模型文件下载 URL,static不是程序执行路径,而是app.mount的静态文件夹 model_file_url = f"{request.base_url}train_api/train_model_{now}.pth" log_file_url = f"{request.base_url}train_api/train_log_{now}.log" data_file_url = f"{request.base_url}train_api/train_feature_label_weighted_{now}.xlsx" # 返回分类结果和模型文件 return { "classification_result": ClassificationResult( precision=precision, recall=recall, f1=f1, wrong_percentage=wrong_percentage ), "data_file": { "model_file_url": model_file_url, "log_file_url": log_file_url, "data_file_url": data_file_url } } # 定义接口 @app.post("/evaluate/") async def classify_features(request: Request, features_list: List[Features]): # 遍历每个特征对象,并将其添加到 all_features 中 all_features = create_feature_df(features_list) # 读取 YAML 配置文件 config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml")) with open(config_path, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) feature_names = config['feature_names'] feature_weights = config['feature_weights'] # 应用特征权重 feature_label_weighted = apply_feature_weights(all_features, feature_names, feature_weights) start_time = time.time() # 记录开始时间 # 创建静态文件存放文件夹 static_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "evaluate_api")) # 设置模型文件和配置文件的存放目录,和本py同级 os.makedirs(static_dir, exist_ok=True) # 训练前设置 now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_feature_label_weighted_{now}.xlsx")) config['data_path'] = data_path feature_label_weighted.to_excel(data_path, index=False) # 配置验证结果图片路径 evaluate_result_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_result_img_{now}.png")) config['evaluate_result_path'] = evaluate_result_path # 配置日志 log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"evaluate_log_{now}.log")) logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') # 开始验证 list_avg_f1 = [] list_wrong_percentage = [] list_precision = [] list_recall = [] list_f1 = [] # 特征和标签 X = feature_label_weighted[config['feature_names']].values y = feature_label_weighted[config['label_name']].values print("config: ", config) logging.info("config: ", config) avg_f1, wrong_percentage, precision, recall, f1 = evaluate_model(config["model_path"], X, y, config) list_avg_f1.append(avg_f1) list_wrong_percentage.append(wrong_percentage) list_precision.append(precision) list_recall.append(recall) list_f1.append(f1) logging.info(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%") logging.info(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}") logging.info(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}") logging.info(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}") print(f"Result: Avg F1: {sum(list_avg_f1) / len(list_avg_f1):.4f} Avg Wrong Percentage: {sum(list_wrong_percentage) / len(list_wrong_percentage):.2f}%") print(f"Result: Avg Precision: {[sum(p[i] for p in list_precision) / len(list_precision) for i in range(len(list_precision[0]))]} | {np.mean(list_precision)}") print(f"Result: Avg Recall: {[sum(r[i] for r in list_recall) / len(list_recall) for i in range(len(list_recall[0]))]} | {np.mean(list_recall)}") print(f"Result: Avg F1: {[sum(f1[i] for f1 in list_f1) / len(list_f1) for i in range(len(list_f1[0]))]} | {np.mean(list_f1)}") end_time = time.time() # 记录结束时间 # 训练结束 print("预测耗时:", end_time - start_time, "秒") # 打印执行时间 # 返回分类结果和模型文件下载 URL,static不是程序执行路径,而是app.mount的静态文件夹 log_file_url = f"{request.base_url}evaluate_api/evaluate_log_{now}.log" data_file_url = f"{request.base_url}evaluate_api/evaluate_feature_label_weighted_{now}.xlsx" # 返回分类结果和模型文件 return { "classification_result": ClassificationResult( precision=precision, recall=recall, f1=f1, wrong_percentage=wrong_percentage ), "data_file": { "log_file_url": log_file_url, "data_file_url": data_file_url } } # 定义接口 @app.post("/inference/") async def classify_features(request: Request, features_list: List[Features]): # 遍历每个特征对象,并将其添加到 all_features 中 all_features = create_feature_df(features_list) # 读取 YAML 配置文件 config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "config/config.yaml")) with open(config_path, 'r') as f: config = yaml.load(f, Loader=yaml.FullLoader) feature_names = config['feature_names'] feature_weights = config['feature_weights'] # 应用特征权重 feature_label_weighted = apply_feature_weights(all_features, feature_names, feature_weights) start_time = time.time() # 记录开始时间 # 创建静态文件存放文件夹 static_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "inference_api")) # 设置模型文件和配置文件的存放目录,和本py同级 os.makedirs(static_dir, exist_ok=True) # 训练前设置 now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"inference_feature_label_weighted_{now}.xlsx")) config['data_path'] = data_path feature_label_weighted.to_excel(data_path, index=False) # 配置日志 log_path = os.path.abspath(os.path.join(os.path.dirname(__file__), static_dir, f"inference_log_{now}.log")) logging.basicConfig(filename=log_path, level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') # 特征和标签 X = feature_label_weighted[config['feature_names']].values y = feature_label_weighted[config['label_name']].values predictions = inference_model(config["model_path"], X, y, config) end_time = time.time() # 记录结束时间 print("预测耗时:", end_time - start_time, "秒") # 打印执行时间 print("预测结果:", predictions) # 返回预测结果 return PredictionResult(predictions=predictions) # 以下是fastapi启动配置 if __name__ == "__main__": name_app = os.path.basename(__file__)[0:-3] # Get the name of the script log_config = { "version": 1, "disable_existing_loggers": True, "handlers": { "file_handler": { "class": "logging.FileHandler", "filename": "logfile.log", }, }, "root": { "handlers": ["file_handler"], "level": "INFO", }, } # 创建静态文件存放文件夹 static_dir_train = os.path.abspath(os.path.join(os.path.dirname(__file__), "train_api")) # 设置模型文件和配置文件的存放目录,和本py同级 static_dir_evaluate = os.path.abspath(os.path.join(os.path.dirname(__file__), "evaluate_api")) static_dir_inference = os.path.abspath(os.path.join(os.path.dirname(__file__), "inference_api")) os.makedirs(static_dir_train, exist_ok=True) os.makedirs(static_dir_evaluate, exist_ok=True) os.makedirs(static_dir_inference, exist_ok=True) # 同级目录下的static文件夹 app.mount("/train_api", StaticFiles(directory=static_dir_train), name="static_dir_train") app.mount("/evaluate_api", StaticFiles(directory=static_dir_evaluate), name="static_dir_evaluate") app.mount("/inference_api", StaticFiles(directory=static_dir_evaluate), name="static_dir_inference") uvicorn.run(app, host="0.0.0.0", port=3397, reload=False) ## train evl 功能OK了 差infer就可以了;还有就是做一个模型上传机制;目前为止最好的模型就是model下面那个