diff --git a/config/config.yaml b/config/config.yaml index a200a7f..b1139fa 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -38,12 +38,17 @@ training: experiments_count: 10 replace_model: true data_mode: "train_val" # 可选: "train", "train_val", "all" + class_weights: + balance_mode: "balanced" # 可选: "balanced", "none" (sklearn compute_class_weight 不支持 "balanced_subsample") + # recall_weights: [1.4, 1.5, 1.1, 0.5] # 召回率调节权重 值更大则更容易检测出来 + recall_weights: [1, 1, 1, 1] # 召回率调节权重 整体平衡 整体错误率更低 + weight_normalize: "max" # 可选: "max", "sum", "none" #---模型配置---# model: num_classes: 4 # nc input_dim: 10 - architecture: "transformer" # 可选: "mlp", "transformer" + architecture: "mlp" # 可选: "mlp", "transformer" mlp: layers: - output_dim: 32 diff --git a/psy_api.py b/psy_api.py index 8c7f92f..11a8656 100644 --- a/psy_api.py +++ b/psy_api.py @@ -162,6 +162,12 @@ async def train_model(request: Request, features_list: List[Features]) -> APIRes train_dir = os.path.abspath(os.path.join( os.path.dirname(__file__), "train_api")) + # 更新配置中的路径 (文件名须与下方返回的 URL 一致) + api_config.config['paths']['model'].update({ + 'train_process': os.path.join(train_dir, f"train_model_{now}_training_process.png"), + 'evaluate_result_path': os.path.join(train_dir, f"train_model_{now}_evaluate_result.png") + }) + # 保存训练数据 data_path = os.path.join(train_dir, f"train_feature_label_weighted_{now}.xlsx") feature_label_weighted.to_excel(data_path, index=False) @@ -204,7 +210,9 @@ async def train_model(request: Request, features_list: List[Features]) -> APIRes data_file={ "model_file_url": f"{request.base_url}train_api/train_model_{now}.pth", "log_file_url": f"{request.base_url}train_api/train_log_{now}.log", - "data_file_url": f"{request.base_url}train_api/train_feature_label_weighted_{now}.xlsx" + "data_file_url": f"{request.base_url}train_api/train_feature_label_weighted_{now}.xlsx", + "train_process_img_url": f"{request.base_url}train_api/train_model_{now}_training_process.png", + "evaluate_result_img_url": f"{request.base_url}train_api/train_model_{now}_evaluate_result.png" } ) @@
-234,6 +242,10 @@ async def evaluate_model(request: Request, features_list: List[Features]) -> API evaluate_dir = os.path.abspath(os.path.join( os.path.dirname(__file__), "evaluate_api")) + # 更新配置中的评估结果图路径 + api_config.config['paths']['model']['evaluate_result_path'] = os.path.join( + evaluate_dir, f"evaluate_result_{now}.png") + data_path = os.path.join( evaluate_dir, f"evaluate_feature_label_weighted_{now}.xlsx") feature_label_weighted.to_excel(data_path, index=False) @@ -259,7 +271,8 @@ async def evaluate_model(request: Request, features_list: List[Features]) -> API ), data_file={ "log_file_url": f"{request.base_url}evaluate_api/evaluate_log_{now}.log", - "data_file_url": f"{request.base_url}evaluate_api/evaluate_feature_label_weighted_{now}.xlsx" + "data_file_url": f"{request.base_url}evaluate_api/evaluate_feature_label_weighted_{now}.xlsx", + "evaluate_result_img_url": f"{request.base_url}evaluate_api/evaluate_result_{now}.png" } ) diff --git a/utils/model_trainer.py b/utils/model_trainer.py index d1d3838..bb38684 100644 --- a/utils/model_trainer.py +++ b/utils/model_trainer.py @@ -19,6 +19,7 @@ from typing import Dict, Any, Tuple, List import pandas as pd import datetime import shutil +import copy # 屏蔽警告 warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib.font_manager") @@ -146,6 +147,7 @@ class MLModel: l1_lambda = float(self.config['training']['regularization']['l1_lambda']) l2_lambda = float(self.config['training']['regularization']['l2_lambda']) + # 初始化指标记录字典 train_metrics = {'loss': [], 'acc': []} val_metrics = {'loss': [], 'acc': [], 'f1': [], 'precision': [], 'recall': []} @@ -156,20 +158,40 @@ class MLModel: optimizer, l1_lambda, l2_lambda) # 验证阶段 - val_loss, val_acc, val_f1 = self._validate_epoch(val_loader, criterion) + val_loss, val_acc, val_f1, val_precision, val_recall = self._validate_epoch(val_loader, criterion) # 更新学习率 scheduler.step() # 记录指标 - self._update_metrics(train_metrics, val_metrics, train_loss,
train_acc, - val_loss, val_acc, val_f1, epoch) + train_metrics['loss'].append(train_loss) + train_metrics['acc'].append(train_acc) + val_metrics['loss'].append(val_loss) + val_metrics['acc'].append(val_acc) + val_metrics['f1'].append(val_f1) + val_metrics['precision'].append(val_precision) + val_metrics['recall'].append(val_recall) + + # 打印训练信息 + log_message = ( + f'Epoch {epoch+1:03d} | ' + f'Train Loss: {train_loss:.4f} | ' + f'Train Acc: {train_acc:.4f} | ' + f'Val Loss: {val_loss:.4f} | ' + f'Val Acc: {val_acc:.4f} | ' + f'Val F1: {val_f1:.4f} | ' + f'Val Precision: {val_precision:.4f} | ' + f'Val Recall: {val_recall:.4f}' + ) + logging.info(log_message) + print(log_message) # 更新最佳模型信息 if val_f1 > best_val_f1: best_val_f1 = val_f1 - best_model = self.model.state_dict() - best_epoch = epoch + 1 # 记录最佳epoch + # 创建模型状态的深拷贝 + best_model = copy.deepcopy(self.model.state_dict()) + best_epoch = epoch + 1 trigger_times = 0 else: trigger_times += 1 @@ -182,12 +204,15 @@ class MLModel: print(log_message) break + # 绘制训练过程图 + self._plot_training_process(train_metrics, val_metrics) + # 打印最佳模型信息 log_message = f'Training completed. 
Best model at epoch {best_epoch} with F1: {best_val_f1:.4f}' logging.info(log_message) print(log_message) - return best_val_f1, best_model, best_epoch # 返回最佳epoch + return best_val_f1, best_model, best_epoch def _train_epoch(self, train_loader: DataLoader, criterion: nn.Module, optimizer: torch.optim.Optimizer, l1_lambda: float, @@ -221,7 +246,7 @@ class MLModel: return train_loss / len(train_loader.dataset), train_acc.double() / len(train_loader.dataset) - def _validate_epoch(self, val_loader: DataLoader, criterion: nn.Module) -> Tuple[float, float, float]: + def _validate_epoch(self, val_loader: DataLoader, criterion: nn.Module) -> Tuple[float, float, float, float, float]: """验证一个epoch""" self.model.eval() val_loss = 0 @@ -241,10 +266,16 @@ class MLModel: all_preds.extend(preds.cpu().numpy()) all_targets.extend(targets.cpu().numpy()) + # 计算各项指标 val_f1 = f1_score(all_targets, all_preds, average='macro') + val_precision = precision_score(all_targets, all_preds, average='macro') + val_recall = recall_score(all_targets, all_preds, average='macro') + return (val_loss / len(val_loader.dataset), val_acc.double() / len(val_loader.dataset), - val_f1) + val_f1, + val_precision, + val_recall) def evaluate_model(self, features_data: np.ndarray, labels: np.ndarray, is_training: bool = False) -> Tuple: """ @@ -290,6 +321,40 @@ class MLModel: logging.info(log_message) print(log_message) + # 添加评估结果可视化 + plt.figure(figsize=(15, 5)) + + # 设置类别标签 + class_labels = [f'类别{i}' for i in range(len(precision))] + x = np.arange(len(class_labels)) + width = 0.35 # 柱状图的宽度 + + # 精确率柱状图 + plt.subplot(131) + plt.bar(x, precision, width) + plt.title('Precision(精确率)', fontproperties=self.font_prop) + plt.xticks(x, class_labels, fontproperties=self.font_prop) + plt.ylim(0, 1) + + # 召回率柱状图 + plt.subplot(132) + plt.bar(x, recall, width) + plt.title('Recall(召回率)', fontproperties=self.font_prop) + plt.xticks(x, class_labels, fontproperties=self.font_prop) + plt.ylim(0, 1) + + # F1分数柱状图 + 
plt.subplot(133) + plt.bar(x, f1, width) + plt.title('F1 Score(F1得分)', fontproperties=self.font_prop) + plt.xticks(x, class_labels, fontproperties=self.font_prop) + plt.ylim(0, 1) + + plt.tight_layout() + if 'evaluate_result_path' in self.config['paths']['model']: + plt.savefig(self.config['paths']['model']['evaluate_result_path']) + plt.close() + return np.mean(f1), wrong_percentage, precision.tolist(), recall.tolist(), f1.tolist() def inference_model(self, features_data: np.ndarray) -> List[int]: @@ -323,6 +388,54 @@ class MLModel: torch.save(self.model.state_dict(), path) + def _compute_class_weights(self, y_train: np.ndarray) -> torch.Tensor: + """计算类别权重""" + weight_config = self.config['training'].get('class_weights', {}) + balance_mode = weight_config.get('balance_mode', 'balanced') + normalize_mode = weight_config.get('weight_normalize', 'max') + + # 计算基础平衡权重 + if balance_mode != 'none': + balanced_weights = compute_class_weight( + balance_mode, + classes=np.unique(y_train), + y=y_train + ) + else: + balanced_weights = np.ones(len(np.unique(y_train))) + + # 获取召回率调节权重 + recall_adjustment = np.array( + weight_config.get('recall_weights', [1.0] * len(np.unique(y_train))) + ) + + # 归一化调节权重 + if normalize_mode == 'max': + normalized_adjustment = recall_adjustment / np.max(recall_adjustment) + elif normalize_mode == 'sum': + normalized_adjustment = recall_adjustment / np.sum(recall_adjustment) * len(recall_adjustment) + else: + normalized_adjustment = recall_adjustment + + # 计算最终权重 + final_weights = balanced_weights * normalized_adjustment + + # 记录权重计算过程 + log_message = ( + f"\nClass weights calculation:\n" + f"Balance mode: {balance_mode}\n" + f"Normalize mode: {normalize_mode}\n" + f"Class distribution: {np.bincount(y_train)}\n" + f"Balanced weights: {balanced_weights}\n" + f"Recall adjustment: {recall_adjustment}\n" + f"Normalized adjustment: {normalized_adjustment}\n" + f"Final weights: {final_weights}" + ) + logging.info(log_message) + print(log_message) + + 
return torch.tensor(final_weights, dtype=torch.float32).to(self.device) + def train_detect(self) -> Tuple[float, float, List[float], List[float], List[float]]: """ 训练和检测模型 @@ -376,13 +489,8 @@ class MLModel: batch_size=self.config['training']['batch_size'] ) - # 计算类别权重 - class_weights = torch.tensor( - compute_class_weight('balanced', - classes=np.unique(y_train), - y=y_train), - dtype=torch.float32 - ).to(self.device) + # 使用新的权重计算方法 + class_weights = self._compute_class_weights(y_train) if self.config['training']['experimental_mode']: return self._run_experiments(X_train, y_train, X_val, y_val, class_weights) @@ -430,6 +538,8 @@ class MLModel: best_experiment_num = -1 best_experiment_epoch = -1 best_model_path = None + best_train_process_path = None + best_evaluate_result_path = None base_model_path = self.config['train_model_path'] base_name = os.path.splitext(base_model_path)[0] @@ -440,8 +550,16 @@ class MLModel: logging.info(f"Starting experiment {exp_num}/{self.config['training']['experiments_count']}") print(f"Starting experiment {exp_num}/{self.config['training']['experiments_count']}") - # 为每次实验创建模型保存路径(基础名称+序号) + # 为每次实验创建模型和图片保存路径 exp_model_path = f"{base_name}_exp{exp_num}{ext}" + exp_train_process_path = exp_model_path.replace('.pth', '_training_process.png') + exp_evaluate_result_path = exp_model_path.replace('.pth', '_evaluate_result.png') + + # 更新配置中的路径 + self.config['paths']['model'].update({ + 'train_process': exp_train_process_path, + 'evaluate_result_path': exp_evaluate_result_path + }) results = self._single_train_detect(X_train, y_train, X_val, y_val, class_weights, exp_model_path) avg_f1, wrong_percentage, precision, recall, f1, best_epoch = results @@ -461,11 +579,32 @@ class MLModel: best_experiment_num = exp_num best_experiment_epoch = best_epoch best_model_path = exp_model_path + best_train_process_path = exp_train_process_path + best_evaluate_result_path = exp_evaluate_result_path + + # 复制最佳实验的文件到目标路径(不带exp序号的版本) + base_model_path = 
self.config['train_model_path'] + base_train_process_path = base_model_path.replace('.pth', '_training_process.png') + base_evaluate_result_path = base_model_path.replace('.pth', '_evaluate_result.png') - # 直接复制最佳实验的模型文件到不带序号的版本和目标路径 shutil.copyfile(best_model_path, base_model_path) + shutil.copyfile(best_train_process_path, base_train_process_path) + shutil.copyfile(best_evaluate_result_path, base_evaluate_result_path) + + # 更新配置中的路径为不带序号的版本 + self.config['paths']['model'].update({ + 'train_process': base_train_process_path, + 'evaluate_result_path': base_evaluate_result_path + }) + if self.config['training']['replace_model']: - shutil.copyfile(best_model_path, self.config['paths']['model']['train']) + target_model_path = self.config['paths']['model']['train'] + target_train_process_path = target_model_path.replace('.pth', '_training_process.png') + target_evaluate_result_path = target_model_path.replace('.pth', '_evaluate_result.png') + + shutil.copyfile(best_model_path, target_model_path) + shutil.copyfile(best_train_process_path, target_train_process_path) + shutil.copyfile(best_evaluate_result_path, target_evaluate_result_path) # 打印日志信息 log_message = ( @@ -522,6 +661,14 @@ class MLModel: self.config['training']['scheduler']['gamma'] ) + # 设置训练过程图的保存路径 + if model_path: + train_process_path = model_path.replace('.pth', '_training_process.png') + self.config['paths']['model']['train_process'] = train_process_path + else: + train_process_path = self.config['train_model_path'].replace('.pth', '_training_process.png') + self.config['paths']['model']['train_process'] = train_process_path + # 训练模型 best_val_f1, best_model, best_epoch = self.train_model( train_loader, val_loader, criterion, optimizer, scheduler @@ -547,22 +694,24 @@ class MLModel: # 损失曲线 plt.subplot(131) - plt.plot(train_metrics['loss'], label='Train Loss') - plt.plot(val_metrics['loss'], label='Val Loss') - plt.title('Loss', fontproperties=self.font_prop) + plt.plot(train_metrics['loss'], label='Train 
Loss(训练损失)') + plt.plot(val_metrics['loss'], label='Val Loss(验证损失)') + plt.title('Loss(损失)', fontproperties=self.font_prop) plt.legend(prop=self.font_prop) # 准确率曲线 plt.subplot(132) - plt.plot(train_metrics['acc'], label='Train Acc') - plt.plot(val_metrics['acc'], label='Val Acc') - plt.title('Accuracy', fontproperties=self.font_prop) + plt.plot(train_metrics['acc'], label='Train Accuracy(训练正确率)') + plt.plot(val_metrics['acc'], label='Val Accuracy(验证正确率)') + plt.title('Accuracy(正确率)', fontproperties=self.font_prop) plt.legend(prop=self.font_prop) - # F1分数曲线 + # F1分数、精确率和召回率曲线 plt.subplot(133) - plt.plot(val_metrics['f1'], label='Val F1') - plt.title('F1 Score', fontproperties=self.font_prop) + plt.plot(val_metrics['f1'], label='Validation F1(验证F1得分)') + plt.plot(val_metrics['precision'], label='Validation Precision(验证精确率)') + plt.plot(val_metrics['recall'], label='Validation Recall(验证召回率)') + plt.title('Metrics(评估指标)', fontproperties=self.font_prop) plt.legend(prop=self.font_prop) plt.tight_layout()