1217,recall weights

1 year ago · ab399288ec
parent 9e6f84b449
commit ab399288ec
3 changed files with 197 additions and 30 deletions
--- a/config/config.yaml
+++ b/config/config.yaml
@ -38,12 +38,17 @@ training:
  experiments_count: 10
  replace_model: true
  data_mode: "train_val"  # 可选: "train", "train_val", "all"
+  class_weights:
+    # NOTE(review): balance_mode is passed straight to compute_class_weight in utils/model_trainer.py;
+    # if that is scikit-learn's helper it accepts only "balanced", a dict or None, so "balanced_subsample" would likely raise — confirm.
+    balance_mode: "balanced"  # options: "balanced", "balanced_subsample", "none"
+    # recall_weights: [1.4, 1.5, 1.1, 0.5]  # recall adjustment weights; a larger value makes that class easier to detect
+    recall_weights: [1, 1, 1, 1]  # recall adjustment weights; all equal = overall balance, lower overall error rate
+    weight_normalize: "max"    # options: "max", "sum", "none"

 #---模型配置---#
 model:
  num_classes: 4  # nc
  input_dim: 10
-  architecture: "transformer"  # 可选: "mlp", "transformer"
+  architecture: "mlp"  # 可选: "mlp", "transformer"
  mlp:
    layers:
      - output_dim: 32
--- a/psy_api.py
+++ b/psy_api.py
@ -162,6 +162,12 @@ async def train_model(request: Request, features_list: List[Features]) -> APIRes
    train_dir = os.path.abspath(os.path.join(
        os.path.dirname(__file__), "train_api"))
    
+    # 更新配置中的路径
+    api_config.config['paths']['model'].update({
+        'train_process': os.path.join(train_dir, f"train_process_{now}.png"),
+        'evaluate_result_path': os.path.join(train_dir, f"evaluate_result_{now}.png")
+    })
+    
    # 保存训练数据
    data_path = os.path.join(train_dir, f"train_feature_label_weighted_{now}.xlsx")
    feature_label_weighted.to_excel(data_path, index=False)
@ -204,7 +210,9 @@ async def train_model(request: Request, features_list: List[Features]) -> APIRes
        data_file={
            "model_file_url": f"{request.base_url}train_api/train_model_{now}.pth",
            "log_file_url": f"{request.base_url}train_api/train_log_{now}.log",
-            "data_file_url": f"{request.base_url}train_api/train_feature_label_weighted_{now}.xlsx"
+            "data_file_url": f"{request.base_url}train_api/train_feature_label_weighted_{now}.xlsx",
+            "train_process_img_url": f"{request.base_url}train_api/train_model_{now}_training_process.png",
+            "evaluate_result_img_url": f"{request.base_url}train_api/train_model_{now}_evaluate_result.png"
        }
    )

@ -234,6 +242,10 @@ async def evaluate_model(request: Request, features_list: List[Features]) -> API
    evaluate_dir = os.path.abspath(os.path.join(
        os.path.dirname(__file__), "evaluate_api"))
    
+    # 更新配置中的评估结果图路径
+    api_config.config['paths']['model']['evaluate_result_path'] = os.path.join(
+        evaluate_dir, f"evaluate_result_{now}.png")
+    
    data_path = os.path.join(
        evaluate_dir, f"evaluate_feature_label_weighted_{now}.xlsx")
    feature_label_weighted.to_excel(data_path, index=False)
@ -259,7 +271,8 @@ async def evaluate_model(request: Request, features_list: List[Features]) -> API
        ),
        data_file={
            "log_file_url": f"{request.base_url}evaluate_api/evaluate_log_{now}.log",
-            "data_file_url": f"{request.base_url}evaluate_api/evaluate_feature_label_weighted_{now}.xlsx"
+            "data_file_url": f"{request.base_url}evaluate_api/evaluate_feature_label_weighted_{now}.xlsx",
+            "evaluate_result_img_url": f"{request.base_url}evaluate_api/evaluate_model_{now}_evaluate_result.png"
        }
    )

--- a/utils/model_trainer.py
+++ b/utils/model_trainer.py
@ -19,6 +19,7 @@ from typing import Dict, Any, Tuple, List
 import pandas as pd
 import datetime
 import shutil
+import copy

 # 屏蔽警告
 warnings.filterwarnings("ignore", category=UserWarning, module="matplotlib.font_manager")
@ -146,6 +147,7 @@ class MLModel:
        l1_lambda = float(self.config['training']['regularization']['l1_lambda'])
        l2_lambda = float(self.config['training']['regularization']['l2_lambda'])

+        # 初始化指标记录字典
        train_metrics = {'loss': [], 'acc': []}
        val_metrics = {'loss': [], 'acc': [], 'f1': [], 'precision': [], 'recall': []}

@ -156,20 +158,40 @@ class MLModel:
                                                    optimizer, l1_lambda, l2_lambda)
            
            # 验证阶段
-            val_loss, val_acc, val_f1 = self._validate_epoch(val_loader, criterion)
+            val_loss, val_acc, val_f1, val_precision, val_recall = self._validate_epoch(val_loader, criterion)
            
            # 更新学习率
            scheduler.step()
            
            # 记录指标
-            self._update_metrics(train_metrics, val_metrics, train_loss, train_acc, 
-                               val_loss, val_acc, val_f1, epoch)
+            train_metrics['loss'].append(train_loss)
+            train_metrics['acc'].append(train_acc)
+            val_metrics['loss'].append(val_loss)
+            val_metrics['acc'].append(val_acc)
+            val_metrics['f1'].append(val_f1)
+            val_metrics['precision'].append(val_precision)
+            val_metrics['recall'].append(val_recall)
+            
+            # 打印训练信息
+            log_message = (
+                f'Epoch {epoch+1:03d} | '
+                f'Train Loss: {train_loss:.4f} | '
+                f'Train Acc: {train_acc:.4f} | '
+                f'Val Loss: {val_loss:.4f} | '
+                f'Val Acc: {val_acc:.4f} | '
+                f'Val F1: {val_f1:.4f} | '
+                f'Val Precision: {val_precision:.4f} | '
+                f'Val Recall: {val_recall:.4f}'
+            )
+            logging.info(log_message)
+            print(log_message)
            
            # 更新最佳模型信息
            if val_f1 > best_val_f1:
                best_val_f1 = val_f1
-                best_model = self.model.state_dict()
-                best_epoch = epoch + 1  # 记录最佳epoch
+                # 创建模型状态的深拷贝
+                best_model = copy.deepcopy(self.model.state_dict())
+                best_epoch = epoch + 1
                trigger_times = 0
            else:
                trigger_times += 1
@ -182,12 +204,15 @@ class MLModel:
                    print(log_message)
                    break

+        # 绘制训练过程图
+        self._plot_training_process(train_metrics, val_metrics)
+        
        # 打印最佳模型信息
        log_message = f'Training completed. Best model at epoch {best_epoch} with F1: {best_val_f1:.4f}'
        logging.info(log_message)
        print(log_message)

-        return best_val_f1, best_model, best_epoch  # 返回最佳epoch
+        return best_val_f1, best_model, best_epoch

    def _train_epoch(self, train_loader: DataLoader, criterion: nn.Module, 
                    optimizer: torch.optim.Optimizer, l1_lambda: float, 
@ -221,7 +246,7 @@ class MLModel:
            
        return train_loss / len(train_loader.dataset), train_acc.double() / len(train_loader.dataset)

-    def _validate_epoch(self, val_loader: DataLoader, criterion: nn.Module) -> Tuple[float, float, float]:
+    def _validate_epoch(self, val_loader: DataLoader, criterion: nn.Module) -> Tuple[float, float, float, float, float]:
        """验证一个epoch"""
        self.model.eval()
        val_loss = 0
@ -241,10 +266,16 @@ class MLModel:
                all_preds.extend(preds.cpu().numpy())
                all_targets.extend(targets.cpu().numpy())
        
+        # 计算各项指标
        val_f1 = f1_score(all_targets, all_preds, average='macro')
+        val_precision = precision_score(all_targets, all_preds, average='macro')
+        val_recall = recall_score(all_targets, all_preds, average='macro')
+        
        return (val_loss / len(val_loader.dataset), 
                val_acc.double() / len(val_loader.dataset), 
-                val_f1)
+                val_f1,
+                val_precision,
+                val_recall)

    def evaluate_model(self, features_data: np.ndarray, labels: np.ndarray, is_training: bool = False) -> Tuple:
        """
@ -290,6 +321,40 @@ class MLModel:
        logging.info(log_message)
        print(log_message)
        
+        # 添加评估结果可视化
+        plt.figure(figsize=(15, 5))
+        
+        # 设置类别标签
+        class_labels = [f'类别{i}' for i in range(len(precision))]
+        x = np.arange(len(class_labels))
+        width = 0.35  # 柱状图的宽度
+        
+        # 精确率柱状图
+        plt.subplot(131)
+        plt.bar(x, precision, width)
+        plt.title('Precision(精确率)', fontproperties=self.font_prop)
+        plt.xticks(x, class_labels, fontproperties=self.font_prop)
+        plt.ylim(0, 1)
+        
+        # 召回率柱状图
+        plt.subplot(132)
+        plt.bar(x, recall, width)
+        plt.title('Recall(召回率)', fontproperties=self.font_prop)
+        plt.xticks(x, class_labels, fontproperties=self.font_prop)
+        plt.ylim(0, 1)
+        
+        # F1分数柱状图
+        plt.subplot(133)
+        plt.bar(x, f1, width)
+        plt.title('F1 Score(F1得分)', fontproperties=self.font_prop)
+        plt.xticks(x, class_labels, fontproperties=self.font_prop)
+        plt.ylim(0, 1)
+        
+        plt.tight_layout()
+        if 'evaluate_result_path' in self.config['paths']['model']:
+            plt.savefig(self.config['paths']['model']['evaluate_result_path'])
+        plt.close()
+        
        return np.mean(f1), wrong_percentage, precision.tolist(), recall.tolist(), f1.tolist()

    def inference_model(self, features_data: np.ndarray) -> List[int]:
@ -323,6 +388,54 @@ class MLModel:
        
        torch.save(self.model.state_dict(), path)

+    def _compute_class_weights(self, y_train: np.ndarray) -> torch.Tensor:
+        """Compute the per-class weight tensor from the training labels.
+
+        Reads the optional ``training.class_weights`` section of ``self.config``
+        (``balance_mode``, ``recall_weights``, ``weight_normalize``), multiplies
+        the balancing weights by the normalized recall adjustment, logs every
+        intermediate value, and returns the product.
+
+        Args:
+            y_train: Training labels; one weight is produced per distinct value.
+
+        Returns:
+            A ``float32`` tensor of the final weights, moved to ``self.device``.
+        """
+        # Every setting is optional: a missing section falls back to
+        # 'balanced' balancing with 'max' normalization.
+        weight_config = self.config['training'].get('class_weights', {})
+        balance_mode = weight_config.get('balance_mode', 'balanced')
+        normalize_mode = weight_config.get('weight_normalize', 'max')
+        
+        # Base balancing weights; 'none' disables balancing (all ones).
+        # NOTE(review): balance_mode is forwarded unchanged. If compute_class_weight
+        # is scikit-learn's helper it accepts only "balanced", a dict or None, so the
+        # "balanced_subsample" value advertised in config.yaml would likely raise — confirm.
+        if balance_mode != 'none':
+            balanced_weights = compute_class_weight(
+                balance_mode, 
+                classes=np.unique(y_train), 
+                y=y_train
+            )
+        else:
+            balanced_weights = np.ones(len(np.unique(y_train)))
+        
+        # Recall adjustment weights from the config (default: 1.0 per distinct label).
+        # NOTE(review): the configured list must have one entry per distinct label
+        # in y_train, otherwise the element-wise product below will not line up — verify.
+        recall_adjustment = np.array(
+            weight_config.get('recall_weights', [1.0] * len(np.unique(y_train)))
+        )
+        
+        # Normalize the adjustment: 'max' scales the largest entry to 1, 'sum' scales
+        # the entries so they average 1, anything else leaves them unchanged.
+        # NOTE(review): all-zero recall_weights would divide by zero in both modes.
+        if normalize_mode == 'max':
+            normalized_adjustment = recall_adjustment / np.max(recall_adjustment)
+        elif normalize_mode == 'sum':
+            normalized_adjustment = recall_adjustment / np.sum(recall_adjustment) * len(recall_adjustment)
+        else:
+            normalized_adjustment = recall_adjustment
+        
+        # Final weights: element-wise product of balancing and adjustment.
+        final_weights = balanced_weights * normalized_adjustment
+        
+        # Record the whole calculation in the log and on stdout.
+        # np.bincount assumes y_train holds non-negative integer labels — TODO confirm.
+        log_message = (
+            f"\nClass weights calculation:\n"
+            f"Balance mode: {balance_mode}\n"
+            f"Normalize mode: {normalize_mode}\n"
+            f"Class distribution: {np.bincount(y_train)}\n"
+            f"Balanced weights: {balanced_weights}\n"
+            f"Recall adjustment: {recall_adjustment}\n"
+            f"Normalized adjustment: {normalized_adjustment}\n"
+            f"Final weights: {final_weights}"
+        )
+        logging.info(log_message)
+        print(log_message)
+        
+        return torch.tensor(final_weights, dtype=torch.float32).to(self.device)
+
    def train_detect(self) -> Tuple[float, float, List[float], List[float], List[float]]:
        """
        训练和检测模型
@ -376,13 +489,8 @@ class MLModel:
            batch_size=self.config['training']['batch_size']
        )
        
-        # 计算类别权重
-        class_weights = torch.tensor(
-            compute_class_weight('balanced', 
-                               classes=np.unique(y_train), 
-                               y=y_train),
-            dtype=torch.float32
-        ).to(self.device)
+        # 使用新的权重计算方法
+        class_weights = self._compute_class_weights(y_train)
        
        if self.config['training']['experimental_mode']:
            return self._run_experiments(X_train, y_train, X_val, y_val, class_weights)
@ -430,6 +538,8 @@ class MLModel:
        best_experiment_num = -1
        best_experiment_epoch = -1
        best_model_path = None
+        best_train_process_path = None
+        best_evaluate_result_path = None
        
        base_model_path = self.config['train_model_path']
        base_name = os.path.splitext(base_model_path)[0]
@ -440,8 +550,16 @@ class MLModel:
            logging.info(f"Starting experiment {exp_num}/{self.config['training']['experiments_count']}")
            print(f"Starting experiment {exp_num}/{self.config['training']['experiments_count']}")
            
-            # 为每次实验创建模型保存路径（基础名称+序号）
+            # 为每次实验创建模型和图片保存路径
            exp_model_path = f"{base_name}_exp{exp_num}{ext}"
+            exp_train_process_path = exp_model_path.replace('.pth', '_training_process.png')
+            exp_evaluate_result_path = exp_model_path.replace('.pth', '_evaluate_result.png')
+            
+            # 更新配置中的路径
+            self.config['paths']['model'].update({
+                'train_process': exp_train_process_path,
+                'evaluate_result_path': exp_evaluate_result_path
+            })
            
            results = self._single_train_detect(X_train, y_train, X_val, y_val, class_weights, exp_model_path)
            avg_f1, wrong_percentage, precision, recall, f1, best_epoch = results
@ -461,11 +579,32 @@ class MLModel:
                best_experiment_num = exp_num
                best_experiment_epoch = best_epoch
                best_model_path = exp_model_path
+                best_train_process_path = exp_train_process_path
+                best_evaluate_result_path = exp_evaluate_result_path
+        
+        # 复制最佳实验的文件到目标路径（不带exp序号的版本）
+        base_model_path = self.config['train_model_path']
+        base_train_process_path = base_model_path.replace('.pth', '_training_process.png')
+        base_evaluate_result_path = base_model_path.replace('.pth', '_evaluate_result.png')
        
-        # 直接复制最佳实验的模型文件到不带序号的版本和目标路径
        shutil.copyfile(best_model_path, base_model_path)
+        shutil.copyfile(best_train_process_path, base_train_process_path)
+        shutil.copyfile(best_evaluate_result_path, base_evaluate_result_path)
+        
+        # 更新配置中的路径为不带序号的版本
+        self.config['paths']['model'].update({
+            'train_process': base_train_process_path,
+            'evaluate_result_path': base_evaluate_result_path
+        })
+        
        if self.config['training']['replace_model']:
-            shutil.copyfile(best_model_path, self.config['paths']['model']['train'])
+            target_model_path = self.config['paths']['model']['train']
+            target_train_process_path = target_model_path.replace('.pth', '_training_process.png')
+            target_evaluate_result_path = target_model_path.replace('.pth', '_evaluate_result.png')
+            
+            shutil.copyfile(best_model_path, target_model_path)
+            shutil.copyfile(best_train_process_path, target_train_process_path)
+            shutil.copyfile(best_evaluate_result_path, target_evaluate_result_path)
        
        # 打印日志信息
        log_message = (
@ -522,6 +661,14 @@ class MLModel:
            self.config['training']['scheduler']['gamma']
        )
        
+        # 设置训练过程图的保存路径
+        if model_path:
+            train_process_path = model_path.replace('.pth', '_training_process.png')
+            self.config['paths']['model']['train_process'] = train_process_path
+        else:
+            train_process_path = self.config['train_model_path'].replace('.pth', '_training_process.png')
+            self.config['paths']['model']['train_process'] = train_process_path
+        
        # 训练模型
        best_val_f1, best_model, best_epoch = self.train_model(
            train_loader, val_loader, criterion, optimizer, scheduler
@ -547,22 +694,24 @@ class MLModel:
        
        # 损失曲线
        plt.subplot(131)
-        plt.plot(train_metrics['loss'], label='Train Loss')
-        plt.plot(val_metrics['loss'], label='Val Loss')
-        plt.title('Loss', fontproperties=self.font_prop)
+        plt.plot(train_metrics['loss'], label='Train Loss(训练损失)')
+        plt.plot(val_metrics['loss'], label='Val Loss(验证损失)')
+        plt.title('Loss(损失)', fontproperties=self.font_prop)
        plt.legend(prop=self.font_prop)
        
        # 准确率曲线
        plt.subplot(132)
-        plt.plot(train_metrics['acc'], label='Train Acc')
-        plt.plot(val_metrics['acc'], label='Val Acc')
-        plt.title('Accuracy', fontproperties=self.font_prop)
+        plt.plot(train_metrics['acc'], label='Train Accuracy(训练正确率)')
+        plt.plot(val_metrics['acc'], label='Val Accuracy(验证正确率)')
+        plt.title('Accuracy(正确率)', fontproperties=self.font_prop)
        plt.legend(prop=self.font_prop)
        
-        # F1分数曲线
+        # F1分数、精确率和召回率曲线
        plt.subplot(133)
-        plt.plot(val_metrics['f1'], label='Val F1')
-        plt.title('F1 Score', fontproperties=self.font_prop)
+        plt.plot(val_metrics['f1'], label='Validation F1(验证F1得分)')
+        plt.plot(val_metrics['precision'], label='Validation Precision(验证精确率)')
+        plt.plot(val_metrics['recall'], label='Validation Recall(验证召回率)')
+        plt.title('Metrics(评估指标)', fontproperties=self.font_prop)
        plt.legend(prop=self.font_prop)
        
        plt.tight_layout()