训练和推理换成了回归0到1

main
wangchunlin 3 years ago
parent 9c7a560fbd
commit be77533363

@ -0,0 +1,133 @@
"""
文件名: detect_num.py
推理部分代码
作者: 王春林
创建日期: 2023年7月14日
最后修改日期: 2023年7月18日
版本号: 1.0.0
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
# Load the feature/label spreadsheet (one row per student).
data = pd.read_excel('feature_label.xlsx')
# Student-ID column ("编号"), used later to report per-sample predictions in loader order.
sample_ids = data['编号'].values
# The 17 feature column names: questionnaire subscales plus digitized background fields.
feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对", "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化", "母亲教养方式数字化", "自评家庭经济条件数字化", "有无心理治疗(咨询)史数字化", "出勤情况数字化", "学业情况数字化", "权重数字化值"]
# Split features from labels; min-max scale each feature column to [0, 1].
X = data[feature_names].values
y = data['label'].values - 1 # map labels 1-4 to integer classes 0-3
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
# Define the MLP network: 17 input features -> single regression output in [0, 1].
class MLP(nn.Module):
    """Fully connected network mapping 17 features to one regression value.

    The scalar output is later bucketed into 4 ordinal classes by thresholding.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(17, 32),   # input layer (17 features)
            nn.ReLU(),
            nn.Linear(32, 128),  # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),  # hidden layer
            nn.ReLU(),
            nn.Linear(32, 1),    # output layer: one regression value
        )

    def forward(self, x):
        # squeeze(-1) removes only the trailing size-1 output dimension.
        # A bare squeeze() would also collapse a batch of size 1 into a
        # 0-dim tensor, which breaks iteration over the outputs downstream.
        return self.model(x).squeeze(-1)
# Load the trained fold-5 checkpoint and switch to evaluation mode.
model = MLP().to(device)
model.load_state_dict(torch.load('model_fold5.pth'))
model.eval()
# Build a non-shuffled loader over the full dataset so that batch order
# matches the row order of sample_ids.
dataset = TensorDataset(torch.from_numpy(X).float().to(device), torch.from_numpy(y).long().to(device))
loader = DataLoader(dataset, batch_size=32)
# ---- Inference pass 1: per-sample predictions and overall accuracy ----
# The regression output in [0, 1] is bucketed into 4 classes by counting how
# many thresholds it exceeds; thresholds are the midpoints between the class
# targets 0, 1/3, 2/3, 1. Hoisted out of the loop (loop-invariant).
thresholds = [1/6, 1/2, 5/6]
corrects = 0
sample_index = 0
with torch.no_grad():  # inference only — no gradients needed
    for inputs, targets in loader:
        outputs = model(inputs)
        # NOTE: removed leftover debug statements (print(outputs),
        # print(targets.data), input()) — input() blocked execution per batch.
        preds = torch.tensor([sum(o.item() > t for t in thresholds) for o in torch.flatten(outputs)]).to(device)
        corrects += torch.sum(preds == targets.data)
        # Report each sample; sample_ids align because the loader is not shuffled.
        for i in range(len(inputs)):
            print(f'Sample ID: {sample_ids[sample_index]} | Target: {targets[i]} | Prediction: {preds[i]} (-1 in excel)')
            sample_index += 1
# Overall accuracy across the whole dataset.
accuracy = corrects.double().cpu() / len(loader.dataset)
print(f'Overall Accuracy: {accuracy:.4f}')
# ---- Inference pass 2: warning lists and per-class statistics ----
# Students predicted as class 0 / class 1 are written to the level-1 / level-2
# warning files; per-class counts and correct predictions are also tracked.
thresholds = [1/6, 1/2, 5/6]
class_counts = {0: 0, 1: 0, 2: 0, 3: 0}
class_corrects = {0: 0, 1: 0, 2: 0, 3: 0}
corrects = 0
sample_index = 0
# Context managers guarantee the files are closed even if inference raises.
with open("一级预警.txt", "w", encoding="utf-8") as file_1st_warning, \
     open("二级预警.txt", "w", encoding="utf-8") as file_2nd_warning:
    with torch.no_grad():
        for inputs, targets in loader:
            outputs = model(inputs)
            # Bucket the regression output into classes 0..3 by threshold count.
            preds = torch.tensor([sum(o.item() > t for t in thresholds) for o in torch.flatten(outputs)]).to(device)
            corrects += torch.sum(preds == targets.data)
            # Record student IDs for the two most severe predicted classes.
            for i in range(len(inputs)):
                if preds[i] == 0:
                    file_1st_warning.write(f"{sample_ids[sample_index]}\n")
                elif preds[i] == 1:
                    file_2nd_warning.write(f"{sample_ids[sample_index]}\n")
                sample_index += 1
            # Update per-class totals and correct counts.
            for i in range(len(targets)):
                class_counts[targets[i].item()] += 1
                if preds[i] == targets[i]:
                    class_corrects[targets[i].item()] += 1
# Overall accuracy.
accuracy = corrects.double().cpu() / len(loader.dataset)
print(f'整体准确率: {accuracy:.4f}')
# Per-class accuracy; guard against division by zero when a class is absent.
for class_idx in range(4):
    if class_counts[class_idx]:
        class_accuracy = class_corrects[class_idx] / class_counts[class_idx]
    else:
        class_accuracy = 0.0
    print(f'类别 {class_idx + 1} | 预测数量: {class_counts[class_idx]} | 准确率: {class_accuracy:.4f}')

@ -0,0 +1,157 @@
"""
文件名: detect_num.py
训练部分代码
作者: 王春林
创建日期: 2023年7月13日
最后修改日期: 2023年7月18日
版本号: 1.0.0
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from torchsummary import summary
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the feature/label spreadsheet (one row per student).
data = pd.read_excel('feature_label.xlsx')
# The 17 feature column names: questionnaire subscales plus digitized background fields.
feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对", "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化", "母亲教养方式数字化", "自评家庭经济条件数字化", "有无心理治疗(咨询)史数字化", "出勤情况数字化", "学业情况数字化", "权重数字化值"]
# Split features from labels; min-max scale each feature column to [0, 1].
X = data[feature_names].values
y = (data['label'].values - 1) / 3 # map labels 1-4 to regression targets 0, 1/3, 2/3, 1
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
# Define the MLP network: 17 input features -> single regression output in [0, 1].
class MLP(nn.Module):
    """Fully connected network mapping 17 features to one regression value.

    Trained with MSE against targets 0, 1/3, 2/3, 1; the scalar output is
    bucketed into 4 ordinal classes by thresholding when computing accuracy.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(17, 32),   # input layer (17 features)
            nn.ReLU(),
            nn.Linear(32, 128),  # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),  # hidden layer
            nn.ReLU(),
            nn.Linear(32, 1),    # output layer: one regression value
        )

    def forward(self, x):
        # squeeze(-1) removes only the trailing size-1 output dimension.
        # A bare squeeze() would collapse a batch of size 1 into a 0-dim
        # tensor, and iterating a 0-dim tensor (as the training loop does
        # when bucketing predictions) raises TypeError.
        return self.model(x).squeeze(-1)
# Plain KFold rather than StratifiedKFold: after rescaling, the labels are
# continuous regression targets, so stratification no longer applies.
kfold = KFold(n_splits=5, shuffle=True)
# Per-fold loss / accuracy histories, used for the fold-averaged curves below.
all_train_losses, all_val_losses, all_train_accs, all_val_accs = [], [], [], []
# Decision thresholds: midpoints between the 4 regression targets 0, 1/3, 2/3, 1.
# Hoisted out of the loops (loop-invariant).
thresholds = [1/6, 1/2, 5/6]
for fold, (train_index, test_index) in enumerate(kfold.split(X, y)):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = y[train_index], y[test_index]
    train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(device),
                                  torch.from_numpy(y_train).float().to(device))
    val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(device),
                                torch.from_numpy(y_val).float().to(device))
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)
    model = MLP().to(device)
    # Print the network summary on the device the model actually trains on.
    # (The previous hard-coded model.to("cuda:0") crashed on CPU-only machines
    # and caused a device mismatch with the data tensors.)
    summary(model, (1, 17))
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    n_epochs = 120
    train_losses, val_losses, train_accs, val_accs = [], [], [], []
    # Best validation accuracy so far and a snapshot of the weights that achieved it.
    best_val_acc = 0.0
    best_model = None
    for epoch in range(n_epochs):
        # ---- training phase ----
        model.train()
        running_loss, corrects = 0, 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            # Bucket the regression output into classes 0..3 by threshold count.
            # flatten() keeps this safe even for a size-1 final batch.
            preds = torch.tensor([sum(o.item() > t for t in thresholds)
                                  for o in torch.flatten(outputs)]).to(device)
            # Targets were scaled to 0..1; multiply by 3 to recover class indices.
            corrects += torch.sum(preds == (targets.data * 3).long())
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = corrects.double().cpu() / len(train_loader.dataset)
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)
        print(f'Fold {fold+1}, Epoch {epoch+1} | Train Loss: {epoch_loss:.4f} | Train Accuracy: {epoch_acc:.4f}')
        # ---- validation phase ----
        model.eval()
        running_loss, corrects = 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                running_loss += loss.item() * inputs.size(0)
                preds = torch.tensor([sum(o.item() > t for t in thresholds)
                                      for o in torch.flatten(outputs)]).to(device)
                corrects += torch.sum(preds == (targets.data * 3).long())
        epoch_loss = running_loss / len(val_loader.dataset)
        epoch_acc = corrects.double().cpu() / len(val_loader.dataset)
        val_losses.append(epoch_loss)
        val_accs.append(epoch_acc)
        print(f'Fold {fold+1}, Epoch {epoch+1} | Validation Loss: {epoch_loss:.4f} | Validation Accuracy: {epoch_acc:.4f}')
        # Snapshot the best weights. state_dict() returns live references to
        # the parameter tensors, so without clone() the "best" snapshot would
        # silently track the final epoch's weights instead.
        if epoch_acc > best_val_acc:
            best_val_acc = epoch_acc
            best_model = {k: v.detach().clone() for k, v in model.state_dict().items()}
    # Persist this fold's best checkpoint.
    torch.save(best_model, f'model_fold{fold+1}.pth')
    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)
    all_train_accs.append(train_accs)
    all_val_accs.append(val_accs)
# Plot the fold-averaged loss and accuracy curves side by side, then print
# the final-epoch averages across all folds.
epochs = range(n_epochs)
mean_train_loss = np.mean(all_train_losses, axis=0)
mean_val_loss = np.mean(all_val_losses, axis=0)
mean_train_acc = np.mean(all_train_accs, axis=0)
mean_val_acc = np.mean(all_val_accs, axis=0)
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs, mean_train_loss, label='Train Loss')
plt.plot(epochs, mean_val_loss, label='Validation Loss')
plt.legend()
plt.title('Loss')
plt.subplot(1, 2, 2)
plt.plot(epochs, mean_train_acc, label='Train Accuracy')
plt.plot(epochs, mean_val_acc, label='Validation Accuracy')
plt.legend()
plt.title('Accuracy')
print(f'All Fold Average | Train Loss: {mean_train_loss[-1].item():.4f} | Train Accuracy: {mean_train_acc[-1].item():.4f} | Validation Loss: {mean_val_loss[-1].item():.4f} | Validation Accuracy: {mean_val_acc[-1].item():.4f}')
plt.show()
Loading…
Cancel
Save