"""
detect_num_mse.py

Inference script: loads a trained MLP regression model and predicts a
warning level (class 0-3) for each sample from questionnaire features.
Writes the IDs predicted as the two most severe levels to text files and
reports overall and per-class accuracy.

Author: Wang Chunlin
Created: 2023-07-14 | Last modified: 2023-07-18 | Version: 1.0.0
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load features and labels from the spreadsheet.
data = pd.read_excel('feature_label.xlsx')

# Student ID column, used to report per-sample predictions.
sample_ids = data['编号'].values

# Feature column names (questionnaire scores and digitised attributes).
feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对", "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化", "母亲教养方式数字化", "自评家庭经济条件数字化", "有无心理治疗(咨询)史数字化", "出勤情况数字化", "学业情况数字化", "权重数字化值"]

# Split features/labels; shift labels from 1-4 down to 0-3.
X = data[feature_names].values
y = data['label'].values - 1

# NOTE(review): the scaler is fitted on the inference data itself; this
# assumes the same distribution as the training set. Ideally the scaler
# fitted at training time should be persisted and reused — confirm.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Thresholds mapping the model's scalar output in [0, 1] to class 0-3
# (midpoints between the four rescaled label values 0, 1/3, 2/3, 1).
THRESHOLDS = (1 / 6, 1 / 2, 5 / 6)


class MLP(nn.Module):
    """17-input MLP regressor whose single scalar output encodes the class in [0, 1]."""

    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(17, 32),    # input layer
            nn.ReLU(),
            nn.Linear(32, 128),   # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),   # hidden layer
            nn.ReLU(),
            nn.Linear(32, 1),     # output layer: one regression value
        )

    def forward(self, x):
        # squeeze() drops the trailing singleton output dimension.
        return self.model(x).squeeze()


model = MLP().to(device)
# map_location lets a GPU-trained checkpoint load on a CPU-only machine.
model.load_state_dict(torch.load('model_fold5.pth', map_location=device))
model.eval()

dataset = TensorDataset(
    torch.from_numpy(X).float().to(device),
    torch.from_numpy(y).long().to(device),
)
# No shuffling: batch order must stay aligned with sample_ids.
loader = DataLoader(dataset, batch_size=32)

# Single inference pass producing: per-sample report, warning-list files,
# overall accuracy and per-class statistics. (Previously the data was
# looped over twice doing the same predictions, with a blocking input()
# debug call in the first pass.)
corrects = 0
sample_index = 0
class_counts = {0: 0, 1: 0, 2: 0, 3: 0}
class_corrects = {0: 0, 1: 0, 2: 0, 3: 0}

with open("一级预警.txt", "w", encoding="utf-8") as file_1st_warning, \
     open("二级预警.txt", "w", encoding="utf-8") as file_2nd_warning, \
     torch.no_grad():
    for inputs, targets in loader:
        outputs = model(inputs)
        # Count how many thresholds each output exceeds -> class index 0-3.
        preds = torch.tensor(
            [sum(o.item() > t for t in THRESHOLDS) for o in torch.flatten(outputs)]
        ).to(device)
        corrects += torch.sum(preds == targets.data)

        for i in range(len(inputs)):
            print(f'Sample ID: {sample_ids[sample_index]} | Target: {targets[i]} | Prediction: {preds[i]} (-1 in excel)')
            # Record IDs predicted as the two most severe warning levels.
            if preds[i] == 0:
                file_1st_warning.write(f"{sample_ids[sample_index]}\n")
            elif preds[i] == 1:
                file_2nd_warning.write(f"{sample_ids[sample_index]}\n")
            sample_index += 1

            # Per-class tallies are keyed by the *true* class of the sample.
            cls = targets[i].item()
            class_counts[cls] += 1
            if preds[i] == targets[i]:
                class_corrects[cls] += 1

# Overall accuracy over the whole dataset.
accuracy = corrects.double().cpu() / len(loader.dataset)
print(f'Overall Accuracy: {accuracy:.4f}')
print(f'整体准确率: {accuracy:.4f}')

# Per-class report; guard against a class that never appears in the data.
for class_idx in range(4):
    if class_counts[class_idx]:
        class_accuracy = class_corrects[class_idx] / class_counts[class_idx]
    else:
        class_accuracy = float('nan')
    print(f'类别 {class_idx + 1} | 预测数量: {class_counts[class_idx]} | 准确率: {class_accuracy:.4f}')
"""
train_gpu_blance_mse.py

Training script: 5-fold cross-validated MLP regression on questionnaire
features. Labels 1-4 are rescaled to [0, 1] and fitted with MSE loss;
class predictions are recovered by thresholding the scalar output.
Saves the best model of each fold and plots averaged loss/accuracy curves.

Author: Wang Chunlin
Created: 2023-07-13 | Last modified: 2023-07-18 | Version: 1.0.0
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from torchsummary import summary

# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load features and labels from the spreadsheet.
data = pd.read_excel('feature_label.xlsx')

# Feature column names (questionnaire scores and digitised attributes).
feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对", "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化", "母亲教养方式数字化", "自评家庭经济条件数字化", "有无心理治疗(咨询)史数字化", "出勤情况数字化", "学业情况数字化", "权重数字化值"]

# Split features/labels; rescale labels 1-4 to [0, 1] for MSE regression.
X = data[feature_names].values
y = (data['label'].values - 1) / 3

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Thresholds mapping the scalar output in [0, 1] back to class 0-3
# (midpoints between the four rescaled label values 0, 1/3, 2/3, 1).
THRESHOLDS = (1 / 6, 1 / 2, 5 / 6)


class MLP(nn.Module):
    """17-input MLP regressor whose single scalar output encodes the class in [0, 1]."""

    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(17, 32),    # input layer
            nn.ReLU(),
            nn.Linear(32, 128),   # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),   # hidden layer
            nn.ReLU(),
            nn.Linear(32, 1),     # output layer: one regression value
        )

    def forward(self, x):
        # squeeze() drops the trailing singleton output dimension.
        return self.model(x).squeeze()


def _class_preds(outputs):
    """Map scalar outputs in [0, 1] to class indices 0-3 by threshold counting.

    torch.flatten keeps this correct even for a batch of one, where
    squeeze() in the model produces a 0-d tensor.
    """
    return torch.tensor(
        [sum(o.item() > t for t in THRESHOLDS) for o in torch.flatten(outputs)]
    ).to(device)


def _targets_as_classes(targets):
    """Recover integer classes 0-3 from rescaled targets.

    round() before long() — plain truncation could map e.g. 0.99999*3 -> 2.
    """
    return (targets.data * 3).round().long()


# KFold rather than StratifiedKFold: targets are continuous after rescaling.
kfold = KFold(n_splits=5, shuffle=True)

# Per-fold history, later averaged for the plots.
all_train_losses, all_val_losses, all_train_accs, all_val_accs = [], [], [], []

for fold, (train_index, test_index) in enumerate(kfold.split(X, y)):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = y[train_index], y[test_index]

    train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(device), torch.from_numpy(y_train).float().to(device))
    val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(device), torch.from_numpy(y_val).float().to(device))

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)

    model = MLP().to(device)
    # Print the network structure. BUG FIX: the summary call previously
    # forced the model onto cuda:0, crashing on CPU-only machines; report
    # on whatever device was actually selected instead.
    summary(model, (1, 17), device=device.type)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())

    n_epochs = 120
    train_losses, val_losses, train_accs, val_accs = [], [], [], []

    # Track the best validation accuracy seen in this fold.
    best_val_acc = 0.0
    best_model = None

    for epoch in range(n_epochs):
        # --- training pass ---
        model.train()
        running_loss, corrects = 0, 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            preds = _class_preds(outputs)
            corrects += torch.sum(preds == _targets_as_classes(targets))

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = corrects.double().cpu() / len(train_loader.dataset)
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)

        print(f'Fold {fold+1}, Epoch {epoch+1} | Train Loss: {epoch_loss:.4f} | Train Accuracy: {epoch_acc:.4f}')

        # --- validation pass ---
        model.eval()
        running_loss, corrects = 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                running_loss += loss.item() * inputs.size(0)
                preds = _class_preds(outputs)
                corrects += torch.sum(preds == _targets_as_classes(targets))

        epoch_loss = running_loss / len(val_loader.dataset)
        epoch_acc = corrects.double().cpu() / len(val_loader.dataset)
        val_losses.append(epoch_loss)
        val_accs.append(epoch_acc)

        print(f'Fold {fold+1}, Epoch {epoch+1} | Validation Loss: {epoch_loss:.4f} | Validation Accuracy: {epoch_acc:.4f}')

        # Keep the best-performing weights; the `is None` guard ensures a
        # checkpoint is saved even if accuracy never rises above 0.
        if best_model is None or epoch_acc > best_val_acc:
            best_val_acc = epoch_acc
            best_model = model.state_dict()

    # Persist the best model of this fold.
    torch.save(best_model, f'model_fold{fold+1}.pth')

    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)
    all_train_accs.append(train_accs)
    all_val_accs.append(val_accs)

# Plot loss and accuracy curves averaged over all folds.
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(range(n_epochs), np.mean(all_train_losses, axis=0), label='Train Loss')
plt.plot(range(n_epochs), np.mean(all_val_losses, axis=0), label='Validation Loss')
plt.legend()
plt.title('Loss')

plt.subplot(1, 2, 2)
plt.plot(range(n_epochs), np.mean(all_train_accs, axis=0), label='Train Accuracy')
plt.plot(range(n_epochs), np.mean(all_val_accs, axis=0), label='Validation Accuracy')
plt.legend()
plt.title('Accuracy')

print(f'All Fold Average | Train Loss: {np.mean(all_train_losses, axis=0)[-1].item():.4f} | Train Accuracy: {np.mean(all_train_accs, axis=0)[-1].item():.4f} | Validation Loss: {np.mean(all_val_losses, axis=0)[-1].item():.4f} | Validation Accuracy: {np.mean(all_val_accs, axis=0)[-1].item():.4f}')

plt.show()