# NOTE(review): SOURCE arrived as a whitespace-mangled `git diff` introducing
# three new scripts. Reconstructed below as runnable Python, one clearly
# delimited section per file. Column names, messages and file paths are kept
# byte-for-byte; comments are translated to English.

# =====================================================================
# file: add_label_weight.py
# Compute a weighted composite score and an SCL-90 warning level (1-4),
# then write the enriched table to feature_label.xlsx.
# =====================================================================
import pandas as pd

# Load the raw feature table.
features = pd.read_excel('feature.xlsx')

# Weighted composite of the digitised questionnaire features.
# The ten weights below sum to exactly 1.0.
features['权重数字化值'] = (
    features['强迫症状数字化'] * 0.135
    + features['人际关系敏感数字化'] * 0.085
    + features['抑郁数字化'] * 0.08
    + features['多因子症状'] * 0.2
    + features['母亲教养方式数字化'] * 0.09
    + features['父亲教养方式数字化'] * 0.09
    + features['自评家庭经济条件数字化'] * 0.06
    + features['有无心理治疗(咨询)史数字化'] * 0.06
    + features['学业情况数字化'] * 0.08
    + features['出勤情况数字化'] * 0.12
)

# The ten SCL-90 factor columns.
scl_90_factors = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑",
                  "敌对", "恐怖", "偏执", "精神病性", "其他"]


def calculate_warning_level(row):
    """Map one row's SCL-90 factor scores to a warning level.

    Levels (1 = most severe .. 4 = least):
      1: any factor >= 4, or at least 8 factors >= 3
      2: at least one factor >= 3
      3: at least one factor >= 2
      4: otherwise
    """
    factors = row[scl_90_factors]
    if (factors >= 4).sum() >= 1 or (factors >= 3).sum() >= 8:
        return 1
    elif (factors >= 3).sum() >= 1:
        return 2
    elif (factors >= 2).sum() >= 1:
        return 3
    else:
        return 4


features['label'] = features.apply(calculate_warning_level, axis=1)

# Persist the features together with the derived warning level.
features.to_excel('feature_label.xlsx', index=False)


# =====================================================================
# file: detect.py
# Load a trained MLP checkpoint and report per-sample predictions plus
# overall accuracy on feature_label.xlsx.
# =====================================================================
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Read features and labels produced by add_label_weight.py.
data = pd.read_excel('feature_label.xlsx')

# The 17 input feature columns (10 SCL-90 factors + 7 digitised features).
feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对",
                 "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化",
                 "母亲教养方式数字化", "自评家庭经济条件数字化",
                 "有无心理治疗(咨询)史数字化", "出勤情况数字化",
                 "学业情况数字化", "权重数字化值"]

# Split features/labels and normalise features to [0, 1].
X = data[feature_names].values
y = data['label'].values - 1  # shift labels from 1-4 to 0-3

scaler = MinMaxScaler()
X = scaler.fit_transform(X)


class MLP(nn.Module):
    """Fully connected classifier: 17 inputs -> 4 warning-level classes."""

    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(17, 32),    # input layer
            nn.ReLU(),
            nn.Linear(32, 128),   # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),   # hidden layer
            nn.ReLU(),
            nn.Linear(32, 4),     # output layer, 4 classes
        )

    def forward(self, x):
        return self.model(x)


# Load the trained model.
model = MLP().to(device)
# FIX: map_location lets a checkpoint saved on GPU load on a CPU-only host.
model.load_state_dict(torch.load('model_5.pth', map_location=device))
model.eval()

# Build the evaluation data loader.
dataset = TensorDataset(torch.from_numpy(X).float().to(device),
                        torch.from_numpy(y).long().to(device))
loader = DataLoader(dataset, batch_size=32)

# Inference.
corrects = 0
sample_no = 0  # FIX: global counter; the old per-batch index restarted at 1
# FIX: no_grad during inference — no autograd graph, matching train_gpu.py's
# own evaluation loop.
with torch.no_grad():
    for inputs, targets in loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        corrects += torch.sum(preds == targets.data)

        # Print the prediction for every sample, numbered across batches.
        for target, pred in zip(targets, preds):
            sample_no += 1
            print(f'Sample {sample_no} | Target: {target} | Prediction: {pred}')

# Overall accuracy over the whole dataset.
accuracy = corrects.double().cpu() / len(loader.dataset)
print(f'Overall Accuracy: {accuracy:.4f}')


# =====================================================================
# file: train_gpu.py
# Train the MLP with 5-fold stratified cross-validation and plot the
# averaged loss/accuracy curves.
# =====================================================================
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

# (device, data, feature_names, X, y, scaler, MLP are defined as in the
# detect.py section above; repeated here so each script is self-contained.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data = pd.read_excel('feature_label.xlsx')

feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对",
                 "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化",
                 "母亲教养方式数字化", "自评家庭经济条件数字化",
                 "有无心理治疗(咨询)史数字化", "出勤情况数字化",
                 "学业情况数字化", "权重数字化值"]

X = data[feature_names].values
y = data['label'].values - 1  # shift labels from 1-4 to 0-3

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# 5-fold stratified cross-validation.
# FIX: fixed random_state so fold assignment (and thus reported metrics)
# is reproducible across runs.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold histories of loss and accuracy.
all_train_losses, all_val_losses, all_train_accs, all_val_accs = [], [], [], []

for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = y[train_index], y[test_index]

    train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(device),
                                  torch.from_numpy(y_train).long().to(device))
    val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(device),
                                torch.from_numpy(y_val).long().to(device))

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)

    # Fresh model and optimizer for every fold.
    model = MLP().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    n_epochs = 120  # FIX: comment used to claim 150 epochs; the value is 120
    train_losses, val_losses, train_accs, val_accs = [], [], [], []

    for epoch in range(n_epochs):
        # ---- training pass ----
        model.train()
        running_loss, corrects = 0, 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            corrects += torch.sum(preds == targets.data)

        epoch_loss = running_loss / len(train_loader.dataset)
        # FIX: store plain floats (not 0-dim tensors) so np.mean below is clean.
        epoch_acc = (corrects.double() / len(train_loader.dataset)).item()
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)

        print(f'Fold {fold+1}, Epoch {epoch+1} | Train Loss: {epoch_loss:.4f} | Train Accuracy: {epoch_acc:.4f}')

        # ---- validation pass ----
        model.eval()
        running_loss, corrects = 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                running_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                corrects += torch.sum(preds == targets.data)

        epoch_loss = running_loss / len(val_loader.dataset)
        epoch_acc = (corrects.double() / len(val_loader.dataset)).item()
        val_losses.append(epoch_loss)
        val_accs.append(epoch_acc)

        print(f'Fold {fold+1}, Epoch {epoch+1} | Validation Loss: {epoch_loss:.4f} | Validation Accuracy: {epoch_acc:.4f}')

    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)
    all_train_accs.append(train_accs)
    all_val_accs.append(val_accs)

# Plot loss and accuracy curves averaged over all folds.
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(range(n_epochs), np.mean(all_train_losses, axis=0), label='Train Loss')
plt.plot(range(n_epochs), np.mean(all_val_losses, axis=0), label='Validation Loss')
plt.legend()
plt.title('Loss')

plt.subplot(1, 2, 2)
plt.plot(range(n_epochs), np.mean(all_train_accs, axis=0), label='Train Accuracy')
plt.plot(range(n_epochs), np.mean(all_val_accs, axis=0), label='Validation Accuracy')
plt.legend()
plt.title('Accuracy')

print(f'All Fold Average | Train Loss: {np.mean(all_train_losses, axis=0)[-1].item():.4f} | Train Accuracy: {np.mean(all_train_accs, axis=0)[-1].item():.4f} | Validation Loss: {np.mean(all_val_losses, axis=0)[-1].item():.4f} | Validation Accuracy: {np.mean(all_val_accs, axis=0)[-1].item():.4f}')

plt.show()