训练和推理换成了回归0到1

main
wangchunlin 3 years ago
parent 9c7a560fbd
commit be77533363

@ -0,0 +1,133 @@
"""
文件名: detect_num.py
推理部分代码
作者: 王春林
创建日期: 2023年7月14日
最后修改日期: 2023年7月18日
版本号: 1.0.0
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")
# Load the feature/label spreadsheet (one row per student).
data = pd.read_excel('feature_label.xlsx')
# Student-ID column ("编号"), used later to report per-sample predictions in loader order.
sample_ids = data['编号'].values
# The 17 feature column names: questionnaire subscales plus digitized background fields.
feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对", "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化", "母亲教养方式数字化", "自评家庭经济条件数字化", "有无心理治疗(咨询)史数字化", "出勤情况数字化", "学业情况数字化", "权重数字化值"]
# Split features from labels; min-max scale each feature column to [0, 1].
X = data[feature_names].values
y = data['label'].values - 1 # map labels 1-4 to integer classes 0-3
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
# Define the MLP network: 17 input features -> single regression output in [0, 1].
class MLP(nn.Module):
    """Fully connected network mapping 17 features to one regression value.

    The scalar output is later bucketed into 4 ordinal classes by thresholding.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(17, 32),   # input layer (17 features)
            nn.ReLU(),
            nn.Linear(32, 128),  # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),  # hidden layer
            nn.ReLU(),
            nn.Linear(32, 1),    # output layer: one regression value
        )

    def forward(self, x):
        # squeeze(-1) removes only the trailing size-1 output dimension.
        # A bare squeeze() would also collapse a batch of size 1 into a
        # 0-dim tensor, which breaks iteration over the outputs downstream.
        return self.model(x).squeeze(-1)
# Load the trained fold-5 checkpoint and switch to evaluation mode.
model = MLP().to(device)
model.load_state_dict(torch.load('model_fold5.pth'))
model.eval()
# Build a non-shuffled loader over the full dataset so that batch order
# matches the row order of sample_ids.
dataset = TensorDataset(torch.from_numpy(X).float().to(device), torch.from_numpy(y).long().to(device))
loader = DataLoader(dataset, batch_size=32)
# ---- Inference pass 1: per-sample predictions and overall accuracy ----
# The regression output in [0, 1] is bucketed into 4 classes by counting how
# many thresholds it exceeds; thresholds are the midpoints between the class
# targets 0, 1/3, 2/3, 1. Hoisted out of the loop (loop-invariant).
thresholds = [1/6, 1/2, 5/6]
corrects = 0
sample_index = 0
with torch.no_grad():  # inference only — no gradients needed
    for inputs, targets in loader:
        outputs = model(inputs)
        # NOTE: removed leftover debug statements (print(outputs),
        # print(targets.data), input()) — input() blocked execution per batch.
        preds = torch.tensor([sum(o.item() > t for t in thresholds) for o in torch.flatten(outputs)]).to(device)
        corrects += torch.sum(preds == targets.data)
        # Report each sample; sample_ids align because the loader is not shuffled.
        for i in range(len(inputs)):
            print(f'Sample ID: {sample_ids[sample_index]} | Target: {targets[i]} | Prediction: {preds[i]} (-1 in excel)')
            sample_index += 1
# Overall accuracy across the whole dataset.
accuracy = corrects.double().cpu() / len(loader.dataset)
print(f'Overall Accuracy: {accuracy:.4f}')
# ---- Inference pass 2: warning lists and per-class statistics ----
# Students predicted as class 0 / class 1 are written to the level-1 / level-2
# warning files; per-class counts and correct predictions are also tracked.
thresholds = [1/6, 1/2, 5/6]
class_counts = {0: 0, 1: 0, 2: 0, 3: 0}
class_corrects = {0: 0, 1: 0, 2: 0, 3: 0}
corrects = 0
sample_index = 0
# Context managers guarantee the files are closed even if inference raises.
with open("一级预警.txt", "w", encoding="utf-8") as file_1st_warning, \
     open("二级预警.txt", "w", encoding="utf-8") as file_2nd_warning:
    with torch.no_grad():
        for inputs, targets in loader:
            outputs = model(inputs)
            # Bucket the regression output into classes 0..3 by threshold count.
            preds = torch.tensor([sum(o.item() > t for t in thresholds) for o in torch.flatten(outputs)]).to(device)
            corrects += torch.sum(preds == targets.data)
            # Record student IDs for the two most severe predicted classes.
            for i in range(len(inputs)):
                if preds[i] == 0:
                    file_1st_warning.write(f"{sample_ids[sample_index]}\n")
                elif preds[i] == 1:
                    file_2nd_warning.write(f"{sample_ids[sample_index]}\n")
                sample_index += 1
            # Update per-class totals and correct counts.
            for i in range(len(targets)):
                class_counts[targets[i].item()] += 1
                if preds[i] == targets[i]:
                    class_corrects[targets[i].item()] += 1
# Overall accuracy.
accuracy = corrects.double().cpu() / len(loader.dataset)
print(f'整体准确率: {accuracy:.4f}')
# Per-class accuracy; guard against division by zero when a class is absent.
for class_idx in range(4):
    if class_counts[class_idx]:
        class_accuracy = class_corrects[class_idx] / class_counts[class_idx]
    else:
        class_accuracy = 0.0
    print(f'类别 {class_idx + 1} | 预测数量: {class_counts[class_idx]} | 准确率: {class_accuracy:.4f}')

@ -0,0 +1,157 @@
"""
文件名: detect_num.py
训练部分代码
作者: 王春林
创建日期: 2023年7月13日
最后修改日期: 2023年7月18日
版本号: 1.0.0
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from torchsummary import summary
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the feature/label spreadsheet (one row per student).
data = pd.read_excel('feature_label.xlsx')
# The 17 feature column names: questionnaire subscales plus digitized background fields.
feature_names = ["躯体化", "强迫症状", "人际关系敏感", "抑郁", "焦虑", "敌对", "恐怖", "偏执", "精神病性", "其他", "父亲教养方式数字化", "母亲教养方式数字化", "自评家庭经济条件数字化", "有无心理治疗(咨询)史数字化", "出勤情况数字化", "学业情况数字化", "权重数字化值"]
# Split features from labels; min-max scale each feature column to [0, 1].
X = data[feature_names].values
y = (data['label'].values - 1) / 3 # map labels 1-4 to regression targets 0, 1/3, 2/3, 1
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
# Define the MLP network: 17 input features -> single regression output in [0, 1].
class MLP(nn.Module):
    """Fully connected network mapping 17 features to one regression value.

    Trained with MSE against targets 0, 1/3, 2/3, 1; the scalar output is
    bucketed into 4 ordinal classes by thresholding when computing accuracy.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(17, 32),   # input layer (17 features)
            nn.ReLU(),
            nn.Linear(32, 128),  # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),  # hidden layer
            nn.ReLU(),
            nn.Linear(32, 1),    # output layer: one regression value
        )

    def forward(self, x):
        # squeeze(-1) removes only the trailing size-1 output dimension.
        # A bare squeeze() would collapse a batch of size 1 into a 0-dim
        # tensor, and iterating a 0-dim tensor (as the training loop does
        # when bucketing predictions) raises TypeError.
        return self.model(x).squeeze(-1)
# Plain KFold rather than StratifiedKFold: after rescaling, the labels are
# continuous regression targets, so stratification no longer applies.
kfold = KFold(n_splits=5, shuffle=True)
# Per-fold loss / accuracy histories, used for the fold-averaged curves below.
all_train_losses, all_val_losses, all_train_accs, all_val_accs = [], [], [], []
# Decision thresholds: midpoints between the 4 regression targets 0, 1/3, 2/3, 1.
# Hoisted out of the loops (loop-invariant).
thresholds = [1/6, 1/2, 5/6]
for fold, (train_index, test_index) in enumerate(kfold.split(X, y)):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = y[train_index], y[test_index]
    train_dataset = TensorDataset(torch.from_numpy(X_train).float().to(device),
                                  torch.from_numpy(y_train).float().to(device))
    val_dataset = TensorDataset(torch.from_numpy(X_val).float().to(device),
                                torch.from_numpy(y_val).float().to(device))
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)
    model = MLP().to(device)
    # Print the network summary on the device the model actually trains on.
    # (The previous hard-coded model.to("cuda:0") crashed on CPU-only machines
    # and caused a device mismatch with the data tensors.)
    summary(model, (1, 17))
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())
    n_epochs = 120
    train_losses, val_losses, train_accs, val_accs = [], [], [], []
    # Best validation accuracy so far and a snapshot of the weights that achieved it.
    best_val_acc = 0.0
    best_model = None
    for epoch in range(n_epochs):
        # ---- training phase ----
        model.train()
        running_loss, corrects = 0, 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            # Bucket the regression output into classes 0..3 by threshold count.
            # flatten() keeps this safe even for a size-1 final batch.
            preds = torch.tensor([sum(o.item() > t for t in thresholds)
                                  for o in torch.flatten(outputs)]).to(device)
            # Targets were scaled to 0..1; multiply by 3 to recover class indices.
            corrects += torch.sum(preds == (targets.data * 3).long())
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = corrects.double().cpu() / len(train_loader.dataset)
        train_losses.append(epoch_loss)
        train_accs.append(epoch_acc)
        print(f'Fold {fold+1}, Epoch {epoch+1} | Train Loss: {epoch_loss:.4f} | Train Accuracy: {epoch_acc:.4f}')
        # ---- validation phase ----
        model.eval()
        running_loss, corrects = 0, 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                running_loss += loss.item() * inputs.size(0)
                preds = torch.tensor([sum(o.item() > t for t in thresholds)
                                      for o in torch.flatten(outputs)]).to(device)
                corrects += torch.sum(preds == (targets.data * 3).long())
        epoch_loss = running_loss / len(val_loader.dataset)
        epoch_acc = corrects.double().cpu() / len(val_loader.dataset)
        val_losses.append(epoch_loss)
        val_accs.append(epoch_acc)
        print(f'Fold {fold+1}, Epoch {epoch+1} | Validation Loss: {epoch_loss:.4f} | Validation Accuracy: {epoch_acc:.4f}')
        # Snapshot the best weights. state_dict() returns live references to
        # the parameter tensors, so without clone() the "best" snapshot would
        # silently track the final epoch's weights instead.
        if epoch_acc > best_val_acc:
            best_val_acc = epoch_acc
            best_model = {k: v.detach().clone() for k, v in model.state_dict().items()}
    # Persist this fold's best checkpoint.
    torch.save(best_model, f'model_fold{fold+1}.pth')
    all_train_losses.append(train_losses)
    all_val_losses.append(val_losses)
    all_train_accs.append(train_accs)
    all_val_accs.append(val_accs)
# Plot the fold-averaged loss and accuracy curves side by side, then print
# the final-epoch averages across all folds.
epochs = range(n_epochs)
mean_train_loss = np.mean(all_train_losses, axis=0)
mean_val_loss = np.mean(all_val_losses, axis=0)
mean_train_acc = np.mean(all_train_accs, axis=0)
mean_val_acc = np.mean(all_val_accs, axis=0)
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs, mean_train_loss, label='Train Loss')
plt.plot(epochs, mean_val_loss, label='Validation Loss')
plt.legend()
plt.title('Loss')
plt.subplot(1, 2, 2)
plt.plot(epochs, mean_train_acc, label='Train Accuracy')
plt.plot(epochs, mean_val_acc, label='Validation Accuracy')
plt.legend()
plt.title('Accuracy')
print(f'All Fold Average | Train Loss: {mean_train_loss[-1].item():.4f} | Train Accuracy: {mean_train_acc[-1].item():.4f} | Validation Loss: {mean_val_loss[-1].item():.4f} | Validation Accuracy: {mean_val_acc[-1].item():.4f}')
plt.show()
Loading…
Cancel
Save