|
|
"""
|
|
|
文件名: train_gpu_blance_10features.py
|
|
|
|
|
|
训练部分代码
|
|
|
|
|
|
作者: 王春林
|
|
|
创建日期: 2023年10月18日
|
|
|
最后修改日期: 2023年10月20日
|
|
|
版本号: 1.0.0
|
|
|
|
|
|
"""
|
|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
import torch
|
|
|
from torch import nn
|
|
|
from sklearn.metrics import precision_score, recall_score, f1_score
|
|
|
from sklearn.utils.class_weight import compute_class_weight
|
|
|
|
|
|
# Excel workbook holding the validation split; it doubles as the test set.
val_excel = r'val_fold0.xlsx'

# Inference is deliberately pinned to the CPU. The tensors, the model and the
# checkpoint (via map_location) all follow `device`, so GPU execution can be
# re-enabled by assigning
# torch.device("cuda" if torch.cuda.is_available() else "cpu") here instead.
device = torch.device("cpu")
|
|
|
|
|
|
# 定义 MLP 网络
|
|
|
class MLP(nn.Module):
    """Fully connected classifier: in_features -> 32 -> 128 -> 32 -> num_classes.

    The layers are stored in ``self.model`` (an ``nn.Sequential``), so the
    parameter names are ``model.0.*``, ``model.2.*``, ``model.4.*`` and
    ``model.6.*``. With the default arguments the architecture is the
    original 10-input / 4-class network.

    Args:
        in_features: Number of input features per sample (default 10).
        num_classes: Number of output logits, one per class (default 4).
    """

    def __init__(self, in_features: int = 10, num_classes: int = 4) -> None:
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(in_features, 32),   # input layer
            nn.ReLU(),
            nn.Linear(32, 128),           # hidden layer
            nn.ReLU(),
            nn.Linear(128, 32),           # hidden layer
            nn.ReLU(),
            nn.Linear(32, num_classes),   # output layer: raw logits, no softmax
        )

    def forward(self, x):
        """Return the output of the layer stack for the input batch ``x``."""
        return self.model(x)
|
|
|
|
|
|
# Load the validation sheet; features and labels live in the same table.
val_data = pd.read_excel(val_excel)

# Feature columns fed to the network, in input order. There are ten of them,
# matching the 10-input first layer of MLP.
feature_names = [
    "强迫症状数字化",
    "人际关系敏感数字化",
    "抑郁数字化",
    "多因子症状",
    "母亲教养方式数字化",
    "父亲教养方式数字化",
    "自评家庭经济条件数字化",
    "有无心理治疗(咨询)史数字化",
    "学业情况数字化",
    "出勤情况数字化",
]

# Split features from labels.
# NOTE(review): the original comment said the data is normalised here, but no
# scaling is applied. If the training script normalised its inputs, the same
# transform must be applied to X_val before inference -- confirm against the
# training code.
X_val = val_data[feature_names].values
y_val = val_data['类别'].values

# Coerce to float32 before handing the array to torch: `.values` yields an
# object-dtype array when the columns have mixed dtypes (or the sheet is
# empty), and torch.from_numpy rejects object arrays with a TypeError.
X_val_tensor = torch.from_numpy(np.asarray(X_val, dtype=np.float32)).to(device)
|
|
|
|
|
|
# Rebuild the network and restore the trained parameters.
model = MLP().to(device)
# NOTE(security): torch.load unpickles the checkpoint, which can execute
# arbitrary code. Only load checkpoint files from a trusted source; recent
# PyTorch releases also accept weights_only=True to restrict unpickling.
model.load_state_dict(torch.load('train_fold0.xlsx.pth', map_location=device))
model.eval()  # switch the module to evaluation mode

# Forward pass with gradient tracking disabled.
with torch.no_grad():
    outputs = model(X_val_tensor)

# Predicted class = index of the largest output along dim 1.
_, predictions = torch.max(outputs, 1)
|
|
|
# Convert the prediction tensor to NumPy once and reuse it for every report.
predicted_labels = predictions.cpu().numpy()

# Show the first 100 predictions next to the first 100 ground-truth labels.
print("前100个预测结果:", predicted_labels[:100])
print("前100个实际结果:", y_val[:100])

# Row positions where the prediction disagrees with the label.
wrong_indices = np.where(y_val != predicted_labels)[0]
print("预测错误的样本序号:", wrong_indices)

# Error count and error rate over the whole validation set.
wrong_count = len(wrong_indices)
total_count = len(y_val)
# Guard the division so an empty label array reports 0.0 instead of raising
# ZeroDivisionError.
wrong_percentage = (wrong_count / total_count) * 100 if total_count else 0.0

print("预测错误数量:", wrong_count)
print("预测错误占总数量的百分比:", wrong_percentage, "%")
print("总数量:", total_count)
|
|
|
|
|
|
# Convert the prediction tensor to NumPy once; all three metrics share it.
predicted_labels = predictions.cpu().numpy()

# Per-class precision, recall and F1 (average=None returns one value per class).
# NOTE(review): no `labels=` is passed, so the classes are inferred from the
# data; a class missing from both y_val and the predictions gets no entry,
# which also shifts the unweighted means below -- confirm this is intended.
precision = precision_score(y_val, predicted_labels, average=None)
recall = recall_score(y_val, predicted_labels, average=None)
f1 = f1_score(y_val, predicted_labels, average=None)

# Unweighted (macro) means of the per-class scores.
avg_precision = np.mean(precision)
avg_recall = np.mean(recall)
avg_f1 = np.mean(f1)

print("精确率:", precision)
print("召回率:", recall)
print("F1得分:", f1)
print("平均精确率:", avg_precision)
print("平均召回率:", avg_recall)
print("平均F1得分:", avg_f1)
|