2023-05-17, backing this up; these are g's earlier changes

main
wangchunlin 3 years ago
parent 1213ffe4e0
commit 1953479bd1

@@ -4,6 +4,7 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 from torch.utils.data import Dataset, DataLoader
+from sklearn.preprocessing import MinMaxScaler, LabelEncoder

 # Define MLP model
 class MLP(nn.Module):
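
Side note on the newly imported sklearn helpers: a minimal sketch (with made-up values, not from this repo) of what MinMaxScaler and LabelEncoder do; the hunk below ends up bypassing the scaler in favor of a plain division by 5.

# Illustrative only; the arrays here are made up.
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import numpy as np

features = np.array([[0.0, 2.5], [5.0, 1.0]], dtype=np.float32)
scaler = MinMaxScaler(feature_range=(0, 5))
scaled = scaler.fit_transform(features)   # each column rescaled to [0, 5]

labels = np.array(["no", "yes", "no"])
encoder = LabelEncoder()
encoded = encoder.fit_transform(labels)   # -> array([0, 1, 0])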
@@ -13,71 +14,77 @@ class MLP(nn.Module):
         self.relu1 = nn.ReLU()
         self.fc2 = nn.Linear(hidden_size, output_size)
         self.sigmoid = nn.Sigmoid()
+        self.softmax = nn.Softmax(dim=1)

     def forward(self, x):
         out = self.fc1(x)
         out = self.relu1(out)
         out = self.fc2(out)
-        out = self.sigmoid(out)
         return out

 # Define custom dataset
 class PsychologyDataset(Dataset):
     def __init__(self, data_file):
-        self.data = pd.read_excel(data_file)
+        data = pd.read_excel(data_file)
+        src_features = data.iloc[:, 34:44].values.astype(np.float32)
+        src_labels = data.iloc[:, -1].values
+        # Data preprocessing
+        scaler = MinMaxScaler(feature_range=(0, 5))
+        #self.opt_features = scaler.fit_transform(src_features)
+        self.opt_features = src_features/5
+        # Label encoding
+        label_encoder = LabelEncoder()
+        self.opt_labels = label_encoder.fit_transform(src_labels)

     def __len__(self):
-        return len(self.data)
+        return len(self.opt_features)

     def __getitem__(self, idx):
-        features = self.data.iloc[idx, 36:43].values.astype(np.float32)
-        str = self.data.iloc[idx, -1]
-        #print(idx,str,self.data.iloc[0, 0])
-        label = -1
-        if(str==""):
-            label = 1
-        else:
-            label = 0
-        #print(features)
-        label = np.float32(label)
-        #return torch.tensor(features, dtype=torch.float), label
-        return features, label
+        return self.opt_features[idx], self.opt_labels[idx]

 # Set hyperparameters
-input_size = 7
-hidden_size = 16
+input_size = 10
+hidden_size = 128
 output_size = 1
-lr = 0.01
+lr = 0.001
 num_epochs = 100

 # Load data
-dataset = PsychologyDataset("data/data_src.xlsx")
-dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
+dataset = PsychologyDataset("/home/wcl/psychological_prediction/data/data_src.xlsx")
+dataloader = DataLoader(dataset, batch_size=32, shuffle=False)

 # Instantiate model, loss function, and optimizer
 model = MLP(input_size, hidden_size, output_size)
-criterion = nn.BCELoss()
+criterion = nn.BCEWithLogitsLoss()
 optimizer = optim.Adam(model.parameters(), lr=lr)
+#optimizer = optim.SGD(model.parameters(), lr=0.01)

 # Train model
 for epoch in range(num_epochs):
     running_loss = 0.0
     train_corrects = 0
+    #print(type(dataloader))
     for i, data in enumerate(dataloader):
+        #print("sample index:", i, data)
+        #continue
         inputs, labels = data
         optimizer.zero_grad()
         outputs = model(inputs)
-        loss = criterion(outputs, labels.unsqueeze(1))
+        loss = criterion(outputs, labels.view(-1,1).to(torch.float))
+        #loss = criterion(outputs, labels)
         loss.backward()
         optimizer.step()
-        running_loss += loss.item()
-        _, preds = torch.max(outputs, 1)
-        train_corrects += torch.sum(preds == labels.data)
-    print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, running_loss / len(dataloader)))
-    train_acc = train_corrects.double() / len(dataloader)
+        running_loss += loss.item() * inputs.size(0)
+        predicted = torch.round(torch.sigmoid(outputs))
+        #print((predicted == labels.view(-1,1)).sum().item())
+        assert(outputs.sum().item()!=0), {outputs, predicted, labels}
+        #train_corrects += torch.sum(predicted == labels.data)
+        correct = (predicted == labels.view(-1,1)).sum().item()
+        #print(correct, labels.size(0))
+        train_corrects += correct
+    print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, running_loss / len(dataloader.dataset)))
+    print(len(dataloader.dataset), len(dataloader))
+    train_acc = float(train_corrects) / len(dataloader.dataset)
     print('Epoch [%d/%d], ACC: %.4f' % (epoch+1, num_epochs, train_acc))

 # Save trained model
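
The loss change above is the key fix: nn.BCELoss expects probabilities, so the old code needed the sigmoid inside forward(), while nn.BCEWithLogitsLoss consumes raw logits and folds the sigmoid into a numerically stable computation. A small sketch (not part of the commit) verifying the equivalence and the rounding-based prediction rule the new loop uses:

# Sketch: BCEWithLogitsLoss on logits equals BCELoss on sigmoid outputs.
import torch
import torch.nn as nn

logits = torch.tensor([[2.0], [-1.5]])          # raw model outputs
targets = torch.tensor([[1.0], [0.0]])

loss_a = nn.BCEWithLogitsLoss()(logits, targets)
loss_b = nn.BCELoss()(torch.sigmoid(logits), targets)
assert torch.allclose(loss_a, loss_b)           # same value on well-scaled logits

# Predictions are then recovered exactly as the new training loop does:
predicted = torch.round(torch.sigmoid(logits))  # -> tensor([[1.], [0.]])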

Binary file not shown.

@@ -37,9 +37,10 @@ class MMLP(nn.Module):
             self.layers.append(nn.ReLU())
             input_size = h
         self.layers.append(nn.Linear(input_size, output_size))
-        # Adding an activation after the final layer severely hurts convergence; cause still to be analyzed
+        # Adding an activation after the final layer severely hurts convergence; cause still to be analyzed. Softmax must not be added here because nn.CrossEntropyLoss() already applies softmax internally, and stacking them exponentiates twice in a row, which easily overflows. As for Sigmoid, the reason is that the input data is already normalized to [0, 1].
         #self.layers.append(nn.Sigmoid())
         #self.layers.append(nn.Softmax(dim=1))
+        #self.layers.append(nn.LogSoftmax(dim=1))

     def forward(self, x):
         for layer in self.layers:
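
To back up the updated comment, a quick sketch (illustrative, not from the repo) showing that nn.CrossEntropyLoss is exactly log-softmax followed by NLL, so appending Softmax to the model double-applies the exponential:

# Sketch: CrossEntropyLoss == NLLLoss after log_softmax, on raw logits.
import torch
import torch.nn as nn

logits = torch.tensor([[1.0, 3.0, 0.5]])
target = torch.tensor([1])

ce = nn.CrossEntropyLoss()(logits, target)
nll = nn.NLLLoss()(torch.log_softmax(logits, dim=1), target)
assert torch.allclose(ce, nll)  # identical by definition

# Feeding softmax outputs into CrossEntropyLoss applies exp a second time
# and distorts (and at scale can overflow) the loss:
wrong = nn.CrossEntropyLoss()(torch.softmax(logits, dim=1), target)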
@@ -204,6 +205,7 @@ for fold, (train_idx, test_idx) in enumerate(skf.split(src_features, src_labels)
     criterion = nn.CrossEntropyLoss()
     #criterion = nn.BCELoss()  # apply sigmoid first before using this loss
     optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+    #optimizer = torch.optim.SGD(model.parameters(), lr=lr)  # SGD's only hyperparameter here is lr; suited to small network structures
     # Train the MLP model
     train_model(model, train_loader, criterion, optimizer, num_epochs)
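
For orientation, the hunk header shows this code sits inside a StratifiedKFold loop over src_features/src_labels. A hedged sketch of that scaffolding (the fold count, placeholder data, and loader settings are assumptions, not from the commit; only the skf.split(...) loop header is confirmed by the diff):

# Assumed cross-validation scaffolding around the hunk above.
import numpy as np
import torch
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, TensorDataset

src_features = np.random.rand(100, 10).astype(np.float32)   # placeholder data
src_labels = np.random.randint(0, 2, size=100)              # placeholder labels

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for fold, (train_idx, test_idx) in enumerate(skf.split(src_features, src_labels)):
    train_ds = TensorDataset(torch.tensor(src_features[train_idx]),
                             torch.tensor(src_labels[train_idx]))
    train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
    # rebuild model/criterion/optimizer per fold, then call train_model(...)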
