You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

92 lines
3.1 KiB
Python

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
# Define MLP model
class MLP(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(MLP, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(hidden_size, output_size)
self.sigmoid = nn.Sigmoid()
self.softmax = nn.Softmax(dim=1)
def forward(self, x):
out = self.fc1(x)
out = self.relu1(out)
out = self.fc2(out)
return out
# Define custom dataset
class PsychologyDataset(Dataset):
    """Questionnaire dataset loaded from an Excel sheet.

    Features are columns 34..43 (10 columns) cast to float32; the label is
    the last column, string-encoded to integer class ids.

    Args:
        data_file: path to the source .xlsx file.
    """

    def __init__(self, data_file):
        data = pd.read_excel(data_file)
        src_features = data.iloc[:, 34:44].values.astype(np.float32)
        src_labels = data.iloc[:, -1].values
        # Preprocessing: raw scores are assumed to lie in [0, 5] — TODO confirm
        # against the source sheet; dividing by 5 maps them into [0, 1].
        # (An unused MinMaxScaler instantiation from the original was removed.)
        self.opt_features = src_features / 5
        # Label encoding: map string labels to contiguous integer ids.
        self.opt_labels = LabelEncoder().fit_transform(src_labels)

    def __len__(self):
        """Number of samples."""
        return len(self.opt_features)

    def __getitem__(self, idx):
        """Return the (features, label) pair at position idx."""
        return self.opt_features[idx], self.opt_labels[idx]
# Set hyperparameters
input_size = 10    # matches the 10 feature columns selected by the dataset
hidden_size = 128
output_size = 1    # single logit for binary classification
lr = 0.001
num_epochs = 100

# Load data
dataset = PsychologyDataset("/home/wcl/psychological_prediction/data/data_src.xlsx")
# shuffle=True: the original used shuffle=False, which biases mini-batch SGD
# whenever the sheet is ordered (e.g. sorted by label). Reshuffling each epoch
# is the standard setting for a training loader.
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Instantiate model, loss function, and optimizer
model = MLP(input_size, hidden_size, output_size)
# BCEWithLogitsLoss applies sigmoid internally, so the model outputs raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
# Train model
for epoch in range(num_epochs):
    running_loss = 0.0
    train_corrects = 0
    for inputs, labels in dataloader:
        # Targets must be float and shaped (batch, 1) to match the logits.
        targets = labels.view(-1, 1).to(torch.float)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch figure is a true per-sample mean
        # even when the last batch is smaller.
        running_loss += loss.item() * inputs.size(0)
        # Hard prediction: sigmoid probability thresholded at 0.5.
        # (A leftover debug `assert outputs.sum() != 0` was removed: it is
        # stripped under `python -O` and would crash on legitimately
        # all-zero logits.)
        predicted = torch.round(torch.sigmoid(outputs))
        train_corrects += (predicted == labels.view(-1, 1)).sum().item()
    print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, running_loss / len(dataloader.dataset)))
    train_acc = float(train_corrects) / len(dataloader.dataset)
    print('Epoch [%d/%d], ACC: %.4f' % (epoch+1, num_epochs, train_acc))

# Save trained model
torch.save(model.state_dict(), 'psychology_model.pth')