|
|
|
|
import torch.nn as nn
|
|
|
|
|
import torch.optim as optim
|
|
|
|
|
import numpy as np
|
|
|
|
|
from torch.utils.data import Dataset, DataLoader
|
|
|
|
|
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
|
|
|
|
|
|
|
|
|
|
# Define MLP model
|
|
|
|
|
class MLP(nn.Module):
    """Two-layer perceptron for binary classification.

    forward() returns RAW LOGITS of shape (batch, output_size). The
    training script pairs this model with nn.BCEWithLogitsLoss, which
    applies the sigmoid internally in a numerically stable way; applying
    nn.Sigmoid here as well (as the earlier revision did) squashes the
    output twice and stalls learning. Callers that need probabilities
    apply torch.sigmoid to the returned logits themselves.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
        # Kept so external code that references these attributes still
        # works; forward() intentionally no longer applies them.
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map input features to logits.

        Args:
            x: float tensor of shape (batch, input_size).

        Returns:
            Logits tensor of shape (batch, output_size).
        """
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        return out
|
|
|
|
|
|
|
|
|
|
# Define custom dataset
|
|
|
|
|
class PsychologyDataset(Dataset):
    """Dataset backed by an Excel sheet of psychology survey responses.

    All rows are loaded and preprocessed once in __init__; __getitem__
    just indexes the cached arrays.

    Attributes:
        opt_features: float32 array, columns 34:44 of the sheet scaled
            into [0, 1] (raw scores are on a 0-5 scale, hence the /5).
        opt_labels: integer class labels produced by LabelEncoder from
            the sheet's last column.
    """

    def __init__(self, data_file):
        """Load and preprocess the Excel file at ``data_file``.

        NOTE(review): column range 34:44 and the "last column is the
        label" layout are assumptions inherited from the data file —
        confirm against the spreadsheet schema.
        """
        data = pd.read_excel(data_file)
        src_features = data.iloc[:, 34:44].values.astype(np.float32)
        src_labels = data.iloc[:, -1].values
        # Raw feature scores lie in [0, 5]; dividing by 5 normalizes
        # them to [0, 1] (chosen over MinMaxScaler so the mapping does
        # not depend on the values present in this particular file).
        self.opt_features = src_features / 5
        # Encode the string labels (e.g. "是"/"否") as integer classes.
        label_encoder = LabelEncoder()
        self.opt_labels = label_encoder.fit_transform(src_labels)

    def __len__(self):
        return len(self.opt_features)

    def __getitem__(self, idx):
        """Return (features, label) for row ``idx``."""
        return self.opt_features[idx], self.opt_labels[idx]
|
|
|
|
|
|
|
|
|
|
# Set hyperparameters
# (The merged revision history assigned input_size/hidden_size/lr twice
# with conflicting values; only the surviving values are kept.)
input_size = 10    # number of feature columns (sheet columns 34:44)
hidden_size = 128  # width of the single hidden layer
output_size = 1    # one logit for binary classification
lr = 0.001         # Adam learning rate
num_epochs = 100
|
|
|
|
|
|
|
|
|
|
# Load data
# NOTE(review): the previous revision hard-coded an absolute
# /home/wcl/... path; a path relative to the working directory is
# portable — confirm the spreadsheet lives at data/data_src.xlsx.
dataset = PsychologyDataset("data/data_src.xlsx")
# shuffle=True: reshuffling every epoch avoids ordering bias while
# training (the earlier shuffle=False fed rows in file order).
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
|
|
|
|
|
|
|
|
|
|
# Instantiate model, loss function, and optimizer
model = MLP(input_size, hidden_size, output_size)
# BCEWithLogitsLoss fuses sigmoid + binary cross-entropy in a
# numerically stable way, so the model must output raw logits.
# (A dead duplicate `criterion = nn.BCELoss()` line is removed.)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
|
|
|
|
|
|
|
|
|
|
# Train model
for epoch in range(num_epochs):
    running_loss = 0.0
    train_corrects = 0
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)  # raw logits, shape (batch, 1)
        # BCEWithLogitsLoss needs float targets with the same shape as
        # the logits, hence the view(-1, 1) and the cast.
        loss = criterion(outputs, labels.view(-1, 1).to(torch.float))
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch figure is a true per-sample
        # average (the merged history accumulated the loss twice, once
        # unweighted — that double count is removed).
        running_loss += loss.item() * inputs.size(0)
        # Threshold the probability at 0.5. The old
        # `torch.max(outputs, 1)` over a (batch, 1) tensor always
        # returned index 0 and could never measure accuracy.
        predicted = torch.round(torch.sigmoid(outputs))
        train_corrects += (predicted == labels.view(-1, 1)).sum().item()
    # Report once per epoch (the old code also printed inside the
    # batch loop and divided by the number of batches, not samples).
    epoch_loss = running_loss / len(dataloader.dataset)
    train_acc = float(train_corrects) / len(dataloader.dataset)
    print('Epoch [%d/%d], Loss: %.4f' % (epoch + 1, num_epochs, epoch_loss))
    print('Epoch [%d/%d], ACC: %.4f' % (epoch + 1, num_epochs, train_acc))
|
|
|
|
|
|
|
|
|
|
# Save trained model
|
|
|
|
|
|