From 1953479bd1c842f946f66d32151d5c098a3780e2 Mon Sep 17 00:00:00 2001
From: wangchunlin
Date: Wed, 17 May 2023 18:32:25 +0800
Subject: [PATCH] May 17, 2023: backup; these are the earlier changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py              | 67 ++++++++++++++++++++++++-------------------
 psychology_model.pth | Bin 2325 -> 7893 bytes
 val.py               |  4 ++-
 3 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/main.py b/main.py
index 2b82cd0..d26c223 100644
--- a/main.py
+++ b/main.py
@@ -4,6 +4,7 @@ import torch.nn as nn
 import torch.optim as optim
 import numpy as np
 from torch.utils.data import Dataset, DataLoader
+from sklearn.preprocessing import MinMaxScaler, LabelEncoder
 
 # Define MLP model
 class MLP(nn.Module):
@@ -13,71 +14,77 @@ class MLP(nn.Module):
         self.relu1 = nn.ReLU()
         self.fc2 = nn.Linear(hidden_size, output_size)
         self.sigmoid = nn.Sigmoid()
+        self.softmax = nn.Softmax(dim=1)
 
     def forward(self, x):
         out = self.fc1(x)
         out = self.relu1(out)
         out = self.fc2(out)
-        out = self.sigmoid(out)
         return out
 
 # Define custom dataset
 class PsychologyDataset(Dataset):
     def __init__(self, data_file):
-        self.data = pd.read_excel(data_file)
+        data = pd.read_excel(data_file)
+        src_features = data.iloc[:, 34:44].values.astype(np.float32)
+        src_labels = data.iloc[:, -1].values
+        # Data preprocessing
+        scaler = MinMaxScaler(feature_range=(0, 5))
+        #self.opt_features = scaler.fit_transform(src_features)
+        self.opt_features = src_features / 5
+        # Label encoding
+        label_encoder = LabelEncoder()
+        self.opt_labels = label_encoder.fit_transform(src_labels)
+
     def __len__(self):
-        return len(self.data)
+        return len(self.opt_features)
 
     def __getitem__(self, idx):
-        features = self.data.iloc[idx, 36:43].values.astype(np.float32)
-        str = self.data.iloc[idx, -1]
-        #print(idx, str, self.data.iloc[0, 0])
-        label = -1
-        if(str=="是"):
-            label = 1
-        else:
-            label = 0
-        #print(features)
-        label = np.float32(label)
-        #return torch.tensor(features, dtype=torch.float), label
-        return features, label
+        return self.opt_features[idx], self.opt_labels[idx]
 
 # Set hyperparameters
-input_size = 7
-hidden_size = 16
+input_size = 10
+hidden_size = 128
 output_size = 1
-lr = 0.01
+lr = 0.001
 num_epochs = 100
 
 # Load data
-dataset = PsychologyDataset("data/data_src.xlsx")
-dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
+dataset = PsychologyDataset("/home/wcl/psychological_prediction/data/data_src.xlsx")
+dataloader = DataLoader(dataset, batch_size=32, shuffle=False)
 
 # Instantiate model, loss function, and optimizer
 model = MLP(input_size, hidden_size, output_size)
-criterion = nn.BCELoss()
+criterion = nn.BCEWithLogitsLoss()
+
 optimizer = optim.Adam(model.parameters(), lr=lr)
+#optimizer = optim.SGD(model.parameters(), lr=0.01)
 
 # Train model
 for epoch in range(num_epochs):
     running_loss = 0.0
     train_corrects = 0
-    #print(type(dataloader))
     for i, data in enumerate(dataloader):
-        #print("数据序号:", i, data)
-        #continue
         inputs, labels = data
         optimizer.zero_grad()
         outputs = model(inputs)
-        loss = criterion(outputs, labels.unsqueeze(1))
+        loss = criterion(outputs, labels.view(-1, 1).to(torch.float))
+        #loss = criterion(outputs, labels)
         loss.backward()
         optimizer.step()
-        running_loss += loss.item()
-        _, preds = torch.max(outputs, 1)
-        train_corrects += torch.sum(preds == labels.data)
-    print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, running_loss / len(dataloader)))
-    train_acc = train_corrects.double() / len(dataloader)
+        running_loss += loss.item() * inputs.size(0)
+        predicted = torch.round(torch.sigmoid(outputs))
+        #print((predicted == labels.view(-1,1)).sum().item())
+        assert(outputs.sum().item() != 0), {outputs, predicted, labels}
+        #train_corrects += torch.sum(predicted == labels.data)
+        correct = (predicted == labels.view(-1, 1)).sum().item()
+        #print(correct, labels.size(0))
+        train_corrects += correct
+
+    print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, running_loss / len(dataloader.dataset)))
+    print(len(dataloader.dataset), len(dataloader))
+    train_acc = float(train_corrects) / len(dataloader.dataset)
     print('Epoch [%d/%d], ACC: %.4f' % (epoch+1, num_epochs, train_acc))
 
 # Save trained model
diff --git a/psychology_model.pth b/psychology_model.pth
index 0bed5369dd69100b3d55bd14c4ae7714018b1889..ad78f1b152a9781c573620cac716974a060cb16f 100644
GIT binary patch
literal 7893
[base85-encoded binary payload omitted: replaces the 2325-byte checkpoint with a 7893-byte one]
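A note on the preprocessing added to PsychologyDataset above: the features (columns 34:44, ten questionnaire items scored 0-5) are normalized by a fixed division by 5, the MinMaxScaler alternative is left commented out, and LabelEncoder maps the string labels to integer class ids. Below is a minimal sketch of the difference between the two normalization options, on hypothetical toy values; nothing in it is read from data_src.xlsx.

import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

# Hypothetical stand-ins for the questionnaire columns and labels.
src_features = np.array([[0., 3., 5.],
                         [1., 2., 4.]], dtype=np.float32)
src_labels = np.array(["是", "否"])

# MinMaxScaler rescales each column from its observed min/max into (0, 5),
# so the result depends on the data; dividing by 5 assumes the scores
# already lie in [0, 5] and maps them to [0, 1] deterministically.
scaler = MinMaxScaler(feature_range=(0, 5))
scaled = scaler.fit_transform(src_features)
normalized = src_features / 5  # the variant the patch actually uses

# LabelEncoder assigns integer ids in sorted-class order.
encoder = LabelEncoder()
labels = encoder.fit_transform(src_labels)  # here: [1, 0]
print(normalized, labels)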
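The loss change follows the standard logits recipe: forward() now returns raw scores (the sigmoid line was removed), nn.BCEWithLogitsLoss applies the sigmoid inside the loss in a numerically stable form, and hard predictions are recovered with torch.round(torch.sigmoid(...)). A small self-contained check of the equivalence, with toy logits and labels rather than real model outputs:

import torch
import torch.nn as nn

logits = torch.tensor([[2.0], [-1.5]])  # raw scores, shape (N, 1)
labels = torch.tensor([1, 0])
targets = labels.view(-1, 1).to(torch.float)

# Fused sigmoid + BCE, as used in the patched training loop.
loss = nn.BCEWithLogitsLoss()(logits, targets)

# The formulation the patch moved away from: explicit sigmoid, then BCELoss.
# Mathematically identical, but less stable for large-magnitude logits.
loss_ref = nn.BCELoss()(torch.sigmoid(logits), targets)
assert torch.allclose(loss, loss_ref)

# Accuracy counting: threshold the predicted probability at 0.5.
predicted = torch.round(torch.sigmoid(logits))  # tensor([[1.], [0.]])
print(loss.item(), predicted.squeeze(1).tolist())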
diff --git a/val.py b/val.py
index b55f529..b264e27 100644
--- a/val.py
+++ b/val.py
@@ -37,9 +37,10 @@ class MMLP(nn.Module):
             self.layers.append(nn.ReLU())
             input_size = h
         self.layers.append(nn.Linear(input_size, output_size))
-        # Adding an activation function after the last layer severely hurts convergence; the reason is still to be analyzed
+        # Adding an activation function after the last layer severely hurts convergence. Softmax must not be added because nn.CrossEntropyLoss() already applies softmax internally; adding one means two exponentiations in a row, which easily overflows. As for Sigmoid, the reason is that the input data would have to be normalized to (0, 1)
         #self.layers.append(nn.Sigmoid())
         #self.layers.append(nn.Softmax(dim=1))
+        #self.layers.append(nn.LogSoftmax(dim=1))
 
     def forward(self, x):
         for layer in self.layers:
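The rewritten comment in the hunk above states the standard PyTorch contract: nn.CrossEntropyLoss combines LogSoftmax and NLLLoss, so the last Linear layer must emit raw logits. A toy check of that claim (the values are illustrative only):

import torch
import torch.nn as nn

logits = torch.tensor([[1.0, 2.0, 0.5]])  # raw scores from the last Linear layer
target = torch.tensor([1])

# CrossEntropyLoss == LogSoftmax followed by NLLLoss on the raw logits.
ce = nn.CrossEntropyLoss()(logits, target)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), target)
assert torch.allclose(ce, nll)

# Appending nn.Softmax(dim=1) to the model would make the loss softmax the
# already-normalized probabilities: the second exponentiation squeezes the
# scores toward uniform and degrades the gradients (the patch comment also
# flags the overflow risk of chained exponentials).
worse = nn.CrossEntropyLoss()(nn.Softmax(dim=1)(logits), target)
print(ce.item(), worse.item())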
@@ -204,6 +205,7 @@ for fold, (train_idx, test_idx) in enumerate(skf.split(src_features, src_labels)
     criterion = nn.CrossEntropyLoss()
     #criterion = nn.BCELoss()  # to be used only after a sigmoid on the output
     optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+    #optimizer = torch.optim.SGD(model.parameters(), lr=lr)  # this optimizer has lr as its only hyperparameter, suited to small network architectures
 
     # Train the MLP model
     train_model(model, train_loader, criterion, optimizer, num_epochs)
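The second hunk sits inside a stratified k-fold loop, as the skf.split(...) in the hunk header shows. For context, here is a runnable sketch of that surrounding shape with toy stand-ins; the real MMLP, train_model, and data loading in val.py are not part of this patch, so everything below except the loop structure is an assumption:

import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import StratifiedKFold

np.random.seed(0)
# Toy stand-ins for val.py's src_features / src_labels.
src_features = np.random.rand(100, 10).astype(np.float32)
src_labels = np.random.randint(0, 2, size=100)

# StratifiedKFold preserves the class ratio of src_labels in every fold,
# which matters for a small, possibly imbalanced questionnaire dataset.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, test_idx) in enumerate(skf.split(src_features, src_labels)):
    model = nn.Sequential(nn.Linear(10, 128), nn.ReLU(), nn.Linear(128, 2))
    criterion = nn.CrossEntropyLoss()  # expects raw logits, per the first hunk
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # One illustrative step per fold; val.py's train_model runs full epochs.
    inputs = torch.from_numpy(src_features[train_idx])
    targets = torch.from_numpy(src_labels[train_idx]).long()
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
    print("fold %d: loss %.4f" % (fold, loss.item()))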