|
|
"""
|
|
|
文件名: Data_Partition.py
|
|
|
|
|
|
将所有样本数据划分为4+1份,4份训练1份验证,在原有5倍交叉验证的基础上改动,取消5倍交叉验证机制
|
|
|
|
|
|
作者: 王春林
|
|
|
创建日期: 2024年3月18日
|
|
|
最后修改日期: 2024年3月31日
|
|
|
版本号: 1.0.0
|
|
|
|
|
|
"""
|
|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
|
|
from sklearn.model_selection import StratifiedKFold
|
|
|
import torch
|
|
|
from torch import nn
|
|
|
from torch.utils.data import DataLoader, TensorDataset
|
|
|
import matplotlib.pyplot as plt
|
|
|
from sklearn.metrics import precision_score, recall_score, f1_score
|
|
|
from sklearn.utils.class_weight import compute_class_weight
|
|
|
|
|
|
|
|
|
# Load the table that holds both the feature columns and the label column.
data = pd.read_excel('feature_label_weighted.xlsx')

# Names of the feature columns selected from the table.
feature_names = [
    "强迫症状数字化",
    "人际关系敏感数字化",
    "抑郁数字化",
    "多因子症状",
    "母亲教养方式数字化",
    "父亲教养方式数字化",
    "自评家庭经济条件数字化",
    "有无心理治疗(咨询)史数字化",
    "学业情况数字化",
    "出勤情况数字化",
]

# Separate features from labels.
# NOTE: no normalization/scaling is applied here; the values are written out
# exactly as they were read.
X = data[feature_names].values
# The "-1" adjustment was already handled in the weighting stage, so the
# labels are used as-is.
y = data['类别'].values

# Fixed seed: with shuffle=True and no random_state every run would write a
# different partition to disk, making the saved folds impossible to reproduce.
RANDOM_SEED = 42

# Stratified 5-way split: each fold uses 4 parts for training and 1 part for
# validation while keeping the class proportions of `y`.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)

# NOTE(review): the save steps are assumed to belong to the loop body because
# the output file names embed the fold index -- confirm against the intended
# "single 4+1 split" behaviour described in the file header.
for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = y[train_index], y[test_index]

    # ---- Save this fold as Excel files ----
    # Training part: feature columns plus the label column.
    train_data = pd.DataFrame(X_train, columns=feature_names)
    train_data['类别'] = y_train
    train_data.to_excel(f'train_fold{fold}.xlsx', index=False)

    # Validation part: same layout as the training file.
    val_data = pd.DataFrame(X_val, columns=feature_names)
    val_data['类别'] = y_val
    val_data.to_excel(f'val_fold{fold}.xlsx', index=False)
    # ---- End of Excel export ----
|
|
|
|