# psychological_prediction/Data_Partition.py

"""
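File: Data_Partition.py

Split all samples into 4+1 parts: 4 parts for training and 1 part for
validation. Adapted from the earlier 5-fold cross-validation workflow; the
5-fold cross-validation mechanism itself is dropped.

Author: 王春林
Created: 2024-03-18
Last modified: 2024-03-31 (the original header said 2023, which predates the
creation date -- presumably a typo; confirm)
Version: 1.0.0
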
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import StratifiedKFold
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight


# Load the weighted feature/label table produced by the earlier weighting stage.
data = pd.read_excel('feature_label_weighted.xlsx')

# Columns of the table that are used as model input features.
feature_names = [
    "强迫症状数字化",
    "人际关系敏感数字化",
    "抑郁数字化",
    "多因子症状",
    "母亲教养方式数字化",
    "父亲教养方式数字化",
    "自评家庭经济条件数字化",
    "有无心理治疗（咨询）史数字化",
    "学业情况数字化",
    "出勤情况数字化",
]

# Separate the feature matrix from the label vector as plain arrays.
# No normalization is applied here; values are taken exactly as stored.
X = data.loc[:, feature_names].values
# Per the original note, the "-1" adjustment of the labels was already done
# during the weighting stage, so the labels are used as-is.
y = data['类别'].values

# Fixed seed so the shuffled split is identical on every run. Without it,
# each execution would write a different train/validation partition to disk,
# making downstream results impossible to reproduce.
RANDOM_SEED = 42

# Stratified 5-way split: every fold uses 4 parts for training and 1 part for
# validation, with stratification on the labels in y.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)


def _save_split(features, labels, path):
    """Write a feature matrix plus its labels to an Excel file.

    Args:
        features: 2-D array whose columns line up with ``feature_names``.
        labels: 1-D array of class labels, one per row of ``features``.
        path: Destination ``.xlsx`` file path.

    Returns:
        The DataFrame that was written (features followed by the label column).
    """
    frame = pd.DataFrame(features, columns=feature_names)
    frame['类别'] = labels
    frame.to_excel(path, index=False)
    return frame


for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    X_train, X_val = X[train_index], X[test_index]
    y_train, y_val = y[train_index], y[test_index]

    # One train/validation workbook pair per fold, named by the fold index.
    train_data = _save_split(X_train, y_train, f'train_fold{fold}.xlsx')
    val_data = _save_split(X_val, y_val, f'val_fold{fold}.xlsx')