|
|
"""
|
|
|
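File name: Data_Partition.py

Split all sample data into 4+1 parts (4 for training, 1 for validation).
Adapted from the earlier 5-fold cross-validation version, with the 5-fold
cross-validation mechanism removed.

Author: 王春林 (Wang Chunlin)

Created: 2024-03-18

Last modified: 2024-03-31 (the header originally read 2023-03-31, which predates the creation date; presumably a typo, please confirm)

Version: 1.0.0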
|
|
|
|
|
|
"""
|
|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
|
|
from sklearn.model_selection import StratifiedKFold
|
|
|
import torch
|
|
|
from torch import nn
|
|
|
from torch.utils.data import DataLoader, TensorDataset
|
|
|
import matplotlib.pyplot as plt
|
|
|
from sklearn.metrics import precision_score, recall_score, f1_score
|
|
|
from sklearn.utils.class_weight import compute_class_weight
|
|
|
|
|
|
|
|
|
# Load the combined feature/label table from the Excel workbook.
data = pd.read_excel(io='feature_label.xlsx')
|
|
|
|
|
|
# Names of the feature columns; the weights below are listed in the same order.
feature_names = [
    "强迫症状数字化",
    "人际关系敏感数字化",
    "抑郁数字化",
    "多因子症状",
    "母亲教养方式数字化",
    "父亲教养方式数字化",
    "自评家庭经济条件数字化",
    "有无心理治疗(咨询)史数字化",
    "学业情况数字化",
    "出勤情况数字化",
]

# Raw per-feature weights, one entry per name in feature_names.
feature_weights = [
    0.135,
    0.085,
    0.08,
    0.2,
    0.09,
    0.09,
    0.06,
    0.06,
    0.08,
    0.12,
]

# Largest raw weight; it is the divisor used for rescaling.
max_value = max(feature_weights)

# Divide every weight by the maximum so the largest one becomes exactly 1.0.
feature_weights_scaled = [weight / max_value for weight in feature_weights]

# Show the rescaled weights.
print("Scaled Feature Weights:", feature_weights_scaled)
|
|
|
|
|
|
# Split the table into a feature matrix and a label vector.
# NOTE(review): the original comments mention normalisation, but no scaler is
# applied in this section; confirm whether that step was intended here.
#
# The matrix is requested explicitly as float. With `.values`, integer-coded
# columns would yield an integer array, and writing the weighted (fractional)
# products back into it would silently truncate them to whole numbers.
X = data[feature_names].to_numpy(dtype=float)

# Shift the labels down by one (1-4 becomes 0-3 per the original note).
y = data['label'].values - 1

# Multiply each feature column by its scaled weight. Broadcasting builds a new
# array, so the DataFrame `data` is never modified through a shared buffer.
X = X * np.asarray(feature_weights_scaled, dtype=float)

# Assemble the weighted features with the label and the record identifier,
# then write the result to a new workbook.
feature_label_weighted = pd.DataFrame(X, columns=feature_names)
feature_label_weighted['类别'] = y  # zero-based class label
feature_label_weighted['学号'] = data['编号']  # identifier copied from the source table
feature_label_weighted.to_excel('feature_label_weighted.xlsx', index=False)
|
|
|
|
|
|
|