You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

55 lines
1.9 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
文件名: Data_Partition.py
将所有样本数据划分为4+1份4份训练1份验证在原有5倍交叉验证的基础上改动取消5倍交叉验证机制
作者: 王春林
创建日期: 2024年3月18日
最后修改日期: 2023年3月31日
版本号: 1.0.0
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import StratifiedKFold
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
# Load the table holding the feature columns, the 'label' column and the
# '编号' (record id) column.
data = pd.read_excel('feature_label.xlsx')

# Feature columns, listed in the same order as feature_weights below.
feature_names = [
    "强迫症状数字化",
    "人际关系敏感数字化",
    "抑郁数字化",
    "多因子症状",
    "母亲教养方式数字化",
    "父亲教养方式数字化",
    "自评家庭经济条件数字化",
    "有无心理治疗(咨询)史数字化",
    "学业情况数字化",
    "出勤情况数字化",
]

# Per-feature weights, positionally matched to feature_names.
feature_weights = [0.135, 0.085, 0.08, 0.2, 0.09, 0.09, 0.06, 0.06, 0.08, 0.12]
if len(feature_weights) != len(feature_names):
    # Fail loudly: a silent mismatch would weight the wrong columns.
    raise ValueError(
        f"expected {len(feature_names)} feature weights, "
        f"got {len(feature_weights)}"
    )

# Rescale the weights so that the largest one becomes exactly 1.0.
max_value = max(feature_weights)
feature_weights_scaled = [x / max_value for x in feature_weights]
print("Scaled Feature Weights:", feature_weights_scaled)

# Separate features and labels, then weight every feature column.
# NOTE: no normalization is performed in this section; the raw feature values
# are multiplied by the scaled weights directly.
#
# The features are cast to float explicitly. With integer-coded columns the
# extracted array would be an integer array, and writing the weighted values
# back into it in place would silently truncate them to integers. Building a
# new array via broadcasting also avoids writing into a buffer that pandas may
# expose as read-only (Copy-on-Write).
X = data[feature_names].to_numpy(dtype=float) * np.asarray(feature_weights_scaled)
y = data['label'].values - 1  # shift labels down by one (e.g. 1-4 becomes 0-3)

# Export the weighted features together with the class label and record id.
feature_label_weighted = pd.DataFrame(X, columns=feature_names)
feature_label_weighted['类别'] = y
feature_label_weighted['学号'] = data['编号']
feature_label_weighted.to_excel('feature_label_weighted.xlsx', index=False)