You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
psy/utils/feature_process.py

134 lines
4.8 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import sys
root_path = os.getcwd()
sys.path.append(root_path)
import time
import datetime
import signal
import uvicorn
import pandas as pd
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List
from fastapi.middleware.cors import CORSMiddleware
import logging
import matplotlib.pyplot as plt
import argparse
import numpy as np
import yaml
import threading
import pickle
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
# Feature schema for a single sample.
class Features(BaseModel):
    # Ten SCL assessment scores, range 0-4 (further processing happens later).
    somatization: float
    obsessive_compulsive: float
    interpersonal_sensitivity: float
    depression: float
    anxiety: float
    hostility: float
    terror: float
    paranoia: float
    psychoticism: float
    other: float
    # Basic-information features.
    father_parenting_style: int  # 1 = warmth and understanding, 0 = anything else
    mother_parenting_style: int  # 1 = warmth and understanding, 0 = anything else
    self_assessed_family_economic_condition: int  # 2 = poor, 1 = fairly poor, 0 = anything else
    # 1 when there is a counseling history; presumably 0 otherwise
    # (the original comment looks truncated -- TODO confirm).
    history_of_psychological_counseling: bool
    # Daily-behaviour features.
    absenteeism_above_average: bool  # 1 = above the average count, 0 = at or below it
    academic_warning: bool  # 1 = has an academic warning, 0 = none
    # Label.
    label: int  # 0-3, four classes in total
def process_features_list(features_list: List[dict]) -> List[Features]:
    """
    Build one Features instance per dictionary in ``features_list``.

    Each dictionary is unpacked as keyword arguments to ``Features``; the
    resulting instances are returned in the same order as the input.
    """
    converted = []
    for raw_fields in features_list:
        converted.append(Features(**raw_fields))
    return converted
# Attribute names of the ten SCL factor scores read from each sample.
_SCL_FACTOR_NAMES = (
    "somatization",
    "obsessive_compulsive",
    "interpersonal_sensitivity",
    "depression",
    "anxiety",
    "hostility",
    "terror",
    "paranoia",
    "psychoticism",
    "other",
)


def _high_scl_factor_ratio(features):
    """Return the share of the ten SCL factors whose score is above 3.0."""
    high_count = sum(
        1 for name in _SCL_FACTOR_NAMES if getattr(features, name) > 3.0
    )
    return high_count / len(_SCL_FACTOR_NAMES)


# (output column, extractor) pairs in output-column order.  Keeping the column
# name next to the rule that fills it means each name is spelled exactly once.
_FEATURE_COLUMNS = (
    # Basic-information features, mapped to fixed numeric codes.
    ('父亲教养方式数字化', lambda f: 0.59 if f.father_parenting_style == 1 else 0.46),
    ('母亲教养方式数字化', lambda f: 0.69 if f.mother_parenting_style == 1 else 0.56),
    ('自评家庭经济条件数字化',
     lambda f: 0.54 if f.self_assessed_family_economic_condition in (1, 2) else 0.47),
    ('有无心理治疗(咨询)史数字化',
     lambda f: 0.21 if f.history_of_psychological_counseling else 0.09),
    # Symptom factors: scores are divided by 4 (the top of the 0-4 range).
    ('强迫症状数字化', lambda f: f.obsessive_compulsive / 4),
    ('人际关系敏感数字化', lambda f: f.interpersonal_sensitivity / 4),
    ('抑郁数字化', lambda f: f.depression / 4),
    ('多因子症状', _high_scl_factor_ratio),
    # Daily-behaviour features, mapped to fixed numeric codes.
    ('出勤情况数字化', lambda f: 0.74 if f.absenteeism_above_average else 0.67),
    ('学业情况数字化', lambda f: 0.59 if f.academic_warning else 0.50),
    # Class label, passed through unchanged.
    ("类别", lambda f: f.label),
)


def create_feature_df(features_list):
    """
    Build a DataFrame with one row of numeric features per sample.

    Args:
        features_list: Iterable of objects exposing the attributes of
            ``Features`` (only attribute access is used).

    Returns:
        pandas.DataFrame: One row per sample, indexed 0..n-1, with the ten
        digitised feature columns followed by the label column "类别".
        For empty input the frame has no rows but still carries all columns,
        so downstream column selection keeps working.
    """
    column_names = [name for name, _ in _FEATURE_COLUMNS]
    # Collect plain rows first and build the frame once; concatenating inside
    # the loop would re-copy the accumulated frame for every sample.
    rows = [
        [extract(features) for _, extract in _FEATURE_COLUMNS]
        for features in features_list
    ]
    return pd.DataFrame(rows, columns=column_names)
def apply_feature_weights(df, feature_names, feature_weights):
    """
    Multiply the selected feature columns by max-normalised weights.

    The weights are divided by their maximum, so the largest weight becomes
    1.0, and the i-th feature column is multiplied by the i-th scaled weight.
    The input frame is left unmodified.

    Args:
        df (pandas.DataFrame): Frame holding the feature columns and the
            label column "类别".
        feature_names (list): Names of the feature columns to weight.
        feature_weights (list): One weight per feature name, in the same
            order. Surplus trailing weights are not applied to any column but
            still take part in the maximum used for scaling.

    Returns:
        pandas.DataFrame: A new frame with a fresh 0..n-1 index containing
        the weighted feature columns (as floats) followed by "类别".

    Raises:
        IndexError: If there are fewer weights than feature names.
        ValueError: If ``feature_weights`` is empty.
    """
    n_features = len(feature_names)
    if len(feature_weights) < n_features:
        raise IndexError(
            f"expected at least {n_features} feature weights, "
            f"got {len(feature_weights)}"
        )

    # Normalise so that the largest weight becomes 1.0.
    max_weight = max(feature_weights)
    scaled_weights = [weight / max_weight for weight in feature_weights]

    # Take an explicit, writable float copy.  Writing into ``.values`` would
    # truncate the products for integer-typed columns and fails on the
    # read-only arrays pandas hands out under Copy-on-Write.
    X = df[feature_names].to_numpy(dtype=float, copy=True)
    y = df['类别'].to_numpy()

    # One weight per column, broadcast over all rows.
    X *= np.asarray(scaled_weights[:n_features], dtype=float)

    feature_label_weighted = pd.DataFrame(X, columns=feature_names)
    feature_label_weighted['类别'] = y
    return feature_label_weighted