# psy/utils/feature_process.py

import os
import sys
root_path = os.getcwd()
sys.path.append(root_path)

import time
import datetime
import signal
import uvicorn
import pandas as pd
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List
from fastapi.middleware.cors import CORSMiddleware
import logging
import matplotlib.pyplot as plt
import argparse
import numpy as np
import yaml
import threading
import pickle
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles

# Feature schema for a single sample (a pydantic model; one instance per sample).
# The inline notes next to each field record the encoding the author intended.
class Features(BaseModel):
    # 10 SCL assessment measures (processed further downstream), range 0-4
    somatization: float
    obsessive_compulsive: float
    interpersonal_sensitivity: float
    depression: float
    anxiety: float
    hostility: float
    terror: float
    paranoia: float
    psychoticism: float
    other: float
    # Basic-information features
    father_parenting_style: int # warmth and understanding: 1; other: 0
    mother_parenting_style: int # warmth and understanding: 1; other: 0
    self_assessed_family_economic_condition: int # poor: 2; below average: 1; other: 0
    history_of_psychological_counseling: bool # has history: 1; none: 0
    # Daily-behaviour features
    absenteeism_above_average: bool # above the average count: 1; at or below: 0
    academic_warning: bool # warning issued: 1; no warning: 0
    # Label
    label: int # 0-3, four classes in total

def process_features_list(features_list: List[dict]) -> List[Features]:
    """
    Convert a list of dicts into a list of Features instances.

    Each dict is unpacked as keyword arguments to the Features constructor,
    so its keys must match the Features field names. Output order follows
    input order.
    """
    parsed_samples = []
    for raw_sample in features_list:
        sample = Features(**raw_sample)
        parsed_samples.append(sample)
    return parsed_samples

def create_feature_df(features_list):
    """
    Build a DataFrame of encoded features and labels, one row per sample.

    Every sample is mapped to ten numeric feature columns plus the label
    column '类别'. Categorical / boolean inputs are replaced by fixed numeric
    codes (the constants below; their provenance is not documented in this
    file), three SCL factor scores are divided by 4, and '多因子症状' is the
    fraction of the ten SCL factor scores that are strictly greater than 3.0.

    Args:
        features_list: Iterable of Features-like objects (anything exposing
            the Features fields as attributes).

    Returns:
        pandas.DataFrame: One row per sample, with a default 0..n-1 index and
        the columns in the fixed order listed in ``columns`` below. For empty
        input the frame has zero rows but still carries all columns, so
        downstream column selection keeps working.
    """
    # The ten SCL factor scores that take part in the multi-factor ratio.
    scl_factor_names = (
        "somatization",
        "obsessive_compulsive",
        "interpersonal_sensitivity",
        "depression",
        "anxiety",
        "hostility",
        "terror",
        "paranoia",
        "psychoticism",
        "other",
    )

    # Output column order; each row tuple below follows this exact order.
    columns = [
        # Encoded features -- basic information
        '父亲教养方式数字化',
        '母亲教养方式数字化',
        '自评家庭经济条件数字化',
        '有无心理治疗（咨询）史数字化',
        # Encoded features -- symptom factors
        '强迫症状数字化',
        '人际关系敏感数字化',
        '抑郁数字化',
        '多因子症状',
        # Encoded features -- daily behaviour
        '出勤情况数字化',
        '学业情况数字化',
        "类别",
    ]

    # Collect plain rows and build the frame once: calling pd.concat per
    # sample re-copies every previous row (quadratic) and starts from an
    # empty frame, which newer pandas versions deprecate.
    rows = []
    for features in features_list:
        scl_scores = [getattr(features, name) for name in scl_factor_names]
        high_factor_ratio = sum(1 for score in scl_scores if score > 3.0) / 10

        rows.append((
            0.59 if features.father_parenting_style == 1 else 0.46,
            0.69 if features.mother_parenting_style == 1 else 0.56,
            0.54 if features.self_assessed_family_economic_condition in (2, 1) else 0.47,
            0.21 if features.history_of_psychological_counseling else 0.09,
            features.obsessive_compulsive / 4,
            features.interpersonal_sensitivity / 4,
            features.depression / 4,
            high_factor_ratio,
            0.74 if features.absenteeism_above_average else 0.67,
            0.59 if features.academic_warning else 0.50,
            features.label,
        ))

    return pd.DataFrame(rows, columns=columns)

def apply_feature_weights(df, feature_names, feature_weights):
    """
    Scale the selected feature columns by max-normalised weights.

    The weights are first divided by their maximum (so the largest weight
    becomes 1.0); column ``feature_names[i]`` is then multiplied by the i-th
    scaled weight. The label column '类别' is carried over unchanged.

    Args:
        df (pandas.DataFrame): Frame containing the feature columns and the
            label column '类别'. It is not modified.
        feature_names (list): Names of the feature columns to weight.
        feature_weights (list): One weight per feature name, in the same
            order. Extra trailing weights still take part in the max
            normalisation but are otherwise unused.

    Returns:
        pandas.DataFrame: New frame with a default 0..n-1 index, the weighted
        feature columns (as floats) in ``feature_names`` order, followed by
        '类别'.

    Raises:
        IndexError: If there are fewer weights than feature names.
    """
    # Normalise so the largest weight becomes 1.0.
    max_weight = max(feature_weights)
    scaled_weights = [weight / max_weight for weight in feature_weights]

    n_features = len(feature_names)
    if len(scaled_weights) < n_features:
        # Same exception type that indexing past the end of the weights
        # produced before, but raised up front with a clear message.
        raise IndexError(
            f"expected at least {n_features} feature weights, "
            f"got {len(scaled_weights)}"
        )

    # Force a float array: with integer-only columns the raw values would be
    # an int array, and weighted results would be truncated back to integers.
    X = df[feature_names].to_numpy(dtype=float)
    y = df['类别'].to_numpy()

    # Broadcast the per-column weights across all rows in a single multiply.
    weighted = X * np.asarray(scaled_weights[:n_features], dtype=float)

    feature_label_weighted = pd.DataFrame(weighted, columns=feature_names)
    feature_label_weighted['类别'] = y

    return feature_label_weighted