|
|
import os
|
|
|
import sys
|
|
|
root_path = os.getcwd()
|
|
|
sys.path.append(root_path)
|
|
|
|
|
|
import time
|
|
|
import datetime
|
|
|
import signal
|
|
|
import uvicorn
|
|
|
import pandas as pd
|
|
|
from fastapi import FastAPI, Request
|
|
|
from pydantic import BaseModel
|
|
|
from typing import List
|
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
|
import logging
|
|
|
import matplotlib.pyplot as plt
|
|
|
import argparse
|
|
|
import numpy as np
|
|
|
import yaml
|
|
|
import threading
|
|
|
import pickle
|
|
|
from fastapi.responses import FileResponse
|
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
|
|
|
class Features(BaseModel):
    """Feature record for a single sample.

    Groups the ten SCL factor scores, the encoded background information,
    the daily-behaviour flags and the class label of one sample.
    """

    # Ten SCL assessment scores, each in the range 0-4
    # (processed further downstream).
    somatization: float
    obsessive_compulsive: float
    interpersonal_sensitivity: float
    depression: float
    anxiety: float
    hostility: float
    terror: float
    paranoia: float
    psychoticism: float
    other: float

    # Background information.
    # Parenting style: 1 = warmth and understanding, 0 = anything else.
    father_parenting_style: int
    mother_parenting_style: int
    # Self-assessed family finances: 2 = impoverished, 1 = poor, 0 = other.
    self_assessed_family_economic_condition: int
    # Prior psychological counseling: 1 = yes, 0 = no.
    history_of_psychological_counseling: bool

    # Daily-behaviour features.
    # Absences: 1 = above the average count, 0 = at or below it.
    absenteeism_above_average: bool
    # Academic warning: 1 = warned, 0 = no warning.
    academic_warning: bool

    # Target label: one of four classes, 0-3.
    label: int
|
|
|
|
|
|
def process_features_list(features_list: List[dict]) -> List[Features]:
    """Convert a list of plain dictionaries into ``Features`` instances.

    Args:
        features_list: Dictionaries whose keys are passed to ``Features``
            as keyword arguments.

    Returns:
        One ``Features`` instance per input dictionary, in input order.
    """
    converted = []
    for raw_sample in features_list:
        converted.append(Features(**raw_sample))
    return converted
|
|
|
|
|
|
def _build_feature_row(features):
    """Return the encoded feature values of one sample as a column -> value dict."""
    scl_scores = (
        features.somatization,
        features.obsessive_compulsive,
        features.interpersonal_sensitivity,
        features.depression,
        features.anxiety,
        features.hostility,
        features.terror,
        features.paranoia,
        features.psychoticism,
        features.other,
    )
    # Share of the ten SCL factors whose score is strictly above 3.0.
    multi_factor_ratio = sum(1 for score in scl_scores if score > 3.0) / 10

    # Insertion order of this dict defines the column order of the result.
    return {
        # Encoded features -- background information
        '父亲教养方式数字化': 0.59 if features.father_parenting_style == 1 else 0.46,
        '母亲教养方式数字化': 0.69 if features.mother_parenting_style == 1 else 0.56,
        '自评家庭经济条件数字化': (
            0.54 if features.self_assessed_family_economic_condition in (1, 2) else 0.47
        ),
        '有无心理治疗(咨询)史数字化': (
            0.21 if features.history_of_psychological_counseling else 0.09
        ),
        # Encoded features -- symptom factors (0-4 scores rescaled by 1/4)
        '强迫症状数字化': features.obsessive_compulsive / 4,
        '人际关系敏感数字化': features.interpersonal_sensitivity / 4,
        '抑郁数字化': features.depression / 4,
        '多因子症状': multi_factor_ratio,
        # Encoded features -- daily behaviour
        '出勤情况数字化': 0.74 if features.absenteeism_above_average else 0.67,
        '学业情况数字化': 0.59 if features.academic_warning else 0.50,
        "类别": features.label,
    }


def create_feature_df(features_list) -> pd.DataFrame:
    """Build a DataFrame of encoded features from a sequence of samples.

    Every sample contributes one row. Categorical inputs are mapped to fixed
    numeric codes, three SCL scores are divided by 4, and the multi-factor
    column holds the fraction of the ten SCL scores that exceed 3.0.

    Args:
        features_list: Iterable of objects exposing the attributes of
            ``Features`` (SCL scores, background fields, behaviour flags
            and ``label``).

    Returns:
        pandas.DataFrame: One row per sample with the ten encoded feature
        columns followed by the label column, indexed 0..n-1. An empty
        input yields an empty DataFrame without columns.
    """
    # Collect all rows first and build the frame once; growing a DataFrame
    # with pd.concat inside the loop re-copies every previous row each time.
    rows = [_build_feature_row(features) for features in features_list]
    return pd.DataFrame(rows)
|
|
|
|
|
|
def apply_feature_weights(df: pd.DataFrame, feature_names, feature_weights) -> pd.DataFrame:
    """Scale the selected feature columns by max-normalized weights.

    The weights are divided by the largest weight (so the largest becomes 1)
    and the i-th feature column is multiplied by the i-th scaled weight.
    The input DataFrame is not modified.

    Args:
        df (pandas.DataFrame): Frame holding the feature columns and the
            label column '类别'.
        feature_names (list): Names of the feature columns to weight.
        feature_weights (list): One weight per feature name, in the same
            order. Extra trailing weights are not applied to any column but
            still take part in the max-normalization.

    Returns:
        pandas.DataFrame: New frame with a fresh 0..n-1 index containing the
        weighted feature columns (as floats) followed by the label column.

    Raises:
        ValueError: If ``feature_weights`` is empty.
        ZeroDivisionError: If the largest weight is zero.
        KeyError: If a feature column or the label column is missing.
        IndexError: If fewer weights than feature names are supplied.
    """
    # Normalize the weights by the largest one.
    max_weight = max(feature_weights)
    scaled_weights = [weight / max_weight for weight in feature_weights]

    # Split features from labels. Requesting a float array keeps integer
    # feature columns from truncating the scaled values back to integers.
    feature_matrix = df[feature_names].to_numpy(dtype=float)
    labels = df['类别'].values

    n_features = len(feature_names)
    if len(scaled_weights) < n_features:
        raise IndexError(
            f"expected at least {n_features} feature weights, "
            f"got {len(scaled_weights)}"
        )

    # Column-wise scaling by broadcasting. The product is a new array, so
    # neither the caller's data nor a read-only buffer is written to.
    weight_vector = np.asarray(scaled_weights[:n_features], dtype=float)
    weighted_matrix = feature_matrix * weight_vector

    feature_label_weighted = pd.DataFrame(weighted_matrix, columns=feature_names)
    feature_label_weighted['类别'] = labels
    return feature_label_weighted