import pandas as pd

# The 10 SCL-90 symptom-factor columns used for severity classification.
# These are data keys in the source spreadsheet and must not be translated.
FACTOR_COLUMNS = [
    '躯体化', '强迫症状', '人际关系敏感', '抑郁', '焦虑',
    '敌对', '恐怖', '偏执', '精神病性', '其他',
]


def classify_levels(df):
    """Return a severity Level (1=worst .. 4=best) for each row of *df*.

    Rules (highest severity wins):
      Level 1: >= 8 factor scores >= 3, OR the '精神病性' (psychoticism)
               score is >= 4.
      Level 2: 3..7 factor scores >= 3.
      Level 3: 1..2 factor scores >= 3.
      Level 4: no factor score >= 3.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain all columns in FACTOR_COLUMNS.

    Returns
    -------
    pandas.Series
        Integer levels aligned to df.index (object dtype, like the
        original `df['Level']` column initialized to None).
    """
    # Per-row count of factors scoring >= 3.
    factors_above_3 = df[FACTOR_COLUMNS].ge(3).sum(axis=1)
    # Rows whose psychoticism factor alone forces Level 1.
    psychotic_above_4 = df['精神病性'].ge(4)

    level = pd.Series(None, index=df.index, dtype=object)
    # Assign from least to most severe so the most severe rule is applied
    # LAST and can never be overwritten.  (Bug fix: the original assigned
    # Level 1 first, so a row with 精神病性 >= 4 but only 1-7 factors >= 3
    # was clobbered back to Level 2 or 3 by the later assignments.)
    level[factors_above_3 < 1] = 4
    level[(factors_above_3 >= 1) & (factors_above_3 < 3)] = 3
    level[(factors_above_3 >= 3) & (factors_above_3 <= 7)] = 2
    level[(factors_above_3 >= 8) | psychotic_above_4] = 1
    return level


def main():
    """Read the source workbook, classify each row, and write the result."""
    df = pd.read_excel('data/data_src.xlsx')
    df['Level'] = classify_levels(df)

    # Keep only the ID column, the factor scores, and the computed level,
    # preserving the original row order of the input workbook.
    selected_columns = ['编号'] + FACTOR_COLUMNS + ['Level']
    selected_data = df[selected_columns].sort_index()

    selected_data.to_excel('output_excel_file.xlsx', index=False)


if __name__ == '__main__':
    main()