"""Leave-record statistics computed from ``data/LeaveRecord.xlsx``.

This module carries the logic of two small scripts:

* ``LR_Process_concat_sheet.py``: merge every sheet, count leave records
  per student id, and flag ids that appear with more than one name.
  The result is written to ``output_LR_1.xlsx``.
* ``LR_Process_single_sheet.py``: count leave records per student id
  separately for each sheet and stack the per-sheet tables.
  The result is written to ``output_LR.xlsx``.

The computations are pure functions over DataFrames; all file I/O lives in
the ``run_*`` helpers so the logic can be exercised without a workbook.
"""
import pandas as pd

LEAVE_RECORD_PATH = 'data/LeaveRecord.xlsx'


def _read_all_sheets(path):
    """Return ``{sheet_name: DataFrame}`` for every sheet of the workbook.

    ``pd.read_excel`` opens and closes the file itself, so no workbook
    handle is left open afterwards.
    """
    return pd.read_excel(path, sheet_name=None)


def _join_distinct(names):
    """Join the distinct non-missing values of *names* with commas.

    Values are kept in order of first appearance and converted with
    ``str`` so that empty cells or numeric names cannot break the join.
    """
    return ','.join(str(name) for name in names.dropna().unique())


def summarize_leave_counts(records):
    """Count leave records per student id and flag conflicting names.

    Args:
        records: DataFrame with at least the columns ``学号`` (student id)
            and ``姓名`` (student name), one row per leave record.

    Returns:
        DataFrame with one row per student id, sorted by id, with columns
        ``学号``, ``请假次数`` (number of records), ``姓名`` (distinct names
        joined by ``,``) and ``是否错误`` (1 when the id is associated with
        more than one distinct name, otherwise 0).
    """
    names_by_id = records.groupby('学号')['姓名']
    # Build every column from the same group-by so values are aligned on
    # the student id itself rather than on row position.
    summary = pd.DataFrame({
        '请假次数': names_by_id.size(),
        '姓名': names_by_id.agg(_join_distinct),
        # Count distinct names directly; splitting the joined string would
        # misreport a single name that happens to contain a comma.
        '是否错误': (names_by_id.nunique() > 1).astype(int),
    })
    return summary.reset_index()


def count_leaves_per_sheet(sheets):
    """Count leave records per student id, separately for each sheet.

    Args:
        sheets: Mapping of sheet name to DataFrame; each DataFrame must
            have a ``学号`` column.

    Returns:
        DataFrame with columns ``学号`` and ``请假次数``. The per-sheet
        tables (most frequent id first) are stacked in sheet order, so an
        id that occurs in several sheets appears once per sheet.
    """
    per_sheet = [
        sheet['学号']
        .value_counts()
        .rename_axis('学号')
        .reset_index(name='请假次数')
        for sheet in sheets.values()
    ]
    if not per_sheet:
        # pd.concat rejects an empty list; keep the expected columns.
        return pd.DataFrame(columns=['学号', '请假次数'])
    # A single concat avoids re-copying the accumulated result per sheet.
    return pd.concat(per_sheet, ignore_index=True)


def run_concat_sheet(path=LEAVE_RECORD_PATH, output='output_LR_1.xlsx'):
    """Merge all sheets, summarize them and save the result to *output*."""
    sheets = _read_all_sheets(path)
    merged = pd.concat(list(sheets.values()), ignore_index=True)
    summarize_leave_counts(merged).to_excel(output, index=False)


def run_single_sheet(path=LEAVE_RECORD_PATH, output='output_LR.xlsx'):
    """Compute per-sheet leave counts and save the result to *output*."""
    sheets = _read_all_sheets(path)
    count_leaves_per_sheet(sheets).to_excel(output, index=False)


if __name__ == '__main__':
    run_concat_sheet()
    run_single_sheet()
a/data/2021_2022_2.xlsx b/data/2021_2022_2.xlsx new file mode 100644 index 0000000..a300f9d Binary files /dev/null and b/data/2021_2022_2.xlsx differ diff --git a/data/LeaveRecord.xlsx b/data/LeaveRecord.xlsx new file mode 100644 index 0000000..9e6389d Binary files /dev/null and b/data/LeaveRecord.xlsx differ diff --git a/val.py b/val.py index b264e27..2c09113 100644 --- a/val.py +++ b/val.py @@ -159,13 +159,14 @@ skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random.randi input_size = src_features.shape[1] hidden_sizes = [32,128,32] output_size = len(set(src_labels)) # CrossEntropyLoss()损失函数类别数量就是输出size,sigmoid损失只适用于二分类,size为1,这里统一使用Cross -lr = 0.0001 # learn rate 学习率 -num_epochs = 1000 +lr = 0.001 # learn rate 学习率 +num_epochs = 100 batch_size = 128 # 定义全局结果变量 fold_accuracy=[] fold_loss=[] +model_name=0 # 遍历每个 fold for fold, (train_idx, test_idx) in enumerate(skf.split(src_features, src_labels)): @@ -217,6 +218,9 @@ for fold, (train_idx, test_idx) in enumerate(skf.split(src_features, src_labels) fold_loss.append(test_loss) print(f'Accuracy for fold {fold}: {fold_accuracy[fold]*100} %, loss: {fold_loss[fold]}') print('--------------------------------') + # Save trained model + torch.save(model.state_dict(), 'psychology_model_val_{}.pth'.format(str(model_name))) + model_name+=1 print('K-FOLD CROSS VALIDATION RESULTS') print(f'Fold accuracies: {fold_accuracy}')