You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
61 lines
2.8 KiB
Python
61 lines
2.8 KiB
Python
import pandas as pd
|
|
|
|
# 定义文件列表和学期数量
|
|
files = ["data/2020_2021_1.xlsx", "data/2020_2021_2.xlsx", "data/2021_2022_1.xlsx", "data/2021_2022_2.xlsx"]
|
|
semester_count = len(files)
|
|
|
|
# 初始化空字典用于存储学生信息和学期数量
|
|
student_data = {}
|
|
|
|
# 遍历每个文件
|
|
for file in files:
|
|
# 读取每个文件的简洁版sheet
|
|
df = pd.read_excel(file, sheet_name="简洁版")
|
|
|
|
# 合并数据到学生字典中
|
|
for _, row in df.iterrows():
|
|
student_id = row["学号"]
|
|
student_name = row["姓名"]
|
|
|
|
# 如果学号已存在于字典中
|
|
if student_id in student_data:
|
|
# 检查是否有错误姓名
|
|
if student_name != student_data[student_id]["姓名"]:
|
|
student_data[student_id]["是否错误"] = 1
|
|
student_data[student_id]["姓名"] += f", {student_name}"
|
|
# 更新不合格课程门数和不合格课程门数组成
|
|
student_data[student_id]["不合格课程门数"] += row["不合格课程门数"]
|
|
student_data[student_id]["不合格课程门数组成"].append(row["不合格课程门数"])
|
|
# 否则,添加学号及初始信息
|
|
else:
|
|
student_data[student_id] = {
|
|
"学号": student_id,
|
|
"姓名": student_name,
|
|
"不合格课程门数": row["不合格课程门数"],
|
|
"不合格课程门数组成": [row["不合格课程门数"]],
|
|
"是否错误": 0,
|
|
"是否受过退学预警": 0,
|
|
"受过退学预警次数": 0
|
|
}
|
|
|
|
# 统计每个学生在所有文件中的退学预警次数
|
|
for student_id, data in student_data.items():
|
|
warning_count = 0
|
|
for file in files:
|
|
df = pd.read_excel(file, sheet_name="简洁版", usecols=["学号", "是否受过退学预警"])
|
|
if student_id in df["学号"].values:
|
|
if "是" in df.loc[df["学号"] == student_id, "是否受过退学预警"].values or "曾受过退学预警" in df.loc[df["学号"] == student_id, "是否受过退学预警"].values:
|
|
warning_count += 1
|
|
student_data[student_id]["受过退学预警次数"] = warning_count
|
|
if warning_count > 0:
|
|
student_data[student_id]["是否受过退学预警"] = 1
|
|
|
|
# 将不合格课程门数组成转为逗号分隔的字符串
|
|
for data in student_data.values():
|
|
data["不合格课程门数组成"] = ",".join(str(num) for num in data["不合格课程门数组成"])
|
|
|
|
# 创建DataFrame并保存结果到新的Excel文件
|
|
output_df = pd.DataFrame(student_data.values())
|
|
output_df = output_df[["学号", "姓名", "不合格课程门数", "不合格课程门数组成", "是否错误", "是否受过退学预警", "受过退学预警次数"]]
|
|
output_df.to_excel("data_processed/Dropout_Warning_RES.xlsx", index=False)
|