commit 34770cfeffe0fd5b7b3f0b60ada76d05a25be4a6 Author: ZZY <2450266535@qq.com> Date: Thu Dec 12 22:13:16 2024 +0800 init: 添加 GLUT 学分统计爬虫项目 - 新增 README.md 文件,包含项目描述、初始化步骤和使用方法 - 添加 default.env 文件,用于配置环境变量 - 实现 glut.py,包含登录、获取成绩、解析成绩等功能 - 添加 index.html,提供 Web 界面展示成绩 - 实现 main.py,提供命令行接口 - 添加 requirements.txt,列出项目依赖 - 实现 server.py,提供 HTTP 服务接口 diff --git a/README.md b/README.md new file mode 100644 index 0000000..98443d8 --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +# GLUT spider web + +- 项目描述 + - 用于获取GLUT学分统计,即爬虫案例 + +- 初始化项目 + +```shell +# Windows +python -m venv .venv +pip install -r requirements.txt + +# Linux +python3 -m venv .venv +pip3 install -r requirements.txt +``` + +- 配置环境变量( **推荐配置** ) + + 将`default.env`内容填写完整后改名为`.env` + +- 使用方法 + +```shell +# 帮助文档 +python main.py -h + +# 开启服务器 +python main.py -s + +# 使用命令行操作,如果没有配置环境变量则需要给出用户名和密码 +# 注意默认会输出scores.json +> python main.py -t 2023_sp +Fetching scores for 2023 sp... +aggregate_score: 72.32824427480917 + +> python main.py -t 2023_sp -u 32220520xxxxx -p xxxxx +Fetching scores for 2023 sp... +aggregate_score: 72.32824427480917 +``` diff --git a/default.env b/default.env new file mode 100644 index 0000000..47025bf --- /dev/null +++ b/default.env @@ -0,0 +1,3 @@ +USERNAME=3220520xxxxx +PASSWORD=YourPassword +PORT=8000 diff --git a/glut.py b/glut.py new file mode 100644 index 0000000..20bf632 --- /dev/null +++ b/glut.py @@ -0,0 +1,179 @@ +from dataclasses import dataclass, asdict +from enum import Enum, auto +import requests +from lxml import etree + +class Term(Enum): + SPRING = auto() + AUTUMN = auto() + ALL = auto() + + def __repr__(self): + return self.name.lower() + + +class GLUTAcademic: + class LoginFailedError(Exception): + pass + + FINAL_GRADE_DICT = { + '优秀': 95, + '良': 85, + '中': 75, + '及格': 65, + '不及格': 40, + '': 0, + } + + @dataclass + class Score: + year: int + term: Term + department: str + course_number: int + course_name: str + course_sequence: int + instructor: str + final_grade: int + gpa: float + credits: float + hours: float + assessment_method: str + course_attribute: str + notes: str + exam_type: str + retake_flag: str + course_requirements: str + course_category: str + coefficient: str + second_degree_minor: str + pass_flag: str + + def as_dict(self): + d = asdict(self) + d['term'] = repr(self.term) + return d + + def __str__(self): + return (f"Score(学年={self.year}, 学期={self.term}, 开课院系={self.department}, " + f"课程号={self.course_number}, 课程名={self.course_name}, 课序号={self.course_sequence}, " + f"主讲教师={self.instructor}, 总评={self.final_grade}, 绩点={self.gpa}, " + f"学分={self.credits}, 学时={self.hours}, 考核方式={self.assessment_method}, " + f"选课属性={self.course_attribute}, 备注={self.notes}, 考试性质={self.exam_type}, " + f"是否缓考={self.retake_flag}, 课程要求={self.course_requirements}, " + f"课程类别={self.course_category}, 系数={self.coefficient}, " + f"二学位辅修={self.second_degree_minor}, 及格标志={self.pass_flag})") + + def __init__(self, username: int, password: str): + self.session = requests.Session() + self.username = username + self.password = password + self.login_status = False + self.base_url = 'https://jw.glut.edu.cn' + + def login(self): + url = f"{self.base_url}/academic/j_acegi_security_check"\ + f"?j_username={self.username}&j_password={self.password}&j_captcha=undefined" + res = self.session.get(url) + if res.status_code == 200 and res.headers.get('X-Frame-Options') != 'DENY': + self.login_status = True + else: + raise self.LoginFailedError('login error maybe username or password is wrong') + + def get_cookies(self): + if not self.login_status: + return None + return '; '.join([f'{key}={value}' for key, value in self.session.cookies.items()]) + + def get_scores_raw(self, year: int = 2024, term: Term = Term.ALL): + url = f"{self.base_url}/academic/manager/score/studentOwnScore.do" + headers = {'Content-Type': 'application/x-www-form-urlencoded'} + term_dict = { + Term.SPRING: "1", + Term.AUTUMN: "2", + Term.ALL: "" + } + + data = { + 'year': year - 2013 + 33, + 'term': term_dict[term], + 'prop': '', + 'groupName': '', + 'para': '0', + 'sortColumn': '', + 'Submit': '查询' + } + res = self.session.post(url, data=data, headers=headers) + + if res.status_code != 200 or res.headers.get('X-Frame-Options') == 'DENY': + raise RuntimeError("Failed to get scores") + return res.content + + def parse_scores(self, raw_score: bytes) -> list[Score]: + tree = etree.fromstring(raw_score, parser=etree.HTMLParser(encoding='utf-8')) + table = tree.xpath('//table[@class="datalist"]') + if not table or len(table) != 1: + err_msg = f"Failed to parse scores {table}" + with open('error.html', 'wb') as f: + f.write(raw_score) + raise RuntimeError(err_msg) + table = table[0] + tr_list = table.xpath('tr')[1:] + return [self.parse_scope(tr.xpath('td/text()')) for tr in tr_list] + + def parse_scope(self, texts: list[str]) -> Score: + texts = [t.strip() for t in texts] + final_grade = self.FINAL_GRADE_DICT.get(texts[7], -1) + if final_grade == -1: + try: + final_grade = float(texts[7]) + except ValueError as exc: + raise ValueError('Final grade is not a int number') from exc + + term_map = { + '春': Term.SPRING, + '秋': Term.AUTUMN + } + term = term_map.get(texts[1]) + if term is None: + raise ValueError(f'term error maybe 春 or 秋 but got {texts[1]}') + + return self.Score( + year = int(texts[0]), + term = term, + department= texts[2], + course_number = int(texts[3]), + course_name = texts[4], + course_sequence = int(texts[5]), + instructor = texts[6], + final_grade = final_grade, + gpa = float(texts[8]), + credits = float(texts[9]), + hours = float(texts[10]), + assessment_method = texts[11], + course_attribute = texts[12], + notes = texts[13], + exam_type = texts[14], + retake_flag = texts[15], + course_requirements = texts[16], + course_category = texts[17], + coefficient = texts[18], + second_degree_minor = texts[19], + pass_flag = texts[20] + ) + + def get_scores(self, year: int = 2024, term: Term = Term.ALL) -> list[Score]: + raw_score = self.get_scores_raw(year, term) + return self.parse_scores(raw_score) + + def calculate_scores(self, scores: list[Score]) -> float: + if not scores: + return 0.0 + total_credits = 0 + total_grades = 0 + for i in scores: + if i.course_attribute != '必修' or i.course_name.startswith('体育'): + continue + total_credits += i.credits + total_grades += i.final_grade * i.credits + return total_grades / total_credits diff --git a/index.html b/index.html new file mode 100644 index 0000000..1e8f393 --- /dev/null +++ b/index.html @@ -0,0 +1,137 @@ + + +
+ + +课程名 | +总评成绩 | +绩点 | +
---|