py_spider/glut.py
ZZY 34770cfeff init: 添加 GLUT 学分统计爬虫项目
- 新增 README.md 文件,包含项目描述、初始化步骤和使用方法
- 添加 default.env 文件,用于配置环境变量
- 实现 glut.py,包含登录、获取成绩、解析成绩等功能
- 添加 index.html,提供 Web 界面展示成绩
- 实现 main.py,提供命令行接口
- 添加 requirements.txt,列出项目依赖
- 实现 server.py,提供 HTTP 服务接口
2024-12-12 22:13:16 +08:00

180 lines
6.1 KiB
Python

from dataclasses import dataclass, asdict
from enum import Enum, auto
import requests
from lxml import etree
class Term(Enum):
    """Academic term selector used when querying and labelling scores.

    ``SPRING`` and ``AUTUMN`` identify a single term; ``ALL`` means
    "do not filter by term".
    """

    SPRING = auto()
    AUTUMN = auto()
    ALL = auto()

    def __repr__(self):
        """Return the lower-cased member name, e.g. ``'spring'``.

        ``repr()`` of a member is what ``GLUTAcademic.Score.as_dict`` stores
        under the ``'term'`` key, so this string is part of the serialized
        output and must stay stable.
        """
        return self.name.lower()
class GLUTAcademic:
    """Session-based client for the academic site at ``base_url``.

    Typical use: construct with credentials, call ``login()``, then
    ``get_scores()`` to obtain parsed ``Score`` records and
    ``calculate_scores()`` to get a credit-weighted average.
    """

    class LoginFailedError(Exception):
        """Raised by ``login`` when the site does not accept the login request."""

    # Number of text fields ``parse_scope`` reads from one table row (indices 0-20).
    _SCORE_FIELD_COUNT = 21

    # Numeric value substituted for a textual final grade.
    # NOTE(review): in the reviewed copy of this file the labels for 85 and 75
    # were empty strings (three duplicate '' keys, so only '' -> 0 survived).
    # The labels 良/良好 and 中/中等 are assumed from the surrounding scale;
    # confirm against what the site actually emits.
    FINAL_GRADE_DICT = {
        '优秀': 95,
        '良好': 85,
        '良': 85,
        '中等': 75,
        '中': 75,
        '及格': 65,
        '不及格': 40,
        '': 0,
    }

    # Value of the ``term`` form field sent for each Term.
    _TERM_FORM_VALUES = {
        Term.SPRING: "1",
        Term.AUTUMN: "2",
        Term.ALL: "",
    }

    # Text found in the term column of a score row -> Term member.
    _TERM_BY_LABEL = {
        '春': Term.SPRING,
        '秋': Term.AUTUMN,
    }

    @dataclass
    class Score:
        """One row of the score table.

        Fields are listed in the same order as the columns consumed by
        ``GLUTAcademic.parse_scope``.
        """

        year: int
        term: Term
        department: str
        course_number: int
        course_name: str
        course_sequence: int
        instructor: str
        # Either a value from FINAL_GRADE_DICT or the numeric grade parsed
        # with float(), hence float rather than int.
        final_grade: float
        gpa: float
        credits: float
        hours: float
        assessment_method: str
        course_attribute: str
        notes: str
        exam_type: str
        retake_flag: str
        course_requirements: str
        course_category: str
        coefficient: str
        second_degree_minor: str
        pass_flag: str

        def as_dict(self):
            """Return the record as a plain dict with ``term`` as its repr string."""
            d = asdict(self)
            # Enum members are replaced by their repr() string in the dict.
            d['term'] = repr(self.term)
            return d

        def __str__(self):
            """Return a one-line description using the site's Chinese column labels."""
            return (f"Score(学年={self.year}, 学期={self.term}, 开课院系={self.department}, "
                    f"课程号={self.course_number}, 课程名={self.course_name}, 课序号={self.course_sequence}, "
                    f"主讲教师={self.instructor}, 总评={self.final_grade}, 绩点={self.gpa}, "
                    f"学分={self.credits}, 学时={self.hours}, 考核方式={self.assessment_method}, "
                    f"选课属性={self.course_attribute}, 备注={self.notes}, 考试性质={self.exam_type}, "
                    f"是否缓考={self.retake_flag}, 课程要求={self.course_requirements}, "
                    f"课程类别={self.course_category}, 系数={self.coefficient}, "
                    f"二学位辅修={self.second_degree_minor}, 及格标志={self.pass_flag})")

    def __init__(self, username: int, password: str):
        """Store the credentials and create the HTTP session; no request is made."""
        self.session = requests.Session()
        self.username = username
        self.password = password
        self.login_status = False
        self.base_url = 'https://jw.glut.edu.cn'

    def login(self):
        """Send the login request and record the outcome in ``login_status``.

        Raises:
            LoginFailedError: if the response status is not 200 or the
                response carries ``X-Frame-Options: DENY`` (which this client
                treats as a rejected login).
        """
        url = f"{self.base_url}/academic/j_acegi_security_check"
        # Passing the credentials via ``params`` lets requests URL-encode
        # them, so characters such as '&', '#', '+' or spaces in the password
        # no longer corrupt the query string.
        params = {
            'j_username': self.username,
            'j_password': self.password,
            'j_captcha': 'undefined',
        }
        res = self.session.get(url, params=params)
        if res.status_code != 200 or res.headers.get('X-Frame-Options') == 'DENY':
            # Do not keep a stale True from an earlier successful login.
            self.login_status = False
            raise self.LoginFailedError('login error maybe username or password is wrong')
        self.login_status = True

    def get_cookies(self):
        """Return the session cookies as a ``'k=v; k2=v2'`` string.

        Returns ``None`` when ``login`` has not succeeded.
        """
        if not self.login_status:
            return None
        return '; '.join(f'{key}={value}' for key, value in self.session.cookies.items())

    def get_scores_raw(self, year: int = 2024, term: Term = Term.ALL) -> bytes:
        """POST the score query form and return the raw response body.

        Args:
            year: calendar year to query.
            term: term to query; ``Term.ALL`` sends an empty term filter.

        Raises:
            RuntimeError: if the response status is not 200 or the response
                carries ``X-Frame-Options: DENY``.
        """
        url = f"{self.base_url}/academic/manager/score/studentOwnScore.do"
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        data = {
            # The form takes the year as an offset code: 2013 is sent as 33.
            'year': year - 2013 + 33,
            'term': self._TERM_FORM_VALUES[term],
            'prop': '',
            'groupName': '',
            'para': '0',
            'sortColumn': '',
            'Submit': '查询'
        }
        res = self.session.post(url, data=data, headers=headers)
        if res.status_code != 200 or res.headers.get('X-Frame-Options') == 'DENY':
            raise RuntimeError("Failed to get scores")
        return res.content

    def parse_scores(self, raw_score: bytes) -> list[Score]:
        """Parse the score page HTML into a list of ``Score`` records.

        The page must contain exactly one ``<table class="datalist">``; its
        first row is skipped as the header.

        Raises:
            RuntimeError: if the table is missing or ambiguous. The raw page
                is written to ``error.html`` in the current directory first,
                to help with debugging.
            ValueError: propagated from ``parse_scope`` for a malformed row.
        """
        tree = etree.fromstring(raw_score, parser=etree.HTMLParser(encoding='utf-8'))
        tables = tree.xpath('//table[@class="datalist"]')
        if len(tables) != 1:
            err_msg = f"Failed to parse scores {tables}"
            with open('error.html', 'wb') as f:
                f.write(raw_score)
            raise RuntimeError(err_msg)
        rows = tables[0].xpath('tr')[1:]
        # NOTE(review): 'td/text()' yields nothing for a cell without a direct
        # text node, which would shift every later field; confirm the site
        # always emits text (even whitespace) in each cell.
        return [self.parse_scope(tr.xpath('td/text()')) for tr in rows]

    def parse_scope(self, texts: list[str]) -> Score:
        """Build one ``Score`` from the cell texts of a single table row.

        Args:
            texts: cell texts in column order; at least 21 entries are read.

        Raises:
            ValueError: if the row has too few fields, the final grade is
                neither a known label nor a number, the term label is not
                recognised, or a numeric column cannot be converted.
        """
        texts = [t.strip() for t in texts]
        if len(texts) < self._SCORE_FIELD_COUNT:
            # Fail with a clear message instead of a bare IndexError below.
            raise ValueError(
                f'score row has {len(texts)} fields but at least '
                f'{self._SCORE_FIELD_COUNT} are required'
            )

        grade_text = texts[7]
        if grade_text in self.FINAL_GRADE_DICT:
            final_grade = self.FINAL_GRADE_DICT[grade_text]
        else:
            try:
                final_grade = float(grade_text)
            except ValueError as exc:
                raise ValueError('Final grade is not a int number') from exc

        term = self._TERM_BY_LABEL.get(texts[1])
        if term is None:
            raise ValueError(f'term error maybe 春 or 秋 but got {texts[1]}')

        return self.Score(
            year=int(texts[0]),
            term=term,
            department=texts[2],
            course_number=int(texts[3]),
            course_name=texts[4],
            course_sequence=int(texts[5]),
            instructor=texts[6],
            final_grade=final_grade,
            gpa=float(texts[8]),
            credits=float(texts[9]),
            hours=float(texts[10]),
            assessment_method=texts[11],
            course_attribute=texts[12],
            notes=texts[13],
            exam_type=texts[14],
            retake_flag=texts[15],
            course_requirements=texts[16],
            course_category=texts[17],
            coefficient=texts[18],
            second_degree_minor=texts[19],
            pass_flag=texts[20],
        )

    def get_scores(self, year: int = 2024, term: Term = Term.ALL) -> list[Score]:
        """Fetch and parse the scores for ``year`` / ``term`` in one call."""
        raw_score = self.get_scores_raw(year, term)
        return self.parse_scores(raw_score)

    def calculate_scores(self, scores: list[Score]) -> float:
        """Return the credit-weighted average of ``final_grade``.

        Only courses whose ``course_attribute`` is ``'必修'`` and whose name
        does not start with ``'体育'`` are counted. Returns ``0.0`` when
        ``scores`` is empty or no course qualifies (or the qualifying courses
        carry zero credits in total).
        """
        total_credits = 0
        total_grades = 0
        for score in scores:
            if score.course_attribute != '必修' or score.course_name.startswith('体育'):
                continue
            total_credits += score.credits
            total_grades += score.final_grade * score.credits
        if not total_credits:
            # Nothing counted: avoid dividing by zero.
            return 0.0
        return total_grades / total_credits