init: 添加 GLUT 学分统计爬虫项目
- 新增 README.md 文件,包含项目描述、初始化步骤和使用方法 - 添加 default.env 文件,用于配置环境变量 - 实现 glut.py,包含登录、获取成绩、解析成绩等功能 - 添加 index.html,提供 Web 界面展示成绩 - 实现 main.py,提供命令行接口 - 添加 requirements.txt,列出项目依赖 - 实现 server.py,提供 HTTP 服务接口
This commit is contained in:
commit
34770cfeff
40
README.md
Normal file
40
README.md
Normal file
@ -0,0 +1,40 @@
|
||||
# GLUT spider web
|
||||
|
||||
- 项目描述
|
||||
- 用于获取GLUT学分统计,即爬虫案例
|
||||
|
||||
- 初始化项目
|
||||
|
||||
```shell
|
||||
# Windows
|
||||
python -m venv .venv
.venv\Scripts\activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Linux
|
||||
python3 -m venv .venv
source .venv/bin/activate
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
- 配置环境变量( **推荐配置** )
|
||||
|
||||
将`default.env`内容填写完整后改名为`.env`
|
||||
|
||||
- 使用方法
|
||||
|
||||
```shell
|
||||
# 帮助文档
|
||||
python main.py -h
|
||||
|
||||
# 开启服务器
|
||||
python main.py -s
|
||||
|
||||
# 使用命令行操作,如果没有配置环境变量则需要给出用户名和密码
|
||||
# 注意默认会输出scores.json
|
||||
> python main.py -t 2023_sp
|
||||
Fetching scores for 2023 sp...
|
||||
aggregate_score: 72.32824427480917
|
||||
|
||||
> python main.py -t 2023_sp -u 32220520xxxxx -p xxxxx
|
||||
Fetching scores for 2023 sp...
|
||||
aggregate_score: 72.32824427480917
|
||||
```
|
3
default.env
Normal file
3
default.env
Normal file
@ -0,0 +1,3 @@
|
||||
USERNAME=3220520xxxxx
|
||||
PASSWORD=YourPassword
|
||||
PORT=8000
|
179
glut.py
Normal file
179
glut.py
Normal file
@ -0,0 +1,179 @@
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum, auto
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
class Term(Enum):
    """Term filter for score queries: a single semester or the whole year."""

    SPRING = auto()
    AUTUMN = auto()
    ALL = auto()

    def __repr__(self):
        """Return the lower-case member name (e.g. ``spring``).

        ``GLUTAcademic.Score.as_dict`` uses this as the serialized form.
        """
        return self.name.lower()


class GLUTAcademic:
    """Session-based client for the GLUT academic system.

    Typical use: construct with the credentials, call :meth:`login`, then
    :meth:`get_scores` and optionally :meth:`calculate_scores`.
    """

    class LoginFailedError(Exception):
        """Raised by :meth:`login` when the login request is not accepted."""

    # Numeric score substituted for each textual grade; an empty cell counts as 0.
    FINAL_GRADE_DICT = {
        '优秀': 95,
        '良': 85,
        '中': 75,
        '及格': 65,
        '不及格': 40,
        '': 0,
    }

    # Seconds to wait for the site before a request is abandoned; without a
    # timeout an unresponsive server would block the caller forever.
    REQUEST_TIMEOUT = 30

    # Number of cells parse_scope() reads from one score-table row.
    SCORE_COLUMN_COUNT = 21

    @dataclass
    class Score:
        """One row of the score table, in the column order of the site."""

        year: int
        term: Term
        department: str
        course_number: int
        course_name: str
        course_sequence: int
        instructor: str
        # Either a mapped textual grade (int) or the parsed numeric grade (float).
        final_grade: float
        gpa: float
        credits: float
        hours: float
        assessment_method: str
        course_attribute: str
        notes: str
        exam_type: str
        retake_flag: str
        course_requirements: str
        course_category: str
        coefficient: str
        second_degree_minor: str
        pass_flag: str

        def as_dict(self):
            """Return the fields as a dict with ``term`` replaced by its repr string."""
            d = asdict(self)
            # Enum members are not JSON-serializable; store the lower-case name.
            d['term'] = repr(self.term)
            return d

        def __str__(self):
            return (f"Score(学年={self.year}, 学期={self.term}, 开课院系={self.department}, "
                    f"课程号={self.course_number}, 课程名={self.course_name}, 课序号={self.course_sequence}, "
                    f"主讲教师={self.instructor}, 总评={self.final_grade}, 绩点={self.gpa}, "
                    f"学分={self.credits}, 学时={self.hours}, 考核方式={self.assessment_method}, "
                    f"选课属性={self.course_attribute}, 备注={self.notes}, 考试性质={self.exam_type}, "
                    f"是否缓考={self.retake_flag}, 课程要求={self.course_requirements}, "
                    f"课程类别={self.course_category}, 系数={self.coefficient}, "
                    f"二学位辅修={self.second_degree_minor}, 及格标志={self.pass_flag})")

    def __init__(self, username: int, password: str):
        """Create a client with its own HTTP session; no request is sent yet."""
        self.session = requests.Session()
        self.username = username
        self.password = password
        self.login_status = False
        self.base_url = 'https://jw.glut.edu.cn'

    def login(self):
        """Log in with the stored credentials and set ``login_status``.

        Raises:
            LoginFailedError: if the response status is not 200 or the response
                carries ``X-Frame-Options: DENY`` (the failure marker this
                client checks for).
        """
        self.login_status = False
        # Credentials go through ``params`` so that characters such as ``&``,
        # ``#`` or ``+`` in a password are URL-encoded instead of corrupting
        # the query string.
        res = self.session.get(
            f"{self.base_url}/academic/j_acegi_security_check",
            params={
                'j_username': self.username,
                'j_password': self.password,
                'j_captcha': 'undefined',
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        if res.status_code != 200 or res.headers.get('X-Frame-Options') == 'DENY':
            raise self.LoginFailedError('login error maybe username or password is wrong')
        self.login_status = True

    def get_cookies(self):
        """Return the session cookies as a ``k=v; k=v`` string, or None before login."""
        if not self.login_status:
            return None
        return '; '.join([f'{key}={value}' for key, value in self.session.cookies.items()])

    def get_scores_raw(self, year: int = 2024, term: Term = Term.ALL):
        """Fetch the raw score page for ``year``/``term`` and return its bytes.

        Raises:
            RuntimeError: if the response status is not 200 or the response
                carries ``X-Frame-Options: DENY``.
        """
        url = f"{self.base_url}/academic/manager/score/studentOwnScore.do"
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        term_dict = {
            Term.SPRING: "1",
            Term.AUTUMN: "2",
            Term.ALL: ""
        }

        data = {
            # NOTE(review): presumably the site's internal year id
            # (2013 maps to 33) -- confirm against the query form.
            'year': year - 2013 + 33,
            'term': term_dict[term],
            'prop': '',
            'groupName': '',
            'para': '0',
            'sortColumn': '',
            'Submit': '查询'
        }
        res = self.session.post(url, data=data, headers=headers,
                                timeout=self.REQUEST_TIMEOUT)

        if res.status_code != 200 or res.headers.get('X-Frame-Options') == 'DENY':
            raise RuntimeError("Failed to get scores")
        return res.content

    def parse_scores(self, raw_score: bytes) -> list[Score]:
        """Parse the score page into ``Score`` objects, skipping the first row.

        Raises:
            RuntimeError: if the page does not contain exactly one
                ``table.datalist``; the raw page is written to ``error.html``
                first so it can be inspected.
            ValueError: if a row cannot be converted (see :meth:`parse_scope`).
        """
        tree = etree.fromstring(raw_score, parser=etree.HTMLParser(encoding='utf-8'))
        table = tree.xpath('//table[@class="datalist"]')
        if len(table) != 1:
            err_msg = f"Failed to parse scores {table}"
            with open('error.html', 'wb') as f:
                f.write(raw_score)
            raise RuntimeError(err_msg)

        rows = table[0].xpath('tr')[1:]
        # Take one string per <td>: selecting 'td/text()' drops cells without a
        # text node, which would shift every following column.
        return [
            self.parse_scope([td.xpath('string()') for td in tr.xpath('td')])
            for tr in rows
        ]

    def parse_scope(self, texts: list[str]) -> Score:
        """Convert the cell texts of one table row into a ``Score``.

        Args:
            texts: cell texts in table order; surrounding whitespace is stripped.

        Raises:
            ValueError: if the row has too few cells, the grade is neither a
                known textual grade nor a number, the term is not 春/秋, or a
                numeric column cannot be converted.
        """
        texts = [t.strip() for t in texts]
        if len(texts) < self.SCORE_COLUMN_COUNT:
            raise ValueError(
                f'score row has {len(texts)} cells, expected {self.SCORE_COLUMN_COUNT}')

        grade_text = texts[7]
        if grade_text in self.FINAL_GRADE_DICT:
            final_grade = self.FINAL_GRADE_DICT[grade_text]
        else:
            try:
                final_grade = float(grade_text)
            except ValueError as exc:
                raise ValueError(f'Final grade is not a number: {grade_text!r}') from exc

        term_map = {
            '春': Term.SPRING,
            '秋': Term.AUTUMN
        }
        term = term_map.get(texts[1])
        if term is None:
            raise ValueError(f'term error maybe 春 or 秋 but got {texts[1]}')

        return self.Score(
            year=int(texts[0]),
            term=term,
            department=texts[2],
            course_number=int(texts[3]),
            course_name=texts[4],
            course_sequence=int(texts[5]),
            instructor=texts[6],
            final_grade=final_grade,
            gpa=float(texts[8]),
            credits=float(texts[9]),
            hours=float(texts[10]),
            assessment_method=texts[11],
            course_attribute=texts[12],
            notes=texts[13],
            exam_type=texts[14],
            retake_flag=texts[15],
            course_requirements=texts[16],
            course_category=texts[17],
            coefficient=texts[18],
            second_degree_minor=texts[19],
            pass_flag=texts[20],
        )

    def get_scores(self, year: int = 2024, term: Term = Term.ALL) -> list[Score]:
        """Fetch and parse the scores for ``year``/``term``."""
        raw_score = self.get_scores_raw(year, term)
        return self.parse_scores(raw_score)

    def calculate_scores(self, scores: list[Score]) -> float:
        """Return the credit-weighted average final grade.

        Only courses whose ``course_attribute`` is ``必修`` and whose name does
        not start with ``体育`` are counted. Returns 0.0 when nothing is counted
        (empty input, or only excluded courses) instead of dividing by zero.
        """
        counted = [
            s for s in scores
            if s.course_attribute == '必修' and not s.course_name.startswith('体育')
        ]
        total_credits = sum(s.credits for s in counted)
        if not total_credits:
            return 0.0
        total_grades = sum(s.final_grade * s.credits for s in counted)
        return total_grades / total_credits
|
137
index.html
Normal file
137
index.html
Normal file
@ -0,0 +1,137 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>成绩展示</title>
|
||||
<style>
|
||||
table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
th, td {
|
||||
border: 1px solid black;
|
||||
padding: 8px;
|
||||
text-align: left;
|
||||
}
|
||||
th {
|
||||
background-color: #f2f2f2;
|
||||
}
|
||||
.form-group {
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
label {
|
||||
display: block;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
input[type="text"], input[type="number"] {
|
||||
width: 100%;
|
||||
padding: 8px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
button {
|
||||
padding: 10px 15px;
|
||||
background-color: #007BFF;
|
||||
color: white;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
}
|
||||
button:hover {
|
||||
background-color: #0056b3;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>成绩展示</h1>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="year">学年:</label>
|
||||
<input type="number" id="year" name="year" value="2023">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="term">学期:</label>
|
||||
<input type="text" id="term" name="term" value="ALL">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="username">用户名:</label>
|
||||
<input type="text" id="username" name="username">
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="password">密码:</label>
|
||||
<input type="password" id="password" name="password">
|
||||
</div>
|
||||
<!-- <button id="jmpLinkButton" style="display:none;" onclick="jmpLinkWithCookies()">教务管理系统</button> -->
|
||||
<button onclick="fetchScores()">获取成绩</button>
|
||||
|
||||
<h5>PGA: <span id="pga"></span></h5>
|
||||
<table id="scoresTable">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>课程名</th>
|
||||
<th>总评成绩</th>
|
||||
<th>绩点</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<input type="hidden" id="cookies" value="">
|
||||
|
||||
<script>
|
||||
/**
 * Probe the academic system's landing page and, if the probe succeeds,
 * open that page in a new window; otherwise show an alert.
 *
 * The cookie string is read from the hidden #cookies input.
 * NOTE(review): browsers presumably refuse to let scripts set the `Cookie`
 * request header, in which case the header below has no effect -- confirm.
 */
async function jmpLinkWithCookies() {
    const target = "https://jw.glut.edu.cn/academic/index_new.jsp";
    const requestOptions = {
        method: 'GET',
        credentials: 'include',
        headers: {
            'Cookie': document.getElementById('cookies').value
        }
    };

    try {
        const reply = await fetch(target, requestOptions);
        if (!reply.ok) {
            alert('无法跳转到教务系统');
            return;
        }
        window.open(target);
    } catch (error) {
        // Network-level failure (the request itself did not complete).
        alert('无法跳转到教务系统 error');
    }
}
|
||||
|
||||
/**
 * Read the form fields, request /get_scores and render the result:
 * the weighted average into #pga, one table row per score into
 * #scoresTable, and the returned cookie string into the hidden #cookies input.
 *
 * NOTE(review): the password travels in the query string of a GET request,
 * so it can end up in server logs and browser history.
 */
function fetchScores() {
    const year = document.getElementById('year').value;
    const term = document.getElementById('term').value;
    const username = document.getElementById('username').value;
    const password = document.getElementById('password').value;

    const url = `/get_scores?year=${encodeURIComponent(year)}&term=${encodeURIComponent(term)}&username=${encodeURIComponent(username)}&password=${encodeURIComponent(password)}`;

    fetch(url)
        .then(response => {
            // Error replies are plain text, not JSON; surface their message
            // instead of failing later inside response.json().
            if (!response.ok) {
                return response.text().then(message => {
                    throw new Error(message || `HTTP ${response.status}`);
                });
            }
            return response.json();
        })
        .then(data => {
            const tableBody = document.querySelector('#scoresTable tbody');
            tableBody.innerHTML = '';
            document.getElementById('pga').textContent = data.pga;

            data.scores.forEach(score => {
                const row = document.createElement('tr');
                // textContent (not innerHTML) so that markup inside a course
                // name coming from the remote site is shown, never executed.
                [score.course_name, score.final_grade, score.gpa].forEach(value => {
                    const cell = document.createElement('td');
                    cell.textContent = value;
                    row.appendChild(cell);
                });
                tableBody.appendChild(row);
            });

            // document.getElementById('jmpLinkButton').style.display = 'block';
            document.getElementById('cookies').value = data.cookies;
        })
        .catch(error => {
            console.error('Error fetching scores:', error);
            alert(`获取成绩失败: ${error.message}`);
        });
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
72
main.py
Normal file
72
main.py
Normal file
@ -0,0 +1,72 @@
|
||||
import json
|
||||
import argparse
|
||||
from os import getenv
|
||||
from dotenv import load_dotenv
|
||||
from glut import GLUTAcademic, Term
|
||||
from server import run as server_run
|
||||
|
||||
# Let values from .env take precedence over variables that already exist in
# the process environment: USERNAME is predefined by Windows (and by some
# Linux desktop sessions), so without override=True the account configured in
# .env would be silently replaced by the OS login name.
load_dotenv(override=True)
|
||||
|
||||
def fetch_scores(glut_academic: GLUTAcademic, terms: list[str]):
    """Fetch the scores for every ``<year>_<sp|au>`` spec in ``terms``.

    Args:
        glut_academic: a logged-in client; its ``get_scores`` is called once
            per spec.
        terms: specs such as ``2023_sp`` or ``2024_au`` (case-insensitive
            suffix). ``None`` is treated as an empty list.

    Returns:
        A ``(json_text, scores)`` tuple: the scores serialized as indented
        JSON, and the list of score objects in fetch order.

    Raises:
        ValueError: if a spec is not ``<integer year>_<sp|au>``.
    """
    term_by_suffix = {'sp': Term.SPRING, 'au': Term.AUTUMN}
    all_scores: list[GLUTAcademic.Score] = []

    for spec in terms or ():
        # partition() never fails to unpack, so a spec with zero or several
        # underscores reaches the explicit error below instead of an opaque
        # "not enough values to unpack".
        year_text, _, term_str = spec.partition('_')
        term_str = term_str.lower()
        selected = term_by_suffix.get(term_str)
        try:
            year = int(year_text)
        except ValueError:
            year = None
        if selected is None or year is None:
            raise ValueError(f'Invalid term: {spec}, must be one of 2023_sp, 2024_au')

        print(f'Fetching scores for {year} {term_str}...')
        all_scores.extend(glut_academic.get_scores(year, selected))

    return json.dumps([score.as_dict() for score in all_scores],
                      ensure_ascii=False, indent=4), all_scores
|
||||
|
||||
def main():
    """Command-line entry point: run the HTTP server or fetch scores once.

    With ``-s`` the server is started on ``--port`` and credentials are not
    needed (they arrive with each request). Otherwise the given terms are
    fetched, written to ``--output`` unless ``--disable-file-output`` is set,
    and the aggregate score is printed.

    Raises:
        ValueError: in fetch mode, if no username or password is available.
    """
    parser = argparse.ArgumentParser(
        description='GLUT Academic Score Calculator',
    )
    parser.add_argument('-u', '--username', type=str, default=getenv('USERNAME', default=None))
    parser.add_argument('-p', '--password', type=str, default=getenv('PASSWORD', default=None))
    parser.add_argument('-s', '--server', action='store_true',
                        help='Run the server instead of fetching scores')
    parser.add_argument('-t', '--terms', type=str, nargs='+',
                        help='Year and term combinations (e.g., 2023_sp 2024_au)')
    parser.add_argument('-o', '--output', type=str, default='scores.json',
                        help='Output file name (default: scores.json)')
    # Quiet stays the default, but --no-quiet now makes it possible to print
    # the JSON; a plain store_true flag with default=True could never be off.
    parser.add_argument('-q', '--quiet', action=argparse.BooleanOptionalAction, default=True,
                        help='Suppress terminal output of the score JSON (default: on)')
    parser.add_argument('--disable-file-output', action='store_true',
                        help='Disable file output')
    parser.add_argument('--port', type=int, default=int(getenv('PORT', default='8000')),
                        help='Port number for the server')

    args = parser.parse_args()

    if args.server:
        server_run(port=args.port)
        return

    if not args.username or not args.password:
        raise ValueError(
            "USERNAME and PASSWORD must be set in the .env file or passed with -u/-p")
    if not args.terms:
        parser.error('at least one term is required, e.g. -t 2023_sp 2024_au')

    glut_acdemic = GLUTAcademic(int(args.username), args.password)
    glut_acdemic.login()

    res, origin_li = fetch_scores(glut_acdemic, args.terms)

    if not args.disable_file_output:
        # Honour -o/--output instead of always writing scores.json.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(res)

    print(f"aggregate_score: {glut_acdemic.calculate_scores(origin_li)}")
    if not args.quiet:
        print(res)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
requests
|
||||
lxml
|
||||
python-dotenv
|
62
server.py
Normal file
62
server.py
Normal file
@ -0,0 +1,62 @@
|
||||
import json
|
||||
from http.server import SimpleHTTPRequestHandler, HTTPServer
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
from glut import GLUTAcademic, Term
|
||||
|
||||
class RequestHandler(SimpleHTTPRequestHandler):
    """Serve static files plus the ``/get_scores`` JSON endpoint.

    ``/get_scores`` accepts the query parameters ``year`` (integer, default
    2023), ``term`` (a ``Term`` member name, case-insensitive, default
    ``ALL``), ``username`` and ``password``. Every other path is handled by
    ``SimpleHTTPRequestHandler``.

    NOTE(review): the password arrives in the query string, and the request
    line (including the query) is what the base class logs -- consider POST.
    """

    def do_GET(self):
        """Dispatch ``/get_scores``; delegate all other paths to the base class."""
        parsed_path = urlparse(self.path)
        if parsed_path.path != '/get_scores':
            super().do_GET()
            return

        query_params = parse_qs(parsed_path.query)

        try:
            year = int(query_params.get('year', [2023])[0])
        except ValueError:
            self._send_text(400, 'year must be an integer.')
            return

        username = query_params.get('username', [None])[0]
        password = query_params.get('password', [None])[0]
        if not username or not password:
            self._send_text(400, 'Username and password are required.')
            return

        # Look the name up among the enum members only; getattr() on the
        # class would also resolve arbitrary attributes such as __class__.
        term_name = query_params.get('term', ['ALL'])[0].strip().upper()
        term = Term.__members__.get(term_name)
        if term is None:
            self._send_text(400, f'term must be one of {", ".join(Term.__members__)}.')
            return

        glut = GLUTAcademic(username, password)
        try:
            glut.login()
        except GLUTAcademic.LoginFailedError as e:
            self._send_text(401, str(e))
            return

        try:
            scores = glut.get_scores(year, term)
        except (RuntimeError, ValueError, IndexError) as e:
            # The upstream page could not be fetched or parsed; answer with an
            # error instead of dropping the connection without a response.
            self._send_text(502, str(e))
            return

        try:
            pga = glut.calculate_scores(scores)
        except ZeroDivisionError:
            # No course counted towards the average.
            pga = 0.0

        # Save the score data to scores.txt
        res = [i.as_dict() for i in scores]
        res_json = json.dumps(res, ensure_ascii=False, indent=4)
        with open('scores.txt', 'w', encoding='utf-8') as f:
            f.write(res_json)

        response_data = {
            'scores': res,
            'pga': pga,
            'cookies': glut.get_cookies(),
        }

        self.send_response(200)
        # The body is UTF-8 with non-ASCII text (ensure_ascii=False), so say so.
        self.send_header('Content-type', 'application/json; charset=utf-8')
        self.end_headers()
        self.wfile.write(json.dumps(response_data, ensure_ascii=False, indent=4)
                         .encode('utf-8'))

    def _send_text(self, status, message):
        """Send ``message`` as a UTF-8 plain-text response with ``status``."""
        self.send_response(status)
        self.send_header('Content-type', 'text/plain; charset=utf-8')
        self.end_headers()
        self.wfile.write(message.encode('utf-8'))
|
||||
|
||||
def run(server_class=HTTPServer, handler_class=RequestHandler, port=8000):
    """Bind a server to all interfaces on ``port`` and serve until interrupted.

    Args:
        server_class: server type to instantiate.
        handler_class: request handler type passed to the server.
        port: TCP port to listen on.
    """
    httpd = server_class(('', port), handler_class)
    # Report the address the server actually bound to.
    bound = httpd.server_address
    print(f'Starting httpd server on http://{bound[0]}:{bound[1]} ...')
    httpd.serve_forever()
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
Loading…
x
Reference in New Issue
Block a user