refactor: 重构项目结构并更新依赖配置

- 移除原有的 docx_thesis 模块及其相关文件 (cli.py, config.py, converter.py)
- 新增 .claudeignore 文件以忽略 Python 生成文件和缓存
- 更新 .gitignore 文件添加更多忽略规则包括 .mypy_cache/, .ruff_cache/,
  .claude/, *.md 等
- 添加 README.md 使用说明文档
- 修改 pyproject.toml 依赖配置,新增 docxtpl、pyyaml,
  移除原 thesis 命令入口点并更新为 transit.__main__
- 新增 transit 模块及相应初始化文件
- 重命名 main.py 为快速入口脚本
This commit is contained in:
zzy
2026-05-08 21:06:01 +08:00
parent 5cbc1d9b76
commit ae70d05672
16 changed files with 697 additions and 941 deletions

132
transit/renderer.py Normal file
View File

@@ -0,0 +1,132 @@
"""
论文生成编排器。
组装 配置 + 解析 + 模板渲染 + 正文注入 的完整流水线。
"""
from collections import defaultdict
from pathlib import Path
from docxtpl import DocxTemplate
from docx import Document
from .config import load_config, ThesisConfig
from .parser import parse_markdown
from .body import body_to_paragraphs, replace_placeholder
# Context keys whose fill status is checked and printed in the field report
# at the end of generate_thesis(). Order here is the order of the report.
_TEXT_FIELDS = [
    # Thesis content (grouping inferred from the key names).
    "title",
    # NOTE(review): "abstact_cn_context" is spelled differently from its
    # "abstract_*" siblings. It is kept as-is because the parser/template may
    # use the same spelling -- confirm and rename everywhere together.
    "abstact_cn_context", "abstract_cn_keywords",
    "abstract_en_context", "abstract_en_keywords",
    "acknowledgement", "reference", "appendix",
    # Author / affiliation details (grouping inferred from the key names).
    "student_name", "student_id",
    "college", "major", "class",
    "advisor", "advisor_title",
]
def generate_thesis(
    template_path: str | Path,
    data_path: str | Path,
    config_path: str | Path | None = None,
    output_path: str | Path = "output.docx",
) -> dict:
    """Run the full Markdown-to-Word thesis generation pipeline.

    Steps: load the configuration, parse the Markdown source, merge the
    configuration into the render context, render the docxtpl template with
    a body placeholder, then reopen the rendered document and replace the
    placeholder with the parsed body paragraphs. Progress and a per-field
    fill report are printed to stdout.

    Parameters
    ----------
    template_path : str | Path
        Path to the docxtpl template file (.docx).
    data_path : str | Path
        Path to the Markdown thesis source file (.md).
    config_path : str | Path | None
        Path to the TOML configuration file. When ``None``, the function
        tries ``thesis_config.toml`` in the current working directory and
        then ``data_path`` with its suffix replaced by ``.toml``. If no
        existing file is found, a default ``ThesisConfig()`` is used.
    output_path : str | Path
        Path of the Word file to write.

    Returns
    -------
    dict
        The final render context as a plain dict: parsed Markdown values
        overlaid with configuration values, plus ``body_placeholder``.
        Every name in ``_TEXT_FIELDS`` is present; fields that were not
        supplied hold the string ``"<None>"``.
    """
    missing_marker = "<None>"
    placeholder = "__CONTEXT_PLACEHOLDER__"
    data_path = Path(data_path)

    # 1. Load the configuration (auto-discover it when no path was given).
    if config_path is None:
        candidates = (
            Path("thesis_config.toml"),
            data_path.with_suffix(".toml"),
        )
        config_path = next((p for p in candidates if p.exists()), None)

    if config_path and Path(config_path).exists():
        config = load_config(config_path)
        print(f"[配置] 配置文件: {config_path}")
    else:
        # Best-effort: a missing config file is not an error.
        config = ThesisConfig()
        print("[配置] 未找到配置文件,使用默认值。")

    # 2. Parse the Markdown source.
    md_text = data_path.read_text(encoding="utf-8")
    context = parse_markdown(
        md_text,
        body_start_kw=config.body_start_keywords,
        body_end_kw=config.body_end_keywords,
    )

    # 3. Merge configuration into the context; configuration wins, except
    #    that unset ("<None>") values never overwrite parsed values.
    for key, value in config.to_dict().items():
        if key == "title" and config.title_from_md and context.get("title"):
            continue  # the Markdown title takes precedence
        if value != missing_marker:
            context[key] = value

    # 4. Fall back to "<None>" for keys that are looked up but missing.
    # NOTE(review): Jinja2's Template.render copies its context into a plain
    # dict, so this fallback probably only affects lookups made in this
    # function (the report below), not template variables -- confirm.
    ctx = defaultdict(lambda: missing_marker, context)

    # 5. Convert the Markdown body into a list of paragraphs.
    #    .get() is used so a missing body does not insert the marker.
    body_md = ctx.get("body_md", "")
    body_paragraphs = body_to_paragraphs(body_md) if body_md else []

    # 6. The template receives a placeholder that is swapped out in step 9.
    ctx["body_placeholder"] = placeholder

    # 7. Render the template.
    doc = DocxTemplate(str(template_path))
    doc.render(ctx)

    # 8. Save to a temporary file, then post-process it. The temp name is
    #    the output name plus ".tmp" so it can never equal the output path
    #    (with_suffix(".tmp") would collide for an output ending in ".tmp"
    #    and the cleanup below would then delete the finished document).
    out_file = Path(output_path)
    temp_path = out_file.with_name(out_file.name + ".tmp")
    try:
        doc.save(str(temp_path))
        # 9. Inject the body paragraphs in place of the placeholder.
        final_doc = Document(str(temp_path))
        replace_placeholder(final_doc, placeholder, body_paragraphs)
        final_doc.save(str(output_path))
    finally:
        # Remove the temp file even when rendering or injection fails.
        temp_path.unlink(missing_ok=True)
    print(f"[完成] 论文生成完成: {output_path}")

    # 10. Field fill report; missing fields are collected in the same pass.
    print("\n--- 字段填充情况 ---")
    missing = []
    for key in _TEXT_FIELDS:
        val = ctx[key]  # inserts "<None>" for absent keys (defaultdict)
        if val == missing_marker:
            missing.append(key)
            print(f" [缺失] {key}")
        else:
            text = str(val)
            preview = text[:60].replace("\n", " ")
            # Only show an ellipsis when the value was actually truncated.
            ellipsis = "..." if len(text) > 60 else ""
            print(f" [OK] {key}: {preview}{ellipsis}")

    if missing:
        print("\n[警告] 以下字段缺失,已填充 '<None>'")
        for field_name in missing:
            print(f" - {field_name}")

    return dict(ctx)