refactor: 重构项目结构并更新依赖配置

- 移除原有的 docx_thesis 模块及其相关文件 (cli.py, config.py, converter.py)
- 新增 .claudeignore 文件以忽略 Python 生成文件和缓存
- 更新 .gitignore 文件添加更多忽略规则包括 .mypy_cache/, .ruff_cache/, .claude/, *.md 等
- 添加 README.md 使用说明文档
- 修改 pyproject.toml 依赖配置，新增 docxtpl、pyyaml，移除原 thesis 命令入口点并更新为 transit.__main__
- 新增 transit 模块及相应初始化文件
- 重命名 main.py 为快速入口脚本
2026-05-08 21:06:01 +08:00
parent 5cbc1d9b76
commit ae70d05672
16 changed files with 697 additions and 941 deletions
--- a/transit/renderer.py
+++ b/transit/renderer.py
@@ -0,0 +1,132 @@
+"""
+论文生成编排器。
+
+组装 配置 + 解析 + 模板渲染 + 正文注入 的完整流水线。
+"""
+
+from collections import defaultdict
+from pathlib import Path
+from docxtpl import DocxTemplate
+from docx import Document
+
+from .config import load_config, ThesisConfig
+from .parser import parse_markdown
+from .body import body_to_paragraphs, replace_placeholder
+
+
+# Context keys checked by the field report printed at the end of
+# generate_thesis(). A key whose value is the "<None>" sentinel is reported
+# as missing.
+# NOTE(review): "abstract_cn_context" was previously spelled
+# "abstact_cn_context", unlike its three sibling abstract_* keys. Confirm
+# that the parser and the .docx template use the corrected spelling.
+_TEXT_FIELDS = [
+    "title",
+    "abstract_cn_context",
+    "abstract_cn_keywords",
+    "abstract_en_context",
+    "abstract_en_keywords",
+    "acknowledgement",
+    "reference",
+    "appendix",
+    "student_name",
+    "student_id",
+    "college",
+    "major",
+    "class",
+    "advisor",
+    "advisor_title",
+]
+
+
+def generate_thesis(
+    template_path: str | Path,
+    data_path: str | Path,
+    config_path: str | Path | None = None,
+    output_path: str | Path = "output.docx",
+) -> dict:
+    """Run the full pipeline from Markdown data to a Word thesis.
+
+    Loads the configuration, parses the Markdown file, merges both into one
+    rendering context, renders the docxtpl template, replaces the body
+    placeholder with the parsed body paragraphs, and prints a field report.
+
+    Parameters
+    ----------
+    template_path : str | Path
+        Path to the docxtpl template (.docx).
+    data_path : str | Path
+        Path to the Markdown thesis file (.md).
+    config_path : str | Path | None
+        Path to the TOML configuration file. When ``None``, the function
+        looks for ``thesis_config.toml`` in the working directory and then
+        for a ``.toml`` file next to ``data_path``. If no existing file is
+        found, a default ``ThesisConfig()`` is used.
+    output_path : str | Path
+        Path of the generated Word file.
+
+    Returns
+    -------
+    dict
+        The context used for rendering. Every key of ``_TEXT_FIELDS`` is
+        present; fields without a value hold the string ``"<None>"``.
+    """
+    missing_marker = "<None>"
+    placeholder = "__CONTEXT_PLACEHOLDER__"
+    data_path = Path(data_path)
+
+    # 1. Load the configuration (auto-discover it when no path was given).
+    if config_path is None:
+        candidates = (
+            Path("thesis_config.toml"),
+            data_path.with_suffix(".toml"),
+        )
+        config_path = next((p for p in candidates if p.exists()), None)
+
+    if config_path and Path(config_path).exists():
+        config = load_config(config_path)
+        print(f"[配置] 配置文件: {config_path}")
+    else:
+        config = ThesisConfig()
+        print("[配置] 未找到配置文件，使用默认值。")
+
+    # 2. Parse the Markdown source.
+    md_text = data_path.read_text(encoding="utf-8")
+    context = parse_markdown(
+        md_text,
+        body_start_kw=config.body_start_keywords,
+        body_end_kw=config.body_end_keywords,
+    )
+
+    # 3. Merge the configuration into the context (configuration wins).
+    for k, v in config.to_dict().items():
+        if k == "title" and config.title_from_md and context.get("title"):
+            continue  # the Markdown title takes precedence
+        if v != missing_marker:
+            context[k] = v
+
+    # 4. Fill in the sentinel for every known field that has no value.
+    #    docxtpl passes the context to jinja2's Template.render(), which
+    #    copies it into a plain dict, so a defaultdict fallback alone never
+    #    reaches the template. The keys must exist explicitly.
+    ctx = defaultdict(lambda: missing_marker, context)
+    for key in _TEXT_FIELDS:
+        ctx.setdefault(key, missing_marker)
+
+    # 5. Split the body into paragraphs (.get() does not trigger the
+    #    defaultdict factory, so a missing body stays an empty string).
+    body_md = ctx.get("body_md", "")
+    body_paragraphs = body_to_paragraphs(body_md) if body_md else []
+
+    # 6. The template renders this marker where the body will be injected.
+    ctx["body_placeholder"] = placeholder
+
+    # 7. Render the template.
+    doc = DocxTemplate(str(template_path))
+    doc.render(ctx)
+
+    # 8. Save to a temporary file, reopen it with python-docx and inject the
+    #    body. Appending ".tmp" to the full name keeps the temporary path
+    #    distinct from the output path, and the finally clause removes the
+    #    temporary file even when injection or saving fails.
+    out_file = Path(output_path)
+    temp_path = out_file.with_name(out_file.name + ".tmp")
+    try:
+        doc.save(str(temp_path))
+        final_doc = Document(str(temp_path))
+        replace_placeholder(final_doc, placeholder, body_paragraphs)
+        final_doc.save(str(output_path))
+    finally:
+        temp_path.unlink(missing_ok=True)
+
+    print(f"[完成] 论文生成完成: {output_path}")
+
+    # 9. Field report.
+    print("\n--- 字段填充情况 ---")
+    missing = []
+    for key in _TEXT_FIELDS:
+        val = ctx[key]
+        if val == missing_marker:
+            missing.append(key)
+            print(f"  [缺失] {key}")
+            continue
+        text = str(val)
+        preview = text[:60].replace("\n", " ")
+        # Only show an ellipsis when the value was actually truncated.
+        ellipsis = "..." if len(text) > 60 else ""
+        print(f"  [OK] {key}: {preview}{ellipsis}")
+
+    if missing:
+        print("\n[警告] 以下字段缺失，已填充 '<None>'：")
+        for name in missing:
+            print(f"  - {name}")
+
+    return dict(ctx)