Files
md2word/transit/renderer.py
zzy 74d28ea2d8 feat(transit): 添加参考文献解析功能并修复段落编号继承问题
新增了参考文献处理模块,支持按照 GB 7714 《文后参考文献著录规则》顺序编码制
解析和格式化参考文献。同时修复了段落替换过程中自动编号丢失的问题。

- 新增 transit/references.py 模块,提供参考文献解析和格式化功能
- 在 body.py 的 replace_placeholder 函数中实现段落编号属性的正确继承
- 修改 transit/__init__.py 导入新的参考文献处理函数
- 更新 transit/config.py 添加参考文献样式配置项
- 修改 transit/renderer.py 集成参考文献处理流程
2026-05-08 22:14:51 +08:00

149 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
论文生成编排器。
组装 配置 + 解析 + 模板渲染 + 正文注入 的完整流水线。
"""
from collections import defaultdict
from pathlib import Path
from docxtpl import DocxTemplate
from docx import Document
from .config import load_config, ThesisConfig
from .parser import parse_markdown
from .body import body_to_paragraphs, replace_placeholder
from .references import references_to_paragraphs
_TEXT_FIELDS = [
"title",
"abstact_cn_context",
"abstract_cn_keywords",
"abstract_en_context",
"abstract_en_keywords",
"acknowledgement",
"reference",
"appendix",
"student_name",
"student_id",
"college",
"major",
"class",
"advisor",
"advisor_title",
]
def generate_thesis(
    template_path: str | Path,
    data_path: str | Path,
    config_path: str | Path | None = None,
    output_path: str | Path = "output.docx",
) -> dict:
    """Run the full pipeline that turns a Markdown thesis into a Word file.

    Steps: load the configuration, parse the Markdown, merge configuration
    values into the parsed context, render the docxtpl template with
    placeholders for the body and the references, then replace those
    placeholders with the generated paragraphs and print a field-fill report.

    Parameters
    ----------
    template_path : str | Path
        Path of the docxtpl template file (.docx).
    data_path : str | Path
        Path of the Markdown thesis file (.md).
    config_path : str | Path | None
        Path of the TOML configuration file. When ``None``, the function
        looks for ``thesis_config.toml`` in the current directory and then
        for ``data_path`` with a ``.toml`` suffix. If no existing file is
        found, a default ``ThesisConfig()`` is used.
    output_path : str | Path
        Path of the Word file to write.

    Returns
    -------
    dict
        The merged context (parsed Markdown plus configuration values).
        Every key of ``_TEXT_FIELDS`` that was missing is present with the
        value ``"<None>"``. The internal placeholders used during rendering
        are not included, so ``reference`` holds the parsed reference text.
    """
    body_marker = "__CONTEXT_PLACEHOLDER__"
    reference_marker = "__REFERENCE_PLACEHOLDER__"

    data_path = Path(data_path)

    # 1. Load the configuration (auto-discover it when no path is given).
    if config_path is None:
        candidates = (
            Path("thesis_config.toml"),
            data_path.with_suffix(".toml"),
        )
        config_path = next((p for p in candidates if p.exists()), None)

    if config_path and Path(config_path).exists():
        config = load_config(config_path)
        print(f"[配置] 配置文件: {config_path}")
    else:
        config = ThesisConfig()
        print("[配置] 未找到配置文件,使用默认值。")

    # 2. Parse the Markdown source.
    md_text = data_path.read_text(encoding="utf-8")
    context = parse_markdown(
        md_text,
        body_start_kw=config.body_start_keywords,
        body_end_kw=config.body_end_keywords,
    )

    # 3. Merge configuration into the context (configuration wins).
    for key, value in config.to_dict().items():
        if key == "title" and config.title_from_md and context.get("title"):
            continue  # the Markdown title takes precedence
        if value != "<None>":
            context[key] = value

    # 4. Fall back to "<None>" for keys looked up with ctx[key] below.
    # NOTE(review): Jinja2 copies the render context into a plain dict, so
    # this fallback likely does not apply to variables resolved inside the
    # template itself -- confirm against docxtpl/Jinja2 before relying on it.
    ctx = defaultdict(lambda: "<None>", context)

    # 5. Convert the body Markdown into a list of paragraphs.
    body_md = ctx.get("body_md", "")
    body_paragraphs = (
        body_to_paragraphs(
            body_md,
            level_offset=config.level_offset,
            body_style=config.body_style,
        )
        if body_md
        else []
    )

    # 6. Convert the references into a list of paragraphs.
    ref_text = ctx.get("reference", "")
    ref_paragraphs = references_to_paragraphs(
        ref_text, ref_style=config.reference_style
    )

    # 7. Put the placeholders into a copy of the context so that the real
    #    "reference" value stays available for the report and return value.
    render_ctx = ctx.copy()
    render_ctx["body_placeholder"] = body_marker
    render_ctx["reference"] = reference_marker

    # 8. Render the template.
    doc = DocxTemplate(str(template_path))
    doc.render(render_ctx)

    # 9. Save to a temporary file, then post-process it. Appending ".tmp" to
    #    the full output name guarantees the temp path differs from the
    #    output path; the temp file is removed even if post-processing fails.
    out_file = Path(output_path)
    temp_path = out_file.with_name(out_file.name + ".tmp")
    try:
        doc.save(str(temp_path))

        # 10. Inject the body and the references in place of the placeholders.
        final_doc = Document(str(temp_path))
        replace_placeholder(
            final_doc, body_marker, body_paragraphs,
            default_body_style=config.body_style,
        )
        replace_placeholder(
            final_doc, reference_marker, ref_paragraphs,
            default_body_style=config.reference_style,
        )
        final_doc.save(str(output_path))
    finally:
        temp_path.unlink(missing_ok=True)
    print(f"[完成] 论文生成完成: {output_path}")

    # 11. Field-fill report. ctx[key] inserts "<None>" for absent keys, so
    #     the returned dict carries the sentinel for every missing field.
    print("\n--- 字段填充情况 ---")
    missing = []
    for key in _TEXT_FIELDS:
        value = ctx[key]
        if value == "<None>":
            missing.append(key)
            print(f" [缺失] {key}")
        else:
            preview = str(value)[:60].replace("\n", " ")
            print(f" [OK] {key}: {preview}...")

    if missing:
        print("\n[警告] 以下字段缺失,已填充 '<None>'")
        for name in missing:
            print(f" - {name}")

    return dict(ctx)