Files
md2word/transit/renderer.py
zzy c29a3e6af0 feat(transit): 改进正文段落到Word文档的转换功能
支持自定义标题级别偏移量和正文样式,增强样式应用的灵活性。
- 新增 level_offset 参数用于调整标题级别
- 新增 body_style 参数用于设置正文段落样式
- 改进样式应用逻辑,支持多种样式的降级机制
- 更新配置文件以支持新的样式配置选项
- 修改解析器使致谢、参考文献和附录部分只提取正文内容
2026-05-08 21:44:09 +08:00

139 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
论文生成编排器。
组装 配置 + 解析 + 模板渲染 + 正文注入 的完整流水线。
"""
from collections import defaultdict
from pathlib import Path
from docxtpl import DocxTemplate
from docx import Document
from .config import load_config, ThesisConfig
from .parser import parse_markdown
from .body import body_to_paragraphs, replace_placeholder
# Context keys covered by the field-fill report that generate_thesis prints
# after the document has been written. Order here is the order of the report.
_TEXT_FIELDS = [
    # Title and abstracts
    "title",
    # NOTE(review): "abstact" looks like a misspelling of "abstract"; confirm
    # which key the parser and the template actually use before renaming it.
    "abstact_cn_context",
    "abstract_cn_keywords",
    "abstract_en_context",
    "abstract_en_keywords",
    # Back matter
    "acknowledgement",
    "reference",
    "appendix",
    # Student and advisor details
    "student_name",
    "student_id",
    "college",
    "major",
    "class",
    "advisor",
    "advisor_title",
]
def generate_thesis(
    template_path: str | Path,
    data_path: str | Path,
    config_path: str | Path | None = None,
    output_path: str | Path = "output.docx",
) -> dict:
    """Run the complete Markdown-to-Word thesis generation pipeline.

    Loads the configuration, parses the Markdown source, renders the docxtpl
    template, replaces the body placeholder with the parsed body paragraphs,
    writes the final document and prints a field-fill report.

    Parameters
    ----------
    template_path : str | Path
        Path to the docxtpl template file (.docx).
    data_path : str | Path
        Path to the Markdown thesis source file (.md).
    config_path : str | Path | None
        Path to the TOML configuration file. When ``None``, the function
        looks for ``thesis_config.toml`` in the current directory and then
        for a ``.toml`` file next to ``data_path``. If no existing file is
        found, a default ``ThesisConfig()`` is used.
    output_path : str | Path
        Path of the Word file to write.

    Returns
    -------
    dict
        The rendering context as a plain dict. Report fields that had no
        value appear with the value ``"<None>"``.
    """
    data_path = Path(data_path)

    # 1. Load the configuration, auto-discovering a file when none was given.
    if config_path is None:
        candidates = [
            Path("thesis_config.toml"),
            data_path.with_suffix(".toml"),
        ]
        config_path = next((p for p in candidates if p.exists()), None)

    if config_path and Path(config_path).exists():
        config = load_config(config_path)
        print(f"[配置] 配置文件: {config_path}")
    else:
        config = ThesisConfig()
        print("[配置] 未找到配置文件,使用默认值。")

    # 2. Parse the Markdown source into the template context.
    md_text = data_path.read_text(encoding="utf-8")
    context = parse_markdown(
        md_text,
        body_start_kw=config.body_start_keywords,
        body_end_kw=config.body_end_keywords,
    )

    # 3. Merge configuration values into the context; configuration wins,
    #    except that a Markdown title is kept when title_from_md is set.
    for k, v in config.to_dict().items():
        if k == "title" and config.title_from_md and context.get("title"):
            continue
        if v != "<None>":
            context[k] = v

    # 4. Lookups of absent keys through ctx[...] yield "<None>".
    # NOTE(review): this fallback only fires on ctx[key] lookups; confirm
    # whether doc.render() consults it for keys the template references.
    ctx = defaultdict(lambda: "<None>", context)

    # 5. Convert the body Markdown into a paragraph list. .get() is used so
    #    that a missing "body_md" does not get inserted as "<None>".
    body_md = ctx.get("body_md", "")
    body_paragraphs = (
        body_to_paragraphs(
            body_md,
            level_offset=config.level_offset,
            body_style=config.body_style,
        )
        if body_md
        else []
    )

    # 6. The template receives a marker that is swapped for the body later.
    placeholder = "__CONTEXT_PLACEHOLDER__"
    ctx["body_placeholder"] = placeholder

    # 7. Render the template.
    doc = DocxTemplate(str(template_path))
    doc.render(ctx)

    # 8-9. Save the rendered template to an intermediate file, re-open it and
    #      inject the body. ".tmp" is appended to the full output file name
    #      (rather than replacing the suffix) so the intermediate file can
    #      never be the output file itself, and it is removed in ``finally``
    #      so a failure in any step does not leave it behind.
    out_file = Path(output_path)
    temp_path = out_file.with_name(out_file.name + ".tmp")
    try:
        doc.save(str(temp_path))
        final_doc = Document(str(temp_path))
        replace_placeholder(
            final_doc,
            placeholder,
            body_paragraphs,
            default_body_style=config.body_style,
        )
        final_doc.save(str(output_path))
    finally:
        temp_path.unlink(missing_ok=True)
    print(f"[完成] 论文生成完成: {output_path}")

    # 10. Field-fill report; missing fields are collected in the same pass.
    print("\n--- 字段填充情况 ---")
    missing = []
    for key in _TEXT_FIELDS:
        val = ctx[key]
        if val == "<None>":
            missing.append(key)
            print(f" [缺失] {key}")
        else:
            preview = str(val)[:60].replace("\n", " ")
            print(f" [OK] {key}: {preview}...")

    if missing:
        print("\n[警告] 以下字段缺失,已填充 '<None>'")
        for name in missing:
            print(f" - {name}")

    return dict(ctx)