配置类 ThesisConfig 现在使用 metadata 字典直接透传 TOML 配置,无需为每个变量单独声明字段。新增模板变量只需修改 TOML 文件,无需修改 Python 代码。BREAKING CHANGE: 配置文件结构发生改变,从单独字段改为统一的 metadata 节点。
144 lines
4.4 KiB
Python
144 lines
4.4 KiB
Python
"""
|
||
论文生成编排器。
|
||
|
||
组装 配置 + 解析 + 模板渲染 + 正文注入 的完整流水线。
|
||
"""
|
||
|
||
from collections import defaultdict
|
||
from pathlib import Path
|
||
from docxtpl import DocxTemplate
|
||
from docx import Document
|
||
|
||
from .config import load_config, ThesisConfig
|
||
from .parser import parse_markdown
|
||
from .body import body_to_paragraphs, replace_placeholder
|
||
from .references import references_to_paragraphs
|
||
|
||
|
||
# 解析器可能产生的字段(用于填充报告)
|
||
_PARSER_FIELDS = [
|
||
"title",
|
||
"abstact_cn_context",
|
||
"abstract_cn_keywords",
|
||
"abstract_en_context",
|
||
"abstract_en_keywords",
|
||
"acknowledgement",
|
||
"reference",
|
||
"appendix",
|
||
"body_md",
|
||
]
|
||
|
||
|
||
def generate_thesis(
    template_path: str | Path,
    data_path: str | Path,
    config_path: str | Path | None = None,
    output_path: str | Path = "output.docx",
) -> dict:
    """Run the full Markdown-to-Word thesis generation pipeline.

    The steps are: load the configuration, parse the Markdown source, merge
    configuration values into the parsed context, render the docxtpl template
    with placeholder strings, then reopen the rendered file and replace the
    placeholders with the body and reference paragraphs. A field-fill report
    is printed at the end.

    Parameters
    ----------
    template_path : str | Path
        Path of the docxtpl template (.docx).
    data_path : str | Path
        Path of the Markdown thesis source (.md).
    config_path : str | Path | None
        Path of the TOML configuration. When ``None``, the first existing
        file among ``thesis_config.toml`` (relative to the working directory)
        and ``data_path`` with its suffix replaced by ``.toml`` is used. If no
        configuration file exists, ``ThesisConfig()`` defaults are used.
    output_path : str | Path
        Path of the Word file to write.

    Returns
    -------
    dict
        The context that was handed to the template, i.e. the parsed and
        merged values plus the placeholder strings stored under
        ``"body_placeholder"`` and ``"reference"``.
    """
    data_path = Path(data_path)

    # 1. Locate and load the configuration.
    if config_path is None:
        candidates = [
            Path("thesis_config.toml"),
            data_path.with_suffix(".toml"),
        ]
        config_path = next((p for p in candidates if p.exists()), None)

    if config_path and Path(config_path).exists():
        config = load_config(config_path)
        print(f"[配置] 配置文件: {config_path}")
    else:
        config = ThesisConfig()
        print("[配置] 未找到配置文件,使用默认值。")

    # 2. Parse the Markdown source.
    md_text = data_path.read_text(encoding="utf-8")
    context = parse_markdown(
        md_text,
        body_start_kw=config.body_start_keywords,
        body_end_kw=config.body_end_keywords,
    )

    # 3. Merge configuration into the context; configuration only fills keys
    #    the parser did not produce.
    # NOTE(review): setdefault never overwrites an existing key, so the title
    # guard below does not change the outcome. If the configured title is
    # meant to win when title_from_md is false, that case is not handled here;
    # confirm the intended precedence.
    for k, v in config.to_dict().items():
        if k == "title" and config.title_from_md and context.get("title"):
            continue  # the Markdown title takes precedence
        context.setdefault(k, v)

    # 4. Fall back to "<None>" for keys that are looked up but missing.
    # NOTE(review): Jinja2 appears to copy the render context into a plain
    # dict, so this fallback may not apply to variables resolved inside the
    # template; confirm against the docxtpl/Jinja2 versions in use.
    ctx = defaultdict(lambda: "<None>", context)

    # 5. Convert the body Markdown into a paragraph list.
    body_md = ctx.get("body_md", "")
    body_paragraphs = (
        body_to_paragraphs(
            body_md,
            level_offset=config.level_offset,
            body_style=config.body_style,
        )
        if body_md
        else []
    )

    # 6. Convert the reference text into a paragraph list.
    ref_text = ctx.get("reference", "")
    ref_paragraphs = references_to_paragraphs(ref_text, ref_style=config.reference_style)

    # 7. Put the placeholders into a copy of the context so that the report
    #    below still sees the real "reference" value instead of the
    #    placeholder string.
    render_ctx = ctx.copy()
    render_ctx["body_placeholder"] = "__CONTEXT_PLACEHOLDER__"
    render_ctx["reference"] = "__REFERENCE_PLACEHOLDER__"

    # 8. Render the template.
    doc = DocxTemplate(str(template_path))
    doc.render(render_ctx)

    # 9. Save to a temporary file, then post-process it. Appending ".tmp" to
    #    the full file name keeps the temporary path distinct from output_path
    #    even when output_path itself ends in ".tmp".
    out_path = Path(output_path)
    temp_path = out_path.with_name(f"{out_path.name}.tmp")
    try:
        doc.save(str(temp_path))

        # 10. Inject the body and the references in place of the placeholders.
        final_doc = Document(str(temp_path))
        replace_placeholder(
            final_doc, "__CONTEXT_PLACEHOLDER__", body_paragraphs,
            default_body_style=config.body_style,
        )
        replace_placeholder(
            final_doc, "__REFERENCE_PLACEHOLDER__", ref_paragraphs,
            default_body_style=config.reference_style,
        )
        final_doc.save(str(output_path))
    finally:
        # Remove the temporary file even when rendering or injection fails.
        temp_path.unlink(missing_ok=True)

    print(f"[完成] 论文生成完成: {output_path}")

    # 11. Field-fill report over the configured metadata keys and the parser keys.
    report_fields = list(dict.fromkeys([*config.metadata.keys(), *_PARSER_FIELDS]))
    _print_fill_report(ctx, report_fields)

    return dict(render_ctx)


def _print_fill_report(values, fields) -> None:
    """Print whether each name in *fields* has a value other than "<None>" in *values*."""
    missing = []
    print("\n--- 字段填充情况 ---")
    for key in fields:
        val = values.get(key, "<None>")
        if val == "<None>":
            missing.append(key)
            print(f" [缺失] {key}")
        else:
            preview = str(val)[:60].replace("\n", " ")
            print(f" [OK] {key}: {preview}...")

    if missing:
        print("\n[警告] 以下字段缺失,已填充 '<None>':")
        for name in missing:
            print(f" - {name}")
|