diff --git a/transit/body.py b/transit/body.py
index 8d7caeb..703ff70 100644
--- a/transit/body.py
+++ b/transit/body.py
@@ -11,12 +11,30 @@ from docx import Document
 _PAT_HEADING = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
 
 
-def body_to_paragraphs(md_text: str) -> list[dict]:
-    """将 Markdown 正文按标题和段落拆分为结构化列表。
-
-    返回的每个元素::
-        {"text": str, "level": int, "style": str}
-    其中 ``style`` 为 ``Heading N`` 或 ``Normal``。
+def body_to_paragraphs(
+    md_text: str,
+    *,
+    level_offset: int = 0,
+    body_style: str = "Body Text Indent",
+) -> list[dict]:
+    """Split the Markdown body into heading and paragraph entries.
+
+    Parameters
+    ----------
+    md_text : str
+        Markdown source of the body.
+    level_offset : int
+        Offset added to each heading level (pass ``-1`` when the body
+        starts at ``##`` so that it is emitted as ``Heading 1``).  The
+        resulting level is clamped to ``1..9``.
+    body_style : str
+        Word style name given to non-heading paragraphs.
+
+    Returns
+    -------
+    list[dict]
+        Entries of the form ``{"text": str, "level": int, "style": str}``;
+        ``style`` is ``Heading N`` for headings, *body_style* otherwise.
     """
     paragraphs: list[dict] = []
     last_end = 0
@@ -30,10 +48,12 @@ def body_to_paragraphs(md_text: str) -> list[dict]:
             block = block.strip()
             if block:
                 paragraphs.append(
-                    {"text": block, "level": 0, "style": "Normal"}
+                    {"text": block, "level": 0, "style": body_style}
                 )
 
-        level = len(m.group(1))
+        # Clamp to Heading 1..9 so that a negative offset applied to a
+        # top-level ``#`` heading cannot produce "Heading 0".
+        level = min(max(len(m.group(1)) + level_offset, 1), 9)
         heading_text = m.group(2).strip()
         paragraphs.append(
             {"text": heading_text, "level": level, "style": f"Heading {level}"}
@@ -47,21 +67,37 @@ def body_to_paragraphs(md_text: str) -> list[dict]:
             block = block.strip()
             if block:
                 paragraphs.append(
-                    {"text": block, "level": 0, "style": "Normal"}
+                    {"text": block, "level": 0, "style": body_style}
                 )
 
     return paragraphs
 
 
-def replace_placeholder(doc: Document, placeholder: str, paragraphs: list[dict]):
-    """在 *doc* 中找到包含 *placeholder* 的段落,替换为 *paragraphs* 列表。
-
-    每个段落的 ``style`` 字段会从文档样式中查找并应用。
+def replace_placeholder(
+    doc: Document,
+    placeholder: str,
+    paragraphs: list[dict],
+    *,
+    default_body_style: str | None = None,
+):
+    """Replace the first *placeholder* paragraph of *doc* with *paragraphs*.
+
+    Heading entries (``style`` starting with ``Heading``) use their own
+    style and fall back to ``Normal``.  Any other entry gets the first
+    of these styles that exists in the document:
+
+    1. the entry's ``style`` field (the ``body_style`` given to
+       ``body_to_paragraphs``);
+    2. *default_body_style*, when provided;
+    3. the style of the placeholder paragraph itself, as prepared in
+       the template;
+    4. ``Normal``.
     """
     placeholder_found = False
     for para in doc.paragraphs:
         if placeholder in para.text:
             placeholder_found = True
+            placeholder_style = para.style.name if para.style else None
             parent = para._element.getparent()
             idx = list(parent).index(para._element)
             parent.remove(para._element)
@@ -69,19 +105,44 @@ def replace_placeholder(doc: Document, placeholder: str, paragraphs: list[dict])
             for pd_data in reversed(paragraphs):
                 new_p = doc.add_paragraph(pd_data["text"])
                 style_name = pd_data["style"]
-                try:
-                    new_p.style = doc.styles[style_name]
-                except KeyError:
-                    matched = False
-                    for s in doc.styles:
-                        if s.name.lower() == style_name.lower():
-                            new_p.style = s
-                            matched = True
-                            break
-                    if not matched:
-                        new_p.style = doc.styles["Normal"]
+
+                # Apply the first candidate style the document provides.
+                if style_name.startswith("Heading"):
+                    # A heading must never inherit a body-text style.
+                    candidates = (style_name, "Normal")
+                else:
+                    candidates = (
+                        style_name,
+                        default_body_style,
+                        placeholder_style,
+                        "Normal",
+                    )
+                for candidate in candidates:
+                    if candidate and _apply_style(new_p, doc, candidate):
+                        break
+
                 parent.insert(idx, new_p._element)
             break
 
     if not placeholder_found:
         print(f"警告:未找到占位符 '{placeholder}',正文段落未注入。")
+
+
+def _apply_style(paragraph, doc, style_name: str) -> bool:
+    """Try to apply *style_name* to *paragraph*; return ``True`` on success.
+
+    The exact name is looked up first; if that raises ``KeyError``, the
+    document styles are scanned for a case-insensitive match.
+    """
+    try:
+        paragraph.style = doc.styles[style_name]
+        return True
+    except KeyError:
+        pass
+    # Case-insensitive fallback; styles without a name are skipped.
+    wanted = style_name.lower()
+    for s in doc.styles:
+        if s.name and s.name.lower() == wanted:
+            paragraph.style = s
+            return True
+    return False
diff --git a/transit/config.py b/transit/config.py
index aa04052..72bf921 100644
--- a/transit/config.py
+++ b/transit/config.py
@@ -22,6 +22,10 @@ class ThesisConfig:
     body_end_keywords: list[str] = field(
         default_factory=lambda: ["致谢", "参考文献", "附录"]
     )
+    # Word style name applied to non-heading body paragraphs.
+    body_style: str = "Body Text Indent"
+    # Added to Markdown heading levels (-1 turns ``##`` into ``Heading 1``).
+    level_offset: int = -1
 
     def to_dict(self) -> dict:
         """转成模板渲染用的扁平字典,排除 options 命名空间。"""
@@ -60,4 +64,6 @@ def load_config(path: str | Path) -> ThesisConfig:
         body_end_keywords=opts.get(
             "body_end_keywords", ["致谢", "参考文献", "附录"]
         ),
+        body_style=opts.get("body_style", "Body Text Indent"),
+        level_offset=opts.get("level_offset", -1),
     )
diff --git a/transit/parser.py b/transit/parser.py
index 94a9531..32fe236 100644
--- a/transit/parser.py
+++ b/transit/parser.py
@@ -113,19 +113,19 @@ def parse_markdown(
     else:
         data["body_md"] = ""
 
-    # ── 致谢 ──
+    # ── Acknowledgements (body only, heading line excluded) ──
     ack = _find_section(content, ["致谢"])
     if ack:
-        data["acknowledgement"] = content[ack[0] : ack[1]].strip()
+        data["acknowledgement"] = _get_section_body(content, ack)
 
-    # ── 参考文献 ──
+    # ── References (body only, heading line excluded) ──
     ref = _find_section(content, ["参考文献"])
     if ref:
-        data["reference"] = content[ref[0] : ref[1]].strip()
+        data["reference"] = _get_section_body(content, ref)
 
-    # ── 附录 ──
+    # ── Appendix (body only, heading line excluded) ──
     app = _find_section(content, ["附录"])
     if app:
-        data["appendix"] = content[app[0] : app[1]].strip()
+        data["appendix"] = _get_section_body(content, app)
 
     return data
diff --git a/transit/renderer.py b/transit/renderer.py
index 156a0c3..92da6dd 100644
--- a/transit/renderer.py
+++ b/transit/renderer.py
@@ -92,7 +92,10 @@ def generate_thesis(
 
     # 5. 解析正文为段落列表
     body_md = ctx.get("body_md", "")
-    body_paragraphs = body_to_paragraphs(body_md) if body_md else []
+    body_paragraphs = (
+        body_to_paragraphs(body_md, level_offset=config.level_offset, body_style=config.body_style)
+        if body_md else []
+    )
 
     # 6. 占位符
     ctx["body_placeholder"] = "__CONTEXT_PLACEHOLDER__"
@@ -107,7 +110,10 @@ def generate_thesis(
 
     # 9. 正文注入
     final_doc = Document(str(temp_path))
-    replace_placeholder(final_doc, "__CONTEXT_PLACEHOLDER__", body_paragraphs)
+    replace_placeholder(
+        final_doc, "__CONTEXT_PLACEHOLDER__", body_paragraphs,
+        default_body_style=config.body_style,
+    )
     final_doc.save(str(output_path))
     temp_path.unlink(missing_ok=True)
 