init

2026-05-07 21:37:06 +08:00
commit 5cbc1d9b76
10 changed files with 1226 additions and 0 deletions
--- a/docx_thesis/converter.py
+++ b/docx_thesis/converter.py
@@ -0,0 +1,795 @@
+"""Convert Markdown graduation thesis → formatted Word .docx.
+
+Parses markdown line-by-line and writes a python-docx document that
+complies with 桂林理工大学 理工类毕业设计（论文）格式要求.
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+from docx import Document
+from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
+from docx.oxml import parse_xml
+from docx.oxml.ns import nsdecls, qn
+from docx.shared import Cm, Pt, RGBColor
+from docx.text.paragraph import Paragraph
+from docx.text.run import Run
+
+from .config import ThesisFormat
+
+
+# ── font helpers ─────────────────────────────────────────────────────────
+
+
+def _set_font(
+    run: Run,
+    cn_font: str,
+    en_font: str | None = None,
+    size: float | None = None,
+    bold: bool | None = None,
+    italic: bool | None = None,
+):
+    if en_font:
+        run.font.name = en_font
+    if cn_font:
+        rpr = run._element.get_or_add_rPr()
+        rfonts = rpr.find(qn("w:rFonts"))
+        if rfonts is None:
+            rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
+            rpr.insert(0, rfonts)
+        rfonts.set(qn("w:eastAsia"), cn_font)
+    if size is not None:
+        run.font.size = Pt(size)
+    if bold is not None:
+        run.font.bold = bold
+    if italic is not None:
+        run.font.italic = italic
+
+
+def _set_spacing(p: Paragraph, before: int = 0, after: int = 0,
+                 line_spacing: float = 1.0):
+    pf = p.paragraph_format
+    pf.space_before = Pt(before)
+    pf.space_after = Pt(after)
+    pf.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
+    pf.line_spacing = line_spacing
+
+
+def _set_indent(p: Paragraph, chars: int = 2):
+    if chars > 0:
+        p.paragraph_format.first_line_indent = Cm(chars * 0.37)
+
+
+def _set_page_number_fmt(section, fmt: str):
+    sect_pr = section._sectPr
+    el = sect_pr.find(qn("w:pgNumType"))
+    if el is None:
+        el = parse_xml(f'<w:pgNumType {nsdecls("w")}/>')
+        sect_pr.append(el)
+    el.set(qn("w:fmt"), fmt)
+
+
+def _setup_footer(section, roman: bool):
+    footer = section.footer
+    footer.is_linked_to_previous = False
+
+    # clear default empty paragraph runs to avoid extra blank line
+    for p in footer.paragraphs:
+        for r in p.runs:
+            r.text = ""
+
+    p = footer.paragraphs[0]
+    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    p.paragraph_format.space_before = Pt(0)
+    p.paragraph_format.space_after = Pt(0)
+
+    r = p.add_run()
+    _set_font(r, "宋体", "Times New Roman", size=9)
+    r._element.append(parse_xml(
+        f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>'))
+    r2 = p.add_run()
+    r2._element.append(parse_xml(
+        f'<w:instrText {nsdecls("w")} xml:space="preserve"> PAGE </w:instrText>'))
+    r3 = p.add_run()
+    r3._element.append(parse_xml(
+        f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>'))
+
+    _set_page_number_fmt(section, "lowerRoman" if roman else "decimal")
+
+
+# ── inline markdown parser ───────────────────────────────────────────────
+
+
+def _parse_inline(text: str):
+    """Tokenise line → list of (text, attrs) tuples."""
+    tokens: list[tuple[str, dict]] = []
+    buf = ""
+    i = 0
+    n = len(text)
+
+    def flush():
+        nonlocal buf
+        if buf:
+            tokens.append((buf, {}))
+            buf = ""
+
+    while i < n:
+        ch = text[i]
+        # `code`
+        if ch == "`":
+            flush()
+            j = text.find("`", i + 1)
+            if j == -1:
+                buf += ch
+                i += 1
+                continue
+            tokens.append((text[i + 1:j], {"code": True}))
+            i = j + 1
+            continue
+        # **bold**
+        if text[i:i + 2] == "**":
+            flush()
+            j = text.find("**", i + 2)
+            if j == -1:
+                buf += ch
+                i += 1
+                continue
+            inner = text[i + 2:j]
+            sub = _parse_inline(inner)
+            for t, a in sub:
+                a["bold"] = True
+                tokens.append((t, a))
+            i = j + 2
+            continue
+        # *italic*   (single star, not **)
+        if ch == "*" and i + 1 < n and text[i + 1] != "*":
+            flush()
+            j = text.find("*", i + 1)
+            if j == -1:
+                buf += ch
+                i += 1
+                continue
+            tokens.append((text[i + 1:j], {"italic": True}))
+            i = j + 1
+            continue
+        buf += ch
+        i += 1
+    flush()
+    return tokens
+
+
+def _add_inline(p: Paragraph, tokens: list, cfg: ThesisFormat,
+                size: float | None = None, bold: bool = False):
+    for text, attrs in tokens:
+        run = p.add_run(text)
+        b = bold or attrs.get("bold", False)
+        it = attrs.get("italic", False)
+        code = attrs.get("code", False)
+        cn = cfg.font_code if code else cfg.font_cn
+        en = cfg.font_code if code else cfg.font_en
+        _set_font(run, cn, en, size=size or cfg.size_body,
+                  bold=b, italic=it if not b else None)
+
+
+# ── block-level parser ───────────────────────────────────────────────────
+
+
+def _parse_blocks(text: str):
+    lines = text.split("\n")
+    blocks: list[dict] = []
+    i, n = 0, len(lines)
+
+    while i < n:
+        line = lines[i]
+
+        # thematic break
+        if line.strip() == "---":
+            blocks.append({"type": "thematic_break"})
+            i += 1
+            continue
+
+        # fenced code block
+        if line.strip().startswith("```") or line.strip().startswith("~~~"):
+            fence = line.strip()[:3]
+            info = line.strip()[3:].strip()
+            code_lines: list[str] = []
+            i += 1
+            while i < n and not lines[i].strip().startswith(fence):
+                code_lines.append(lines[i])
+                i += 1
+            i += 1
+            blocks.append({"type": "block_code", "info": info,
+                           "raw": "\n".join(code_lines)})
+            continue
+
+        # heading
+        m = re.match(r"^(#{1,6})\s+(.+)$", line)
+        if m:
+            blocks.append({"type": "heading",
+                           "level": len(m.group(1)),
+                           "text": m.group(2).strip()})
+            i += 1
+            continue
+
+        # blockquote
+        if line.strip().startswith(">"):
+            ql: list[str] = []
+            while i < n and (lines[i].strip().startswith(">")
+                             or lines[i].strip() == ""):
+                ql.append(re.sub(r"^>\s?", "", lines[i]))
+                i += 1
+            blocks.append({"type": "block_quote",
+                           "text": "\n".join(ql).strip()})
+            continue
+
+        # list
+        if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", line):
+            items: list[str] = []
+            while i < n:
+                if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", lines[i]):
+                    t = re.sub(r"^(\s*)[-*+]\s|\d+\.\s", "", lines[i], 1)
+                    items.append(t)
+                    i += 1
+                    while i < n and lines[i].strip() \
+                            and not re.match(r"^(\s*)([-*+]\s|\d+\.\s)",
+                                             lines[i]):
+                        if lines[i][0] in " \t":
+                            items[-1] += " " + lines[i].strip()
+                            i += 1
+                        else:
+                            break
+                elif lines[i].strip() == "":
+                    i += 1
+                else:
+                    break
+            blocks.append({"type": "list", "items": items})
+            continue
+
+        # blank
+        if line.strip() == "":
+            i += 1
+            continue
+
+        # paragraph (accumulate)
+        para: list[str] = []
+        while i < n and lines[i].strip():
+            para.append(lines[i])
+            i += 1
+        t = "\n".join(para).strip()
+        if t:
+            blocks.append({"type": "paragraph", "text": t})
+
+    return blocks
+
+
+# ── converter ────────────────────────────────────────────────────────────
+
+
+class ThesisConverter:
+    """Markdown → 理工类毕业论文 Word 文档。
+
+    处理流程:
+      1. 解析 MD → blocks
+      2. 扫描 blocks 提取论文题目（H1）
+      3. 按章节类别写入带正确格式的 Word
+      4. 每章自动分页、页面网格、字体字号严格按学校要求
+    """
+
+    def __init__(self, config: ThesisFormat | None = None):
+        self.config = config or ThesisFormat()
+        self.doc = Document()
+        self._thesis_title: str = ""       # 论文题目（来自 H1）
+        self._has_title = False            # 是否已保存论文题目
+        self._section_break_added = False  # 是否插入过正文分节符
+
+    # ── public API ──────────────────────────────────────────────────
+
+    def convert(self, md_path: str | Path, docx_path: str | Path):
+        text = Path(md_path).read_text(encoding="utf-8")
+        text = self._strip_manual_toc(text)
+        blocks = _parse_blocks(text)
+
+        # extract H1 thesis title
+        for blk in blocks:
+            if blk["type"] == "heading" and blk["level"] == 1:
+                self._thesis_title = blk["text"]
+                break
+
+        self._setup_document()
+        self._process_blocks(blocks)
+        self.doc.save(str(docx_path))
+
+    # ── strip manual TOC ────────────────────────────────────────────
+
+    @staticmethod
+    def _strip_manual_toc(text: str) -> str:
+        lines = text.split("\n")
+        toc_start = -1
+        sep_end = -1
+        for i, line in enumerate(lines):
+            if re.search(r"[目目]\s*[次次]", line) and line.startswith("#"):
+                toc_start = i
+            if toc_start >= 0 and line.strip() == "---" and i > toc_start:
+                sep_end = i
+                break
+        if toc_start >= 0 and sep_end > toc_start:
+            kept = lines[:toc_start + 1]
+            kept.append("")
+            kept.extend(lines[sep_end:])
+            return "\n".join(kept)
+        return text
+
+    # ── page setup ──────────────────────────────────────────────────
+
+    def _setup_document(self):
+        cfg = self.config
+        sec = self.doc.sections[0]
+        self._apply_page_setup(sec, roman=True)
+
+        # default font
+        styles = self.doc.styles
+        normal = styles["Normal"]
+        rpr = normal.element.get_or_add_rPr()
+        rfonts = rpr.find(qn("w:rFonts"))
+        if rfonts is None:
+            rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
+            rpr.insert(0, rfonts)
+        rfonts.set(qn("w:ascii"), cfg.font_en)
+        rfonts.set(qn("w:hAnsi"), cfg.font_en)
+        rfonts.set(qn("w:eastAsia"), cfg.font_cn)
+        rfonts.set(qn("w:cs"), cfg.font_en)
+
+        sz = rpr.find(qn("w:sz"))
+        if sz is None:
+            sz = parse_xml(
+                f'<w:sz {nsdecls("w")} w:val="{int(cfg.size_body * 2)}"/>')
+            rpr.append(sz)
+        pf = normal.paragraph_format
+        pf.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
+        pf.line_spacing = cfg.line_spacing_body
+
+        self._config_heading_styles()
+
+        _setup_footer(sec, roman=True)
+
+    def _config_heading_styles(self):
+        """Configure Heading 1/2/3 built-in styles to match thesis formatting.
+
+        This ensures Word's TOC field can detect headings and auto-generate
+        the table of contents correctly.
+        """
+        cfg = self.config
+        styles = self.doc.styles
+
+        # ── Heading 1 = 章 (三号宋体加粗左) ──────────────────────────
+        h1 = styles["Heading 1"]
+        h1.font.name = cfg.font_heading_en
+        rpr = h1.element.get_or_add_rPr()
+        rfonts = rpr.find(qn("w:rFonts"))
+        if rfonts is None:
+            rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
+            rpr.insert(0, rfonts)
+        rfonts.set(qn("w:eastAsia"), cfg.font_cn_heading)
+        h1.font.size = Pt(cfg.size_chapter)
+        h1.font.bold = True
+        h1.font.color.rgb = RGBColor(0, 0, 0)
+        h1.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
+        h1.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
+        h1.paragraph_format.line_spacing = cfg.line_spacing_heading
+        h1.paragraph_format.space_before = Pt(0)
+        h1.paragraph_format.space_after = Pt(0)
+        # Keep with next + page break before
+        pPr = h1.element.get_or_add_pPr()
+        keep_next = parse_xml(f'<w:keepNext {nsdecls("w")}/>')
+        pPr.append(keep_next)
+
+        # ── Heading 2 = 节 (小三号宋体加粗左) ────────────────────────
+        h2 = styles["Heading 2"]
+        h2.font.name = cfg.font_heading_en
+        rpr2 = h2.element.get_or_add_rPr()
+        rfonts2 = rpr2.find(qn("w:rFonts"))
+        if rfonts2 is None:
+            rfonts2 = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
+            rpr2.insert(0, rfonts2)
+        rfonts2.set(qn("w:eastAsia"), cfg.font_cn_heading)
+        h2.font.size = Pt(cfg.size_section)
+        h2.font.bold = True
+        h2.font.color.rgb = RGBColor(0, 0, 0)
+        h2.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
+        h2.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
+        h2.paragraph_format.line_spacing = cfg.line_spacing_heading
+        h2.paragraph_format.space_before = Pt(0)
+        h2.paragraph_format.space_after = Pt(0)
+
+        # ── Heading 3 = 条 (四号宋体加粗左) ──────────────────────────
+        h3 = styles["Heading 3"]
+        h3.font.name = cfg.font_heading_en
+        rpr3 = h3.element.get_or_add_rPr()
+        rfonts3 = rpr3.find(qn("w:rFonts"))
+        if rfonts3 is None:
+            rfonts3 = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
+            rpr3.insert(0, rfonts3)
+        rfonts3.set(qn("w:eastAsia"), cfg.font_cn_heading)
+        h3.font.size = Pt(cfg.size_subsection)
+        h3.font.bold = True
+        h3.font.color.rgb = RGBColor(0, 0, 0)
+        h3.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
+        h3.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
+        h3.paragraph_format.line_spacing = cfg.line_spacing_heading
+        h3.paragraph_format.space_before = Pt(0)
+        h3.paragraph_format.space_after = Pt(0)
+
+    def _add_section_break_main(self):
+        sec = self.doc.add_section()
+        self._apply_page_setup(sec, roman=False)
+        self._section_break_added = True
+
+    def _apply_page_setup(self, sec, roman: bool = True):
+        """Apply margins, grid, and footer to a section."""
+        cfg = self.config
+        sec.page_width = Cm(cfg.page_width)
+        sec.page_height = Cm(cfg.page_height)
+
+        sect_pr = sec._sectPr
+        for el in list(sect_pr):
+            if el.tag in (qn("w:pgMar"), qn("w:docGrid")):
+                sect_pr.remove(el)
+
+        pgMar = parse_xml(
+            f'<w:pgMar {nsdecls("w")} '
+            f'w:top="{int(cfg.margin_top * 567)}" '
+            f'w:bottom="{int(cfg.margin_bottom * 567)}" '
+            f'w:left="{int(cfg.margin_left * 567)}" '
+            f'w:right="{int(cfg.margin_right * 567)}" '
+            f'w:header="0" '
+            f'w:footer="{int(cfg.footer_distance * 567)}"/>')
+        sect_pr.append(pgMar)
+
+        text_height_mm = (cfg.page_height - cfg.margin_top
+                          - cfg.margin_bottom) * 10
+        line_pitch = int(text_height_mm / cfg.grid_lines_per_page * 56.7)
+        text_width_mm = (cfg.page_width - cfg.margin_left
+                         - cfg.margin_right) * 10
+        char_pitch = int(text_width_mm / cfg.grid_chars_per_line * 56.7)
+        dg = parse_xml(
+            f'<w:docGrid {nsdecls("w")} '
+            f'w:type="linesAndChars" '
+            f'w:linePitch="{line_pitch}" '
+            f'w:charSpace="{char_pitch}"/>')
+        sect_pr.append(dg)
+
+        _setup_footer(sec, roman=roman)
+
+    # ── block processing ────────────────────────────────────────────
+
+    def _process_blocks(self, blocks):
+        # State machine:
+        #   before_abstract → abstract_cn → abstract_en → toc → main
+        state = "before_abstract"
+        self._seen_first_chapter = False
+
+        for blk in blocks:
+            t = blk["type"]
+
+            if t == "heading" and blk["level"] == 1:
+                # Skip H1 (thesis title) — not rendered on Chinese abstract
+                continue
+
+            if t == "heading" and blk["level"] == 2:
+                txt = blk["text"].strip()
+                if txt.replace(" ", "") == "摘  要".replace(" ", ""):
+                    state = "abstract_cn"
+                    self._add_abstract_title("摘  要")
+                    continue
+                if txt == "Abstract":
+                    self._add_abstract_title_en()
+                    state = "abstract_en"
+                    continue
+                if "目" in txt and "次" in txt:
+                    state = "toc"
+                    self._add_toc("目  次")
+                    continue
+                # Normal chapter
+                if state in ("before_abstract", "abstract_cn", "abstract_en", "toc"):
+                    self._add_section_break_main()
+                    state = "main"
+                self._add_page_break_if_not_first()
+                self._add_chapter(txt)
+                continue
+
+            if t == "heading" and blk["level"] == 3:
+                self._ensure_main_section(state)
+                state = "main"
+                txt = blk["text"].strip()
+                if re.match(r"^\d+\.\d+\.\d+\s", txt):
+                    self._add_subsection(txt)
+                else:
+                    self._add_section(txt)
+                continue
+
+            if t == "heading" and blk["level"] >= 4:
+                self._ensure_main_section(state)
+                state = "main"
+                # headings below 3 → body-style bold
+                self._add_body_para(blk["text"], bold=True, indent=False)
+                continue
+
+            # paragraphs / code / blockquote / list / thematic_break
+
+            if t == "paragraph":
+                txt = blk["text"]
+                if not txt.strip():
+                    continue
+                if state == "abstract_cn":
+                    if txt.startswith("关键词："):
+                        self._add_keywords(txt, cn=True)
+                    else:
+                        self._add_abstract_body(txt)
+                    continue
+                if state == "abstract_en":
+                    if txt.startswith("Key words:"):
+                        self._add_keywords(txt, cn=False)
+                    else:
+                        self._add_abstract_body(txt)  # 英文摘要正文
+                    continue
+                # Normal body
+                self._ensure_main_section(state)
+                state = "main"
+                if txt.startswith("关键词："):
+                    self._add_keywords(txt, cn=True)
+                elif txt.startswith("Key words:"):
+                    self._add_keywords(txt, cn=False)
+                else:
+                    self._add_body_para(txt)
+                continue
+
+            if t == "block_code":
+                # code can appear in abstract or main — skip abstract code
+                if state in ("abstract_cn", "abstract_en", "toc"):
+                    continue
+                self._ensure_main_section(state)
+                state = "main"
+                self._process_code(blk)
+                continue
+
+            if t == "block_quote":
+                txt = blk.get("text", "").strip()
+                if not txt:
+                    continue
+                self._ensure_main_section(state)
+                state = "main"
+                self._add_body_para(txt)
+                continue
+
+            if t == "list":
+                self._ensure_main_section(state)
+                state = "main"
+                for item in blk.get("items", []):
+                    self._add_body_para("• " + item)
+                continue
+
+            if t == "thematic_break":
+                # In front matter or already processed — handled by state
+                continue
+
+    def _ensure_main_section(self, state: str):
+        if state in ("before_abstract", "abstract_cn", "abstract_en", "toc"):
+            if not self._section_break_added:
+                self._add_section_break_main()
+
+    def _add_page_break_if_not_first(self):
+        if self._seen_first_chapter:
+            self.doc.add_page_break()
+        else:
+            self._seen_first_chapter = True
+
+    # ══════════════════════════════════════════════════════════════
+    #  rendering methods
+    # ══════════════════════════════════════════════════════════════
+
+    # ── abstract ──────────────────────────────────────────────────
+
+    def _add_abstract_title(self, text: str):
+        """摘要题头：三号宋体加粗居中 (3.3节)"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.style = self.doc.styles["Heading 1"]
+        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_heading)
+        run = p.add_run(text)
+        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
+                  size=cfg.size_abstract_title, bold=True)
+        # blank line after title (§3.3)
+        self.doc.add_paragraph()
+
+    def _add_abstract_body(self, text: str):
+        """摘要正文：小四宋体，首行缩进2字符"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_body)
+        _set_indent(p, cfg.first_line_indent_chars)
+        tokens = _parse_inline(text)
+        _add_inline(p, tokens, cfg)
+
+    def _add_abstract_title_en(self):
+        """英文摘要页：标题＋论文题目＋作者署名 (2.3节)"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.style = self.doc.styles["Heading 1"]
+        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_heading)
+        run = p.add_run("Abstract")
+        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
+                  size=cfg.size_abstract_title, bold=True)
+        self.doc.add_paragraph()
+
+        # Thesis title in English (centered)
+        if self._thesis_title:
+            # crude English translation placeholder — user should replace
+            p2 = self.doc.add_paragraph()
+            p2.alignment = WD_ALIGN_PARAGRAPH.CENTER
+            _set_spacing(p2, before=0, after=0,
+                         line_spacing=cfg.line_spacing_heading)
+            r = p2.add_run(self._thesis_title)
+            _set_font(r, cfg.font_cn_heading, cfg.font_heading_en,
+                      size=cfg.size_section, bold=True)
+
+        # Author & teacher line
+        p3 = self.doc.add_paragraph()
+        p3.alignment = WD_ALIGN_PARAGRAPH.CENTER
+        _set_spacing(p3, before=6, after=6,
+                     line_spacing=cfg.line_spacing_heading)
+        r = p3.add_run("Student:  \tTeacher:  ")
+        _set_font(r, cfg.font_cn, cfg.font_en, size=cfg.size_body)
+
+    # ── keywords ──────────────────────────────────────────────────
+
+    def _add_keywords(self, text: str, cn: bool):
+        """关键词：小四宋体加粗顶格 (3.3节)"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_body)
+
+        label = cfg.keywords_label_cn if cn else cfg.keywords_label_en
+        m = re.match(r"\*\*" + re.escape(label) + r"\*\*(.*)", text)
+        if m:
+            run = p.add_run(label)
+            _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
+                      size=cfg.size_keyword_label, bold=True)
+            rest = m.group(1).strip()
+            tokens = _parse_inline(rest)
+            _add_inline(p, tokens, cfg, size=cfg.size_keyword_label)
+        else:
+            tokens = _parse_inline(text)
+            _add_inline(p, tokens, cfg, size=cfg.size_keyword_label)
+
+    # ── TOC ────────────────────────────────────────────────────────
+
+    def _add_toc(self, title: str):
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.style = self.doc.styles["Heading 1"]
+        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_heading)
+        run = p.add_run(title)
+        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
+                  size=cfg.size_abstract_title, bold=True)
+
+        self.doc.add_paragraph()  # blank line
+
+        # Word TOC field
+        p2 = self.doc.add_paragraph()
+        _set_spacing(p2, before=0, after=0,
+                     line_spacing=cfg.line_spacing_body)
+        r = p2.add_run()
+        r._element.append(parse_xml(
+            f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>'))
+        r2 = p2.add_run()
+        r2._element.append(parse_xml(
+            f'<w:instrText {nsdecls("w")} xml:space="preserve">'
+            ' TOC \\o "1-3" \\h \\z \\u </w:instrText>'))
+        r3 = p2.add_run()
+        r3._element.append(parse_xml(
+            f'<w:fldChar {nsdecls("w")} w:fldCharType="separate"/>'))
+        r4 = p2.add_run("（请右键此处 > 更新域）")
+        _set_font(r4, cfg.font_cn, cfg.font_en, size=cfg.size_body)
+        r5 = p2.add_run()
+        r5._element.append(parse_xml(
+            f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>'))
+
+    # ── chapter headings (第一层次) ──────────────────────────────
+
+    def _add_chapter(self, text: str):
+        """章标题：三号宋体加粗，顶格 (§3.2 表3)"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.style = self.doc.styles["Heading 1"]
+        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_heading)
+
+        # Ensure double space between number and title (§2.5.2 表1)
+        formatted = re.sub(r"^(\d+)\s+", r"\1  ", text)
+        run = p.add_run(formatted)
+        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
+                  size=cfg.size_chapter, bold=True)
+
+    # ── section heading (第二层次) ───────────────────────────────
+
+    def _add_section(self, text: str):
+        """节标题：小三号宋体加粗，顶格 (§3.2 表3)"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.style = self.doc.styles["Heading 2"]
+        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_heading)
+
+        # Single space between number and title (§2.5.2 表1)
+        formatted = re.sub(r"^(\d+\.\d+)\s+", r"\1 ", text)
+        run = p.add_run(formatted)
+        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
+                  size=cfg.size_section, bold=True)
+
+    # ── subsection heading (第三层次) ──────────────────────────
+
+    def _add_subsection(self, text: str):
+        """条标题：四号宋体加粗，顶格 (§3.2 表3)"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.style = self.doc.styles["Heading 3"]
+        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_heading)
+        run = p.add_run(text)
+        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
+                  size=cfg.size_subsection, bold=True)
+
+    # ── body paragraph ──────────────────────────────────────────
+
+    def _add_body_para(self, text: str, bold: bool = False,
+                       indent: bool = True):
+        """正文：小四宋体，首行缩进2字符 (§3.2)"""
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_body)
+        if indent:
+            _set_indent(p, cfg.first_line_indent_chars)
+        tokens = _parse_inline(text)
+        _add_inline(p, tokens, cfg, bold=bold)
+
+    # ── code block ──────────────────────────────────────────────
+
+    def _process_code(self, blk: dict):
+        code = blk.get("raw", "")
+        if not code.strip():
+            return
+        cfg = self.config
+        p = self.doc.add_paragraph()
+        _set_spacing(p, before=0, after=0,
+                     line_spacing=cfg.line_spacing_code)
+        pf = p.paragraph_format
+        pf.left_indent = Cm(0.75)
+
+        pPr = p._element.get_or_add_pPr()
+        shd = parse_xml(
+            f'<w:shd {nsdecls("w")} w:fill="F2F2F2" w:val="clear"/>')
+        pPr.append(shd)
+
+        for line in code.split("\n"):
+            if line:
+                run = p.add_run(line)
+                _set_font(run, cfg.font_code, cfg.font_code,
+                          size=cfg.size_code)
+            p.add_run("\n")