# md2word/docx_thesis/converter.py

"""Convert Markdown graduation thesis → formatted Word .docx.

Parses markdown line-by-line and writes a python-docx document that
complies with 桂林理工大学 理工类毕业设计（论文）格式要求.
"""

from __future__ import annotations

import re
from pathlib import Path

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls, qn
from docx.shared import Cm, Pt, RGBColor
from docx.text.paragraph import Paragraph
from docx.text.run import Run

from .config import ThesisFormat


# ── font helpers ─────────────────────────────────────────────────────────


def _set_font(
    run: Run,
    cn_font: str,
    en_font: str | None = None,
    size: float | None = None,
    bold: bool | None = None,
    italic: bool | None = None,
):
    """Apply typeface, size and weight settings to a single run.

    Args:
        run: Run that is modified in place.
        cn_font: Typeface written to the ``w:eastAsia`` attribute of the
            run's ``w:rFonts`` element; skipped when empty.
        en_font: Typeface assigned through ``run.font.name``; skipped when
            empty or ``None``.
        size: Font size in points; left untouched when ``None``.
        bold: Bold flag; left untouched when ``None``.
        italic: Italic flag; left untouched when ``None``.
    """
    font = run.font

    if en_font:
        font.name = en_font

    if cn_font:
        # The East Asian typeface is written directly on <w:rFonts>,
        # creating the element first when the run does not have one yet.
        run_props = run._element.get_or_add_rPr()
        fonts_el = run_props.find(qn("w:rFonts"))
        if fonts_el is None:
            fonts_el = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            run_props.insert(0, fonts_el)
        fonts_el.set(qn("w:eastAsia"), cn_font)

    if size is not None:
        font.size = Pt(size)

    # Tri-state flags: None means "do not touch the existing setting".
    for attr_name, flag in (("bold", bold), ("italic", italic)):
        if flag is not None:
            setattr(font, attr_name, flag)


def _set_spacing(p: Paragraph, before: int = 0, after: int = 0,
                 line_spacing: float = 1.0):
    """Set the vertical spacing of paragraph *p*.

    Args:
        p: Paragraph that is modified in place.
        before: Space before the paragraph, in points.
        after: Space after the paragraph, in points.
        line_spacing: Line-spacing value assigned after the rule has been
            switched to ``WD_LINE_SPACING.MULTIPLE``.
    """
    fmt = p.paragraph_format
    fmt.space_before, fmt.space_after = Pt(before), Pt(after)
    # Rule first, value second — same order as the assignments always had.
    fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    fmt.line_spacing = line_spacing


def _set_indent(p: Paragraph, chars: int = 2):
    """Give paragraph *p* a first-line indent of *chars* character widths.

    Nothing is changed unless *chars* is greater than zero. One character
    is taken to be a fixed 0.37 cm.
    NOTE(review): 0.37 cm presumably corresponds to one specific font
    size — confirm it matches the configured body size.
    """
    if not chars > 0:
        return
    p.paragraph_format.first_line_indent = Cm(chars * 0.37)


def _set_page_number_fmt(section, fmt: str):
    """Set the page-number format (``w:pgNumType/@w:fmt``) of *section*.

    Args:
        section: Section whose ``_sectPr`` element is modified in place.
        fmt: Value written to ``w:fmt`` (callers in this module pass
            ``"lowerRoman"`` or ``"decimal"``).

    An existing ``w:pgNumType`` is reused. A new one is inserted in front
    of the first element that has to follow it in the ``w:sectPr`` child
    sequence, instead of being appended blindly at the end.
    """
    sect_pr = section._sectPr
    el = sect_pr.find(qn("w:pgNumType"))
    if el is None:
        el = parse_xml(f'<w:pgNumType {nsdecls("w")}/>')
        # Children of <w:sectPr> form an ordered sequence; these are the
        # elements that must come after <w:pgNumType>.
        later_siblings = (
            "w:cols", "w:formProt", "w:vAlign", "w:noEndnote", "w:titlePg",
            "w:textDirection", "w:bidi", "w:rtlGutter", "w:docGrid",
            "w:printerSettings", "w:sectPrChange",
        )
        for tag in later_siblings:
            successor = sect_pr.find(qn(tag))
            if successor is not None:
                successor.addprevious(el)
                break
        else:
            # None of the later elements exist, so the end is the right spot.
            sect_pr.append(el)
    el.set(qn("w:fmt"), fmt)


def _setup_footer(section, roman: bool):
    """Give *section* its own footer holding a centred PAGE field.

    Args:
        section: Section whose footer is (re)built.
        roman: ``True`` selects ``lowerRoman`` page numbers, ``False``
            selects ``decimal``.

    The footer is unlinked from the previous section, any runs already in
    it are removed, and a three-run PAGE field (begin / instruction / end)
    is added to the first footer paragraph. Calling the function again on
    the same section rebuilds the field instead of stacking leftovers.
    """
    footer = section.footer
    footer.is_linked_to_previous = False

    # Remove existing runs outright. Blanking their text would leave one
    # empty run behind for every earlier call on this section.
    for para in footer.paragraphs:
        for stale in list(para.runs):
            stale._element.getparent().remove(stale._element)

    p = footer.paragraphs[0]
    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_before = Pt(0)
    p.paragraph_format.space_after = Pt(0)

    field_parts = (
        f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>',
        f'<w:instrText {nsdecls("w")} xml:space="preserve"> PAGE </w:instrText>',
        f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>',
    )
    for part_xml in field_parts:
        r = p.add_run()
        # Format every run of the field, not only the "begin" marker.
        # NOTE(review): Word is expected to take the rendered number's
        # character formatting from the field-code runs — confirm in Word.
        _set_font(r, "宋体", "Times New Roman", size=9)
        r._element.append(parse_xml(part_xml))

    _set_page_number_fmt(section, "lowerRoman" if roman else "decimal")


# ── inline markdown parser ───────────────────────────────────────────────


def _parse_inline(text: str):
    """Tokenise line → list of (text, attrs) tuples."""
    tokens: list[tuple[str, dict]] = []
    buf = ""
    i = 0
    n = len(text)

    def flush():
        nonlocal buf
        if buf:
            tokens.append((buf, {}))
            buf = ""

    while i < n:
        ch = text[i]
        # `code`
        if ch == "`":
            flush()
            j = text.find("`", i + 1)
            if j == -1:
                buf += ch
                i += 1
                continue
            tokens.append((text[i + 1:j], {"code": True}))
            i = j + 1
            continue
        # **bold**
        if text[i:i + 2] == "**":
            flush()
            j = text.find("**", i + 2)
            if j == -1:
                buf += ch
                i += 1
                continue
            inner = text[i + 2:j]
            sub = _parse_inline(inner)
            for t, a in sub:
                a["bold"] = True
                tokens.append((t, a))
            i = j + 2
            continue
        # *italic*   (single star, not **)
        if ch == "*" and i + 1 < n and text[i + 1] != "*":
            flush()
            j = text.find("*", i + 1)
            if j == -1:
                buf += ch
                i += 1
                continue
            tokens.append((text[i + 1:j], {"italic": True}))
            i = j + 1
            continue
        buf += ch
        i += 1
    flush()
    return tokens


def _add_inline(p: Paragraph, tokens: list, cfg: ThesisFormat,
                size: float | None = None, bold: bool = False):
    """Append one formatted run per inline token to paragraph *p*.

    Args:
        p: Paragraph that receives the runs.
        tokens: ``(text, attrs)`` tuples as produced by ``_parse_inline``.
        cfg: Source of the typefaces (``font_code``, ``font_cn``,
            ``font_en``) and of the fallback size ``size_body``.
        size: Font size in points; a falsy value falls back to
            ``cfg.size_body``.
        bold: Force bold on every run regardless of the token attributes.
    """
    for chunk, attrs in tokens:
        run = p.add_run(chunk)
        is_bold = bold or attrs.get("bold", False)
        wants_italic = attrs.get("italic", False)

        if attrs.get("code", False):
            cn_face = en_face = cfg.font_code
        else:
            cn_face, en_face = cfg.font_cn, cfg.font_en

        # On bold runs the italic flag is passed as None, i.e. left untouched.
        italic_arg = None if is_bold else wants_italic
        _set_font(run, cn_face, en_face, size=size or cfg.size_body,
                  bold=is_bold, italic=italic_arg)


# ── block-level parser ───────────────────────────────────────────────────


def _parse_blocks(text: str):
    lines = text.split("\n")
    blocks: list[dict] = []
    i, n = 0, len(lines)

    while i < n:
        line = lines[i]

        # thematic break
        if line.strip() == "---":
            blocks.append({"type": "thematic_break"})
            i += 1
            continue

        # fenced code block
        if line.strip().startswith("```") or line.strip().startswith("~~~"):
            fence = line.strip()[:3]
            info = line.strip()[3:].strip()
            code_lines: list[str] = []
            i += 1
            while i < n and not lines[i].strip().startswith(fence):
                code_lines.append(lines[i])
                i += 1
            i += 1
            blocks.append({"type": "block_code", "info": info,
                           "raw": "\n".join(code_lines)})
            continue

        # heading
        m = re.match(r"^(#{1,6})\s+(.+)$", line)
        if m:
            blocks.append({"type": "heading",
                           "level": len(m.group(1)),
                           "text": m.group(2).strip()})
            i += 1
            continue

        # blockquote
        if line.strip().startswith(">"):
            ql: list[str] = []
            while i < n and (lines[i].strip().startswith(">")
                             or lines[i].strip() == ""):
                ql.append(re.sub(r"^>\s?", "", lines[i]))
                i += 1
            blocks.append({"type": "block_quote",
                           "text": "\n".join(ql).strip()})
            continue

        # list
        if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", line):
            items: list[str] = []
            while i < n:
                if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", lines[i]):
                    t = re.sub(r"^(\s*)[-*+]\s|\d+\.\s", "", lines[i], 1)
                    items.append(t)
                    i += 1
                    while i < n and lines[i].strip() \
                            and not re.match(r"^(\s*)([-*+]\s|\d+\.\s)",
                                             lines[i]):
                        if lines[i][0] in " \t":
                            items[-1] += " " + lines[i].strip()
                            i += 1
                        else:
                            break
                elif lines[i].strip() == "":
                    i += 1
                else:
                    break
            blocks.append({"type": "list", "items": items})
            continue

        # blank
        if line.strip() == "":
            i += 1
            continue

        # paragraph (accumulate)
        para: list[str] = []
        while i < n and lines[i].strip():
            para.append(lines[i])
            i += 1
        t = "\n".join(para).strip()
        if t:
            blocks.append({"type": "paragraph", "text": t})

    return blocks


# ── converter ────────────────────────────────────────────────────────────


class ThesisConverter:
    """Markdown → 理工类毕业论文 Word 文档。

    处理流程:
      1. 解析 MD → blocks
      2. 扫描 blocks 提取论文题目（H1）
      3. 按章节类别写入带正确格式的 Word
      4. 每章自动分页、页面网格、字体字号严格按学校要求
    """

    def __init__(self, config: ThesisFormat | None = None):
        self.config = config or ThesisFormat()
        self.doc = Document()
        self._thesis_title: str = ""       # 论文题目（来自 H1）
        self._has_title = False            # 是否已保存论文题目
        self._section_break_added = False  # 是否插入过正文分节符

    # ── public API ──────────────────────────────────────────────────

    def convert(self, md_path: str | Path, docx_path: str | Path):
        text = Path(md_path).read_text(encoding="utf-8")
        text = self._strip_manual_toc(text)
        blocks = _parse_blocks(text)

        # extract H1 thesis title
        for blk in blocks:
            if blk["type"] == "heading" and blk["level"] == 1:
                self._thesis_title = blk["text"]
                break

        self._setup_document()
        self._process_blocks(blocks)
        self.doc.save(str(docx_path))

    # ── strip manual TOC ────────────────────────────────────────────

    @staticmethod
    def _strip_manual_toc(text: str) -> str:
        lines = text.split("\n")
        toc_start = -1
        sep_end = -1
        for i, line in enumerate(lines):
            if re.search(r"[目目]\s*[次次]", line) and line.startswith("#"):
                toc_start = i
            if toc_start >= 0 and line.strip() == "---" and i > toc_start:
                sep_end = i
                break
        if toc_start >= 0 and sep_end > toc_start:
            kept = lines[:toc_start + 1]
            kept.append("")
            kept.extend(lines[sep_end:])
            return "\n".join(kept)
        return text

    # ── page setup ──────────────────────────────────────────────────

    def _setup_document(self):
        cfg = self.config
        sec = self.doc.sections[0]
        self._apply_page_setup(sec, roman=True)

        # default font
        styles = self.doc.styles
        normal = styles["Normal"]
        rpr = normal.element.get_or_add_rPr()
        rfonts = rpr.find(qn("w:rFonts"))
        if rfonts is None:
            rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            rpr.insert(0, rfonts)
        rfonts.set(qn("w:ascii"), cfg.font_en)
        rfonts.set(qn("w:hAnsi"), cfg.font_en)
        rfonts.set(qn("w:eastAsia"), cfg.font_cn)
        rfonts.set(qn("w:cs"), cfg.font_en)

        sz = rpr.find(qn("w:sz"))
        if sz is None:
            sz = parse_xml(
                f'<w:sz {nsdecls("w")} w:val="{int(cfg.size_body * 2)}"/>')
            rpr.append(sz)
        pf = normal.paragraph_format
        pf.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        pf.line_spacing = cfg.line_spacing_body

        self._config_heading_styles()

        _setup_footer(sec, roman=True)

    def _config_heading_styles(self):
        """Configure Heading 1/2/3 built-in styles to match thesis formatting.

        This ensures Word's TOC field can detect headings and auto-generate
        the table of contents correctly.
        """
        cfg = self.config
        styles = self.doc.styles

        # ── Heading 1 = 章 (三号宋体加粗左) ──────────────────────────
        h1 = styles["Heading 1"]
        h1.font.name = cfg.font_heading_en
        rpr = h1.element.get_or_add_rPr()
        rfonts = rpr.find(qn("w:rFonts"))
        if rfonts is None:
            rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            rpr.insert(0, rfonts)
        rfonts.set(qn("w:eastAsia"), cfg.font_cn_heading)
        h1.font.size = Pt(cfg.size_chapter)
        h1.font.bold = True
        h1.font.color.rgb = RGBColor(0, 0, 0)
        h1.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
        h1.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        h1.paragraph_format.line_spacing = cfg.line_spacing_heading
        h1.paragraph_format.space_before = Pt(0)
        h1.paragraph_format.space_after = Pt(0)
        # Keep with next + page break before
        pPr = h1.element.get_or_add_pPr()
        keep_next = parse_xml(f'<w:keepNext {nsdecls("w")}/>')
        pPr.append(keep_next)

        # ── Heading 2 = 节 (小三号宋体加粗左) ────────────────────────
        h2 = styles["Heading 2"]
        h2.font.name = cfg.font_heading_en
        rpr2 = h2.element.get_or_add_rPr()
        rfonts2 = rpr2.find(qn("w:rFonts"))
        if rfonts2 is None:
            rfonts2 = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            rpr2.insert(0, rfonts2)
        rfonts2.set(qn("w:eastAsia"), cfg.font_cn_heading)
        h2.font.size = Pt(cfg.size_section)
        h2.font.bold = True
        h2.font.color.rgb = RGBColor(0, 0, 0)
        h2.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
        h2.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        h2.paragraph_format.line_spacing = cfg.line_spacing_heading
        h2.paragraph_format.space_before = Pt(0)
        h2.paragraph_format.space_after = Pt(0)

        # ── Heading 3 = 条 (四号宋体加粗左) ──────────────────────────
        h3 = styles["Heading 3"]
        h3.font.name = cfg.font_heading_en
        rpr3 = h3.element.get_or_add_rPr()
        rfonts3 = rpr3.find(qn("w:rFonts"))
        if rfonts3 is None:
            rfonts3 = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            rpr3.insert(0, rfonts3)
        rfonts3.set(qn("w:eastAsia"), cfg.font_cn_heading)
        h3.font.size = Pt(cfg.size_subsection)
        h3.font.bold = True
        h3.font.color.rgb = RGBColor(0, 0, 0)
        h3.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
        h3.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        h3.paragraph_format.line_spacing = cfg.line_spacing_heading
        h3.paragraph_format.space_before = Pt(0)
        h3.paragraph_format.space_after = Pt(0)

    def _add_section_break_main(self):
        sec = self.doc.add_section()
        self._apply_page_setup(sec, roman=False)
        self._section_break_added = True

    def _apply_page_setup(self, sec, roman: bool = True):
        """Apply margins, grid, and footer to a section."""
        cfg = self.config
        sec.page_width = Cm(cfg.page_width)
        sec.page_height = Cm(cfg.page_height)

        sect_pr = sec._sectPr
        for el in list(sect_pr):
            if el.tag in (qn("w:pgMar"), qn("w:docGrid")):
                sect_pr.remove(el)

        pgMar = parse_xml(
            f'<w:pgMar {nsdecls("w")} '
            f'w:top="{int(cfg.margin_top * 567)}" '
            f'w:bottom="{int(cfg.margin_bottom * 567)}" '
            f'w:left="{int(cfg.margin_left * 567)}" '
            f'w:right="{int(cfg.margin_right * 567)}" '
            f'w:header="0" '
            f'w:footer="{int(cfg.footer_distance * 567)}"/>')
        sect_pr.append(pgMar)

        text_height_mm = (cfg.page_height - cfg.margin_top
                          - cfg.margin_bottom) * 10
        line_pitch = int(text_height_mm / cfg.grid_lines_per_page * 56.7)
        text_width_mm = (cfg.page_width - cfg.margin_left
                         - cfg.margin_right) * 10
        char_pitch = int(text_width_mm / cfg.grid_chars_per_line * 56.7)
        dg = parse_xml(
            f'<w:docGrid {nsdecls("w")} '
            f'w:type="linesAndChars" '
            f'w:linePitch="{line_pitch}" '
            f'w:charSpace="{char_pitch}"/>')
        sect_pr.append(dg)

        _setup_footer(sec, roman=roman)

    # ── block processing ────────────────────────────────────────────

    def _process_blocks(self, blocks):
        # State machine:
        #   before_abstract → abstract_cn → abstract_en → toc → main
        state = "before_abstract"
        self._seen_first_chapter = False

        for blk in blocks:
            t = blk["type"]

            if t == "heading" and blk["level"] == 1:
                # Skip H1 (thesis title) — not rendered on Chinese abstract
                continue

            if t == "heading" and blk["level"] == 2:
                txt = blk["text"].strip()
                if txt.replace(" ", "") == "摘  要".replace(" ", ""):
                    state = "abstract_cn"
                    self._add_abstract_title("摘  要")
                    continue
                if txt == "Abstract":
                    self._add_abstract_title_en()
                    state = "abstract_en"
                    continue
                if "目" in txt and "次" in txt:
                    state = "toc"
                    self._add_toc("目  次")
                    continue
                # Normal chapter
                if state in ("before_abstract", "abstract_cn", "abstract_en", "toc"):
                    self._add_section_break_main()
                    state = "main"
                self._add_page_break_if_not_first()
                self._add_chapter(txt)
                continue

            if t == "heading" and blk["level"] == 3:
                self._ensure_main_section(state)
                state = "main"
                txt = blk["text"].strip()
                if re.match(r"^\d+\.\d+\.\d+\s", txt):
                    self._add_subsection(txt)
                else:
                    self._add_section(txt)
                continue

            if t == "heading" and blk["level"] >= 4:
                self._ensure_main_section(state)
                state = "main"
                # headings below 3 → body-style bold
                self._add_body_para(blk["text"], bold=True, indent=False)
                continue

            # paragraphs / code / blockquote / list / thematic_break

            if t == "paragraph":
                txt = blk["text"]
                if not txt.strip():
                    continue
                if state == "abstract_cn":
                    if txt.startswith("关键词："):
                        self._add_keywords(txt, cn=True)
                    else:
                        self._add_abstract_body(txt)
                    continue
                if state == "abstract_en":
                    if txt.startswith("Key words:"):
                        self._add_keywords(txt, cn=False)
                    else:
                        self._add_abstract_body(txt)  # 英文摘要正文
                    continue
                # Normal body
                self._ensure_main_section(state)
                state = "main"
                if txt.startswith("关键词："):
                    self._add_keywords(txt, cn=True)
                elif txt.startswith("Key words:"):
                    self._add_keywords(txt, cn=False)
                else:
                    self._add_body_para(txt)
                continue

            if t == "block_code":
                # code can appear in abstract or main — skip abstract code
                if state in ("abstract_cn", "abstract_en", "toc"):
                    continue
                self._ensure_main_section(state)
                state = "main"
                self._process_code(blk)
                continue

            if t == "block_quote":
                txt = blk.get("text", "").strip()
                if not txt:
                    continue
                self._ensure_main_section(state)
                state = "main"
                self._add_body_para(txt)
                continue

            if t == "list":
                self._ensure_main_section(state)
                state = "main"
                for item in blk.get("items", []):
                    self._add_body_para("• " + item)
                continue

            if t == "thematic_break":
                # In front matter or already processed — handled by state
                continue

    def _ensure_main_section(self, state: str):
        if state in ("before_abstract", "abstract_cn", "abstract_en", "toc"):
            if not self._section_break_added:
                self._add_section_break_main()

    def _add_page_break_if_not_first(self):
        if self._seen_first_chapter:
            self.doc.add_page_break()
        else:
            self._seen_first_chapter = True

    # ══════════════════════════════════════════════════════════════
    #  rendering methods
    # ══════════════════════════════════════════════════════════════

    # ── abstract ──────────────────────────────────────────────────

    def _add_abstract_title(self, text: str):
        """摘要题头：三号宋体加粗居中 (3.3节)"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.style = self.doc.styles["Heading 1"]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)
        run = p.add_run(text)
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_abstract_title, bold=True)
        # blank line after title (§3.3)
        self.doc.add_paragraph()

    def _add_abstract_body(self, text: str):
        """摘要正文：小四宋体，首行缩进2字符"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_body)
        _set_indent(p, cfg.first_line_indent_chars)
        tokens = _parse_inline(text)
        _add_inline(p, tokens, cfg)

    def _add_abstract_title_en(self):
        """英文摘要页：标题＋论文题目＋作者署名 (2.3节)"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.style = self.doc.styles["Heading 1"]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)
        run = p.add_run("Abstract")
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_abstract_title, bold=True)
        self.doc.add_paragraph()

        # Thesis title in English (centered)
        if self._thesis_title:
            # crude English translation placeholder — user should replace
            p2 = self.doc.add_paragraph()
            p2.alignment = WD_ALIGN_PARAGRAPH.CENTER
            _set_spacing(p2, before=0, after=0,
                         line_spacing=cfg.line_spacing_heading)
            r = p2.add_run(self._thesis_title)
            _set_font(r, cfg.font_cn_heading, cfg.font_heading_en,
                      size=cfg.size_section, bold=True)

        # Author & teacher line
        p3 = self.doc.add_paragraph()
        p3.alignment = WD_ALIGN_PARAGRAPH.CENTER
        _set_spacing(p3, before=6, after=6,
                     line_spacing=cfg.line_spacing_heading)
        r = p3.add_run("Student:  \tTeacher:  ")
        _set_font(r, cfg.font_cn, cfg.font_en, size=cfg.size_body)

    # ── keywords ──────────────────────────────────────────────────

    def _add_keywords(self, text: str, cn: bool):
        """关键词：小四宋体加粗顶格 (3.3节)"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_body)

        label = cfg.keywords_label_cn if cn else cfg.keywords_label_en
        m = re.match(r"\*\*" + re.escape(label) + r"\*\*(.*)", text)
        if m:
            run = p.add_run(label)
            _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                      size=cfg.size_keyword_label, bold=True)
            rest = m.group(1).strip()
            tokens = _parse_inline(rest)
            _add_inline(p, tokens, cfg, size=cfg.size_keyword_label)
        else:
            tokens = _parse_inline(text)
            _add_inline(p, tokens, cfg, size=cfg.size_keyword_label)

    # ── TOC ────────────────────────────────────────────────────────

    def _add_toc(self, title: str):
        cfg = self.config
        p = self.doc.add_paragraph()
        p.style = self.doc.styles["Heading 1"]
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)
        run = p.add_run(title)
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_abstract_title, bold=True)

        self.doc.add_paragraph()  # blank line

        # Word TOC field
        p2 = self.doc.add_paragraph()
        _set_spacing(p2, before=0, after=0,
                     line_spacing=cfg.line_spacing_body)
        r = p2.add_run()
        r._element.append(parse_xml(
            f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>'))
        r2 = p2.add_run()
        r2._element.append(parse_xml(
            f'<w:instrText {nsdecls("w")} xml:space="preserve">'
            ' TOC \\o "1-3" \\h \\z \\u </w:instrText>'))
        r3 = p2.add_run()
        r3._element.append(parse_xml(
            f'<w:fldChar {nsdecls("w")} w:fldCharType="separate"/>'))
        r4 = p2.add_run("（请右键此处 > 更新域）")
        _set_font(r4, cfg.font_cn, cfg.font_en, size=cfg.size_body)
        r5 = p2.add_run()
        r5._element.append(parse_xml(
            f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>'))

    # ── chapter headings (第一层次) ──────────────────────────────

    def _add_chapter(self, text: str):
        """章标题：三号宋体加粗，顶格 (§3.2 表3)"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.style = self.doc.styles["Heading 1"]
        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)

        # Ensure double space between number and title (§2.5.2 表1)
        formatted = re.sub(r"^(\d+)\s+", r"\1  ", text)
        run = p.add_run(formatted)
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_chapter, bold=True)

    # ── section heading (第二层次) ───────────────────────────────

    def _add_section(self, text: str):
        """节标题：小三号宋体加粗，顶格 (§3.2 表3)"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.style = self.doc.styles["Heading 2"]
        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)

        # Single space between number and title (§2.5.2 表1)
        formatted = re.sub(r"^(\d+\.\d+)\s+", r"\1 ", text)
        run = p.add_run(formatted)
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_section, bold=True)

    # ── subsection heading (第三层次) ──────────────────────────

    def _add_subsection(self, text: str):
        """条标题：四号宋体加粗，顶格 (§3.2 表3)"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.style = self.doc.styles["Heading 3"]
        p.alignment = WD_ALIGN_PARAGRAPH.LEFT
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)
        run = p.add_run(text)
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_subsection, bold=True)

    # ── body paragraph ──────────────────────────────────────────

    def _add_body_para(self, text: str, bold: bool = False,
                       indent: bool = True):
        """正文：小四宋体，首行缩进2字符 (§3.2)"""
        cfg = self.config
        p = self.doc.add_paragraph()
        p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_body)
        if indent:
            _set_indent(p, cfg.first_line_indent_chars)
        tokens = _parse_inline(text)
        _add_inline(p, tokens, cfg, bold=bold)

    # ── code block ──────────────────────────────────────────────

    def _process_code(self, blk: dict):
        code = blk.get("raw", "")
        if not code.strip():
            return
        cfg = self.config
        p = self.doc.add_paragraph()
        _set_spacing(p, before=0, after=0,
                     line_spacing=cfg.line_spacing_code)
        pf = p.paragraph_format
        pf.left_indent = Cm(0.75)

        pPr = p._element.get_or_add_pPr()
        shd = parse_xml(
            f'<w:shd {nsdecls("w")} w:fill="F2F2F2" w:val="clear"/>')
        pPr.append(shd)

        for line in code.split("\n"):
            if line:
                run = p.add_run(line)
                _set_font(run, cfg.font_code, cfg.font_code,
                          size=cfg.size_code)
            p.add_run("\n")