Files
md2word/docx_thesis/converter.py
2026-05-07 21:37:06 +08:00

796 lines
30 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Convert Markdown graduation thesis → formatted Word .docx.
Parses markdown line-by-line and writes a python-docx document that
complies with 桂林理工大学 理工类毕业设计(论文)格式要求.
"""
from __future__ import annotations
import re
from pathlib import Path
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls, qn
from docx.shared import Cm, Pt, RGBColor
from docx.text.paragraph import Paragraph
from docx.text.run import Run
from .config import ThesisFormat
# ── font helpers ─────────────────────────────────────────────────────────
def _set_font(
    run: Run,
    cn_font: str,
    en_font: str | None = None,
    size: float | None = None,
    bold: bool | None = None,
    italic: bool | None = None,
):
    """Apply typefaces and basic character formatting to a run.

    ``en_font`` is assigned through ``run.font.name``; ``cn_font`` is written
    to the ``w:eastAsia`` attribute of the run's ``w:rFonts`` element, which
    is created (as the first child of ``w:rPr``) when missing.  ``size`` is
    given in points.  Font names that are empty and the other arguments when
    ``None`` leave the corresponding property untouched.
    """
    font = run.font
    if en_font:
        font.name = en_font
    if cn_font:
        run_props = run._element.get_or_add_rPr()
        fonts_el = run_props.find(qn("w:rFonts"))
        if fonts_el is None:
            fonts_el = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            run_props.insert(0, fonts_el)
        fonts_el.set(qn("w:eastAsia"), cn_font)
    if size is not None:
        font.size = Pt(size)
    if bold is not None:
        font.bold = bold
    if italic is not None:
        font.italic = italic
def _set_spacing(p: Paragraph, before: int = 0, after: int = 0,
                 line_spacing: float = 1.0):
    """Set a paragraph's spacing before/after (points) and its line spacing.

    The line-spacing rule is set to ``MULTIPLE`` first and ``line_spacing``
    is assigned afterwards.
    """
    fmt = p.paragraph_format
    fmt.space_before, fmt.space_after = Pt(before), Pt(after)
    fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    fmt.line_spacing = line_spacing
def _set_indent(p: Paragraph, chars: int = 2):
    """Give the paragraph a first-line indent of ``chars`` character widths.

    One character is taken as 0.37 cm.  A non-positive ``chars`` leaves the
    paragraph unchanged.
    """
    if chars <= 0:
        return
    p.paragraph_format.first_line_indent = Cm(chars * 0.37)
def _set_page_number_fmt(section, fmt: str):
    """Set ``w:fmt`` on the section's ``w:pgNumType`` element.

    The element is appended to the section properties when it does not exist.
    """
    sect_pr = section._sectPr
    num_type = sect_pr.find(qn("w:pgNumType"))
    if num_type is None:
        num_type = parse_xml(f'<w:pgNumType {nsdecls("w")}/>')
        sect_pr.append(num_type)
    num_type.set(qn("w:fmt"), fmt)
def _setup_footer(section, roman: bool):
    """Give ``section`` its own footer containing a centred PAGE field.

    Existing footer runs are blanked, the first footer paragraph is centred
    with zero spacing, and three runs (field begin, instruction, field end)
    are appended.  Only the first of those runs receives explicit font
    settings.  The section's page-number format becomes ``lowerRoman`` when
    ``roman`` is true and ``decimal`` otherwise.
    """
    footer = section.footer
    footer.is_linked_to_previous = False

    # Blank whatever runs are already there so no stray text remains.
    for para in footer.paragraphs:
        for existing in para.runs:
            existing.text = ""

    para = footer.paragraphs[0]
    para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    fmt = para.paragraph_format
    fmt.space_before = Pt(0)
    fmt.space_after = Pt(0)

    ns = nsdecls("w")
    field_parts = (
        f'<w:fldChar {ns} w:fldCharType="begin"/>',
        f'<w:instrText {ns} xml:space="preserve"> PAGE </w:instrText>',
        f'<w:fldChar {ns} w:fldCharType="end"/>',
    )
    for position, xml in enumerate(field_parts):
        run = para.add_run()
        if position == 0:
            _set_font(run, "宋体", "Times New Roman", size=9)
        run._element.append(parse_xml(xml))

    _set_page_number_fmt(section, "lowerRoman" if roman else "decimal")
# ── inline markdown parser ───────────────────────────────────────────────
def _parse_inline(text: str):
"""Tokenise line → list of (text, attrs) tuples."""
tokens: list[tuple[str, dict]] = []
buf = ""
i = 0
n = len(text)
def flush():
nonlocal buf
if buf:
tokens.append((buf, {}))
buf = ""
while i < n:
ch = text[i]
# `code`
if ch == "`":
flush()
j = text.find("`", i + 1)
if j == -1:
buf += ch
i += 1
continue
tokens.append((text[i + 1:j], {"code": True}))
i = j + 1
continue
# **bold**
if text[i:i + 2] == "**":
flush()
j = text.find("**", i + 2)
if j == -1:
buf += ch
i += 1
continue
inner = text[i + 2:j]
sub = _parse_inline(inner)
for t, a in sub:
a["bold"] = True
tokens.append((t, a))
i = j + 2
continue
# *italic* (single star, not **)
if ch == "*" and i + 1 < n and text[i + 1] != "*":
flush()
j = text.find("*", i + 1)
if j == -1:
buf += ch
i += 1
continue
tokens.append((text[i + 1:j], {"italic": True}))
i = j + 1
continue
buf += ch
i += 1
flush()
return tokens
def _add_inline(p: Paragraph, tokens: list, cfg: ThesisFormat,
                size: float | None = None, bold: bool = False):
    """Append one run per inline token to paragraph ``p``.

    Code tokens use ``cfg.font_code`` for both typefaces; all others use
    ``cfg.font_cn`` / ``cfg.font_en``.  The run size is ``size`` or, when that
    is falsy, ``cfg.size_body``.  A run is bold when ``bold`` is set or the
    token carries ``"bold"``; for bold runs the italic property is left unset,
    otherwise it follows the token's ``"italic"`` flag.
    """
    for text, attrs in tokens:
        run = p.add_run(text)
        is_bold = bold or attrs.get("bold", False)
        if attrs.get("code", False):
            cn = en = cfg.font_code
        else:
            cn, en = cfg.font_cn, cfg.font_en
        italic = None if is_bold else attrs.get("italic", False)
        _set_font(run, cn, en, size=size or cfg.size_body,
                  bold=is_bold, italic=italic)
# ── block-level parser ───────────────────────────────────────────────────
def _parse_blocks(text: str):
lines = text.split("\n")
blocks: list[dict] = []
i, n = 0, len(lines)
while i < n:
line = lines[i]
# thematic break
if line.strip() == "---":
blocks.append({"type": "thematic_break"})
i += 1
continue
# fenced code block
if line.strip().startswith("```") or line.strip().startswith("~~~"):
fence = line.strip()[:3]
info = line.strip()[3:].strip()
code_lines: list[str] = []
i += 1
while i < n and not lines[i].strip().startswith(fence):
code_lines.append(lines[i])
i += 1
i += 1
blocks.append({"type": "block_code", "info": info,
"raw": "\n".join(code_lines)})
continue
# heading
m = re.match(r"^(#{1,6})\s+(.+)$", line)
if m:
blocks.append({"type": "heading",
"level": len(m.group(1)),
"text": m.group(2).strip()})
i += 1
continue
# blockquote
if line.strip().startswith(">"):
ql: list[str] = []
while i < n and (lines[i].strip().startswith(">")
or lines[i].strip() == ""):
ql.append(re.sub(r"^>\s?", "", lines[i]))
i += 1
blocks.append({"type": "block_quote",
"text": "\n".join(ql).strip()})
continue
# list
if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", line):
items: list[str] = []
while i < n:
if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", lines[i]):
t = re.sub(r"^(\s*)[-*+]\s|\d+\.\s", "", lines[i], 1)
items.append(t)
i += 1
while i < n and lines[i].strip() \
and not re.match(r"^(\s*)([-*+]\s|\d+\.\s)",
lines[i]):
if lines[i][0] in " \t":
items[-1] += " " + lines[i].strip()
i += 1
else:
break
elif lines[i].strip() == "":
i += 1
else:
break
blocks.append({"type": "list", "items": items})
continue
# blank
if line.strip() == "":
i += 1
continue
# paragraph (accumulate)
para: list[str] = []
while i < n and lines[i].strip():
para.append(lines[i])
i += 1
t = "\n".join(para).strip()
if t:
blocks.append({"type": "paragraph", "text": t})
return blocks
# ── converter ────────────────────────────────────────────────────────────
class ThesisConverter:
"""Markdown → 理工类毕业论文 Word 文档。
处理流程:
1. 解析 MD → blocks
2. 扫描 blocks 提取论文题目H1
3. 按章节类别写入带正确格式的 Word
4. 每章自动分页、页面网格、字体字号严格按学校要求
"""
def __init__(self, config: ThesisFormat | None = None):
    """Create a converter with an empty document.

    ``config`` supplies the formatting parameters; a default
    ``ThesisFormat()`` is used when it is omitted.
    """
    self.config = config or ThesisFormat()
    self.doc = Document()
    self._thesis_title: str = ""       # thesis title, taken from the first H1
    self._has_title = False            # title-saved flag (not read in the visible code)
    self._section_break_added = False  # True once the main-body section exists
    # Initialised here as well as in _process_blocks so that
    # _add_page_break_if_not_first is safe to call at any time.
    self._seen_first_chapter = False
# ── public API ──────────────────────────────────────────────────
def convert(self, md_path: str | Path, docx_path: str | Path):
    """Convert the Markdown file at ``md_path`` and save it to ``docx_path``.

    The source is read as UTF-8, any hand-written table of contents is
    stripped, the text is parsed into blocks, the first level-1 heading (if
    present) is remembered as the thesis title, and the document is then set
    up, filled and saved.
    """
    source = Path(md_path).read_text(encoding="utf-8")
    blocks = _parse_blocks(self._strip_manual_toc(source))

    # First H1 becomes the thesis title; without one the title is left as is.
    title = next(
        (b["text"] for b in blocks
         if b["type"] == "heading" and b["level"] == 1),
        None,
    )
    if title is not None:
        self._thesis_title = title

    self._setup_document()
    self._process_blocks(blocks)
    self.doc.save(str(docx_path))
# ── strip manual TOC ────────────────────────────────────────────
@staticmethod
def _strip_manual_toc(text: str) -> str:
lines = text.split("\n")
toc_start = -1
sep_end = -1
for i, line in enumerate(lines):
if re.search(r"[目目]\s*[次次]", line) and line.startswith("#"):
toc_start = i
if toc_start >= 0 and line.strip() == "---" and i > toc_start:
sep_end = i
break
if toc_start >= 0 and sep_end > toc_start:
kept = lines[:toc_start + 1]
kept.append("")
kept.extend(lines[sep_end:])
return "\n".join(kept)
return text
# ── page setup ──────────────────────────────────────────────────
def _setup_document(self):
    """Configure the first section and the document-wide default styles.

    * applies page geometry and the roman-numbered footer to section 0
      (``_apply_page_setup`` installs the footer itself, so it is not set up
      a second time here - doing so only left empty runs behind);
    * sets the Normal style's Western, East-Asian and complex-script
      typefaces, its size and its line spacing from the configuration;
    * configures the built-in heading styles.
    """
    cfg = self.config
    sec = self.doc.sections[0]
    self._apply_page_setup(sec, roman=True)

    # default font
    normal = self.doc.styles["Normal"]
    rpr = normal.element.get_or_add_rPr()
    rfonts = rpr.find(qn("w:rFonts"))
    if rfonts is None:
        rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
        rpr.insert(0, rfonts)
    for attr, face in (
        ("w:ascii", cfg.font_en),
        ("w:hAnsi", cfg.font_en),
        ("w:eastAsia", cfg.font_cn),
        ("w:cs", cfg.font_en),
    ):
        rfonts.set(qn(attr), face)

    # Assign through the Font proxy so the size is applied whether or not
    # the style already carries a w:sz element.
    normal.font.size = Pt(cfg.size_body)

    pf = normal.paragraph_format
    pf.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    pf.line_spacing = cfg.line_spacing_body

    self._config_heading_styles()
def _config_heading_styles(self):
    """Configure the built-in Heading 1/2/3 styles for thesis headings.

    Headings are written with these built-in styles so that the Word TOC
    field (outline levels 1-3) can pick them up.  Each style gets the
    configured heading typefaces, its level-specific size, bold black text,
    left alignment, the heading line spacing and zero spacing before/after.
    Heading 1 additionally keeps with the next paragraph; page breaks between
    chapters are inserted explicitly elsewhere, not through the style.
    """
    cfg = self.config
    styles = self.doc.styles
    level_sizes = (
        ("Heading 1", cfg.size_chapter),      # chapter
        ("Heading 2", cfg.size_section),      # section
        ("Heading 3", cfg.size_subsection),   # subsection
    )
    for style_name, size in level_sizes:
        style = styles[style_name]

        font = style.font
        font.name = cfg.font_heading_en
        rpr = style.element.get_or_add_rPr()
        rfonts = rpr.find(qn("w:rFonts"))
        if rfonts is None:
            rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            rpr.insert(0, rfonts)
        rfonts.set(qn("w:eastAsia"), cfg.font_cn_heading)
        font.size = Pt(size)
        font.bold = True
        font.color.rgb = RGBColor(0, 0, 0)

        pf = style.paragraph_format
        pf.alignment = WD_ALIGN_PARAGRAPH.LEFT
        pf.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        pf.line_spacing = cfg.line_spacing_heading
        pf.space_before = Pt(0)
        pf.space_after = Pt(0)

    # Use the ParagraphFormat property rather than appending a raw
    # <w:keepNext/>: it reuses an existing element and keeps schema order,
    # so the style never ends up with a duplicate.
    styles["Heading 1"].paragraph_format.keep_with_next = True
def _add_section_break_main(self):
    """Start the main-body section with arabic page numbers.

    A new section is appended, given the standard page setup with a decimal
    page-number footer, and its numbering is restarted at 1 so the body does
    not continue the count of the roman-numbered front matter.
    """
    sec = self.doc.add_section()
    self._apply_page_setup(sec, roman=False)

    sect_pr = sec._sectPr
    num_type = sect_pr.find(qn("w:pgNumType"))
    if num_type is None:
        num_type = parse_xml(f'<w:pgNumType {nsdecls("w")}/>')
        sect_pr.append(num_type)
    # Without w:start Word keeps counting from the previous section.
    num_type.set(qn("w:start"), "1")

    self._section_break_added = True
def _apply_page_setup(self, sec, roman: bool = True):
    """Apply page size, margins, document grid and footer to a section.

    Page dimensions and margins are read from the configuration in
    centimetres.  Any existing ``w:pgMar`` / ``w:docGrid`` is replaced.  The
    grid is sized so that the text area holds ``grid_lines_per_page`` lines
    and ``grid_chars_per_line`` characters.  ``roman`` selects lower-roman or
    decimal page numbers for the footer.
    """
    cfg = self.config
    sec.page_width = Cm(cfg.page_width)
    sec.page_height = Cm(cfg.page_height)

    sect_pr = sec._sectPr
    for el in list(sect_pr):
        if el.tag in (qn("w:pgMar"), qn("w:docGrid")):
            sect_pr.remove(el)

    # Margins are written in twips (567 per cm).  w:gutter is included
    # because the schema lists it as a required attribute of w:pgMar.
    pgMar = parse_xml(
        f'<w:pgMar {nsdecls("w")} '
        f'w:top="{int(cfg.margin_top * 567)}" '
        f'w:bottom="{int(cfg.margin_bottom * 567)}" '
        f'w:left="{int(cfg.margin_left * 567)}" '
        f'w:right="{int(cfg.margin_right * 567)}" '
        f'w:header="0" '
        f'w:footer="{int(cfg.footer_distance * 567)}" '
        f'w:gutter="0"/>')
    sect_pr.append(pgMar)

    # linePitch is expressed in twips (56.7 per mm).
    text_height_mm = (cfg.page_height - cfg.margin_top
                      - cfg.margin_bottom) * 10
    line_pitch = int(text_height_mm / cfg.grid_lines_per_page * 56.7)

    # charSpace is NOT a pitch in twips: it is the difference between the
    # desired character pitch and the base font size, in 1/4096 pt
    # (ECMA-376 Part 1, 17.6.5).  The base size is the Normal style's size,
    # which this converter sets to cfg.size_body.
    text_width_pt = (cfg.page_width - cfg.margin_left
                     - cfg.margin_right) * 72 / 2.54
    char_pitch_pt = text_width_pt / cfg.grid_chars_per_line
    char_space = round((char_pitch_pt - cfg.size_body) * 4096)

    dg = parse_xml(
        f'<w:docGrid {nsdecls("w")} '
        f'w:type="linesAndChars" '
        f'w:linePitch="{line_pitch}" '
        f'w:charSpace="{char_space}"/>')
    sect_pr.append(dg)

    _setup_footer(sec, roman=roman)
# ── block processing ────────────────────────────────────────────
def _process_blocks(self, blocks):
    """Render parsed blocks into the document, tracking the thesis part.

    State machine: ``before_abstract -> abstract_cn -> abstract_en -> toc ->
    main``.

    * Level-1 headings (the thesis title) are not rendered.
    * Level-2 headings select the part: the Chinese abstract, ``Abstract``,
      the table of contents, or otherwise a chapter.  The first chapter
      starts the main-body section; later chapters start on a new page.
    * Level-3 headings numbered ``N.N.N`` become subsections, other level-3
      headings become sections; deeper headings are written as bold body
      paragraphs without indent.
    * Paragraphs go to the abstract body, the keywords line or the normal
      body depending on state.  Code blocks are skipped inside abstracts and
      the TOC.  Thematic breaks are ignored.

    Heading and keyword detection is done on NFKC-normalised text so that
    full-width spaces/colons and compatibility forms of the CJK characters
    are recognised; a keywords label wrapped in ``**`` is recognised too.
    """
    import unicodedata  # function-scope import: only needed by this method

    front_matter = ("before_abstract", "abstract_cn", "abstract_en", "toc")

    def keyword_lang(paragraph: str):
        # Return "cn"/"en" for a keywords paragraph, else None.
        probe = unicodedata.normalize("NFKC", paragraph).lstrip("*").lstrip()
        if probe.startswith("关键词:"):
            return "cn"
        if probe.startswith("Key words:"):
            return "en"
        return None

    state = "before_abstract"
    self._seen_first_chapter = False

    for blk in blocks:
        t = blk["type"]

        if t == "heading" and blk["level"] == 1:
            # Thesis title - not rendered in the flow.
            continue

        if t == "heading" and blk["level"] == 2:
            txt = blk["text"].strip()
            # Normalise and drop all whitespace before comparing titles.
            compact = "".join(unicodedata.normalize("NFKC", txt).split())
            if compact == "摘要":
                state = "abstract_cn"
                self._add_abstract_title("摘 要")
                continue
            if txt == "Abstract":
                self._add_abstract_title_en()
                state = "abstract_en"
                continue
            # Previously tested `"" in txt`, which is true for every string
            # and turned every chapter heading into a TOC heading.
            if "目次" in compact:
                state = "toc"
                self._add_toc("目 次")
                continue
            # Normal chapter
            if state in front_matter:
                self._add_section_break_main()
            state = "main"
            self._add_page_break_if_not_first()
            self._add_chapter(txt)
            continue

        if t == "heading" and blk["level"] == 3:
            self._ensure_main_section(state)
            state = "main"
            txt = blk["text"].strip()
            if re.match(r"^\d+\.\d+\.\d+\s", txt):
                self._add_subsection(txt)
            else:
                self._add_section(txt)
            continue

        if t == "heading" and blk["level"] >= 4:
            self._ensure_main_section(state)
            state = "main"
            # Deeper headings are rendered as bold body text.
            self._add_body_para(blk["text"], bold=True, indent=False)
            continue

        if t == "paragraph":
            txt = blk["text"]
            if not txt.strip():
                continue
            kw = keyword_lang(txt)
            if state == "abstract_cn":
                if kw == "cn":
                    self._add_keywords(txt, cn=True)
                else:
                    self._add_abstract_body(txt)
                continue
            if state == "abstract_en":
                if kw == "en":
                    self._add_keywords(txt, cn=False)
                else:
                    self._add_abstract_body(txt)
                continue
            # Normal body
            self._ensure_main_section(state)
            state = "main"
            if kw is not None:
                self._add_keywords(txt, cn=(kw == "cn"))
            else:
                self._add_body_para(txt)
            continue

        if t == "block_code":
            # Code inside the abstracts or the TOC is dropped.
            if state in ("abstract_cn", "abstract_en", "toc"):
                continue
            self._ensure_main_section(state)
            state = "main"
            self._process_code(blk)
            continue

        if t == "block_quote":
            txt = blk.get("text", "").strip()
            if not txt:
                continue
            self._ensure_main_section(state)
            state = "main"
            self._add_body_para(txt)
            continue

        if t == "list":
            self._ensure_main_section(state)
            state = "main"
            for item in blk.get("items", []):
                self._add_body_para(item)
            continue

        if t == "thematic_break":
            # Separators carry no content of their own.
            continue
def _ensure_main_section(self, state: str):
if state in ("before_abstract", "abstract_cn", "abstract_en", "toc"):
if not self._section_break_added:
self._add_section_break_main()
def _add_page_break_if_not_first(self):
if self._seen_first_chapter:
self.doc.add_page_break()
else:
self._seen_first_chapter = True
# ══════════════════════════════════════════════════════════════
# rendering methods
# ══════════════════════════════════════════════════════════════
# ── abstract ──────────────────────────────────────────────────
def _add_abstract_title(self, text: str):
    """Write the abstract heading followed by an empty paragraph.

    The heading uses the Heading 1 style, is centred, bold, set in the
    configured heading typefaces at ``cfg.size_abstract_title`` and uses the
    heading line spacing with no space before or after.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 1"]
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run(text), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_abstract_title, bold=True)

    # Blank line between the heading and the abstract text.
    doc.add_paragraph()
def _add_abstract_body(self, text: str):
    """Write one abstract paragraph.

    The paragraph is justified, uses the body line spacing with no space
    before or after, gets the configured first-line indent, and its text is
    rendered through the inline Markdown parser at the default body size.
    """
    cfg = self.config
    para = self.doc.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    _set_spacing(para, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)
    _set_indent(para, cfg.first_line_indent_chars)
    _add_inline(para, _parse_inline(text), cfg)
def _add_abstract_title_en(self):
    """Write the head of the English abstract page.

    Emits, in order: a centred ``Abstract`` heading (Heading 1 style), an
    empty paragraph, the stored thesis title (only when one was found; it is
    written as stored, not translated) and a centred ``Student / Teacher``
    line with blank fields.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 1"]
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run("Abstract"), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_abstract_title, bold=True)
    doc.add_paragraph()

    # Thesis title, centred.  This is a placeholder: the stored title is the
    # Markdown H1, which the user is expected to replace with the English one.
    if self._thesis_title:
        title = doc.add_paragraph()
        title.alignment = WD_ALIGN_PARAGRAPH.CENTER
        _set_spacing(title, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)
        _set_font(title.add_run(self._thesis_title), cfg.font_cn_heading,
                  cfg.font_heading_en, size=cfg.size_section, bold=True)

    # Author / supervisor line.
    byline = doc.add_paragraph()
    byline.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _set_spacing(byline, before=6, after=6,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(byline.add_run("Student: \tTeacher: "), cfg.font_cn,
              cfg.font_en, size=cfg.size_body)
# ── keywords ──────────────────────────────────────────────────
def _add_keywords(self, text: str, cn: bool):
    """Write the keywords line: bold label, flush left, then the keywords.

    ``cn`` selects ``cfg.keywords_label_cn`` or ``cfg.keywords_label_en``.
    When ``text`` starts with that label - either plain or wrapped as
    ``**label**`` - the label is written as its own bold run and the
    remainder (which may span several lines) is rendered through the inline
    parser.  Otherwise the whole text is rendered through the inline parser.
    """
    cfg = self.config
    p = self.doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(p, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)

    label = cfg.keywords_label_cn if cn else cfg.keywords_label_en
    # (\*\*)? ... (?(1)\*\*): the closing ** is required exactly when the
    # opening ** was present.  DOTALL keeps text after a line break.
    m = None
    if label:
        m = re.match(
            r"(\*\*)?" + re.escape(label) + r"(?(1)\*\*)(.*)",
            text, re.DOTALL)

    if m:
        run = p.add_run(label)
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_keyword_label, bold=True)
        rest = m.group(2).strip()
        _add_inline(p, _parse_inline(rest), cfg,
                    size=cfg.size_keyword_label)
    else:
        _add_inline(p, _parse_inline(text), cfg,
                    size=cfg.size_keyword_label)
# ── TOC ────────────────────────────────────────────────────────
def _add_toc(self, title: str):
    """Write the table-of-contents heading and a Word TOC field.

    The heading is a centred Heading 1 paragraph followed by an empty
    paragraph.  The field paragraph holds a complex field (begin,
    instruction, separate, placeholder result, end) covering outline levels
    1-3; the placeholder text tells the user to update the field in Word.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 1"]
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run(title), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_abstract_title, bold=True)
    doc.add_paragraph()  # blank line

    # Word TOC field
    field = doc.add_paragraph()
    _set_spacing(field, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)

    def emit(xml: str):
        # Append a fresh run to the field paragraph holding one XML fragment.
        field.add_run()._element.append(parse_xml(xml))

    ns = nsdecls("w")
    emit(f'<w:fldChar {ns} w:fldCharType="begin"/>')
    emit(f'<w:instrText {ns} xml:space="preserve">'
         ' TOC \\o "1-3" \\h \\z \\u </w:instrText>')
    emit(f'<w:fldChar {ns} w:fldCharType="separate"/>')
    placeholder = field.add_run("(请右键此处 > 更新域)")
    _set_font(placeholder, cfg.font_cn, cfg.font_en, size=cfg.size_body)
    emit(f'<w:fldChar {ns} w:fldCharType="end"/>')
# ── chapter headings (第一层次) ──────────────────────────────
def _add_chapter(self, text: str):
    """Write a chapter heading (first heading level).

    The paragraph uses the Heading 1 style, is left aligned with the heading
    line spacing and no space before/after, and its run is bold in the
    configured heading typefaces at ``cfg.size_chapter``.  A leading chapter
    number is separated from the title by exactly two spaces.
    """
    cfg = self.config
    p = self.doc.add_paragraph()
    p.style = self.doc.styles["Heading 1"]
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(p, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    # Double space between number and title; the replacement previously
    # wrote a single space, contradicting this requirement.
    formatted = re.sub(r"^(\d+)\s+", r"\1  ", text)
    run = p.add_run(formatted)
    _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
              size=cfg.size_chapter, bold=True)
# ── section heading (第二层次) ───────────────────────────────
def _add_section(self, text: str):
    """Write a section heading (second heading level).

    The paragraph uses the Heading 2 style, is left aligned with the heading
    line spacing and no space before/after; the run is bold in the configured
    heading typefaces at ``cfg.size_section``.  Whitespace after a leading
    ``N.N`` number is collapsed to a single space.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 2"]
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)

    # Single space between number and title.
    label = re.sub(r"^(\d+\.\d+)\s+", r"\1 ", text)
    _set_font(heading.add_run(label), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_section, bold=True)
# ── subsection heading (第三层次) ──────────────────────────
def _add_subsection(self, text: str):
    """Write a subsection heading (third heading level).

    The paragraph uses the Heading 3 style, is left aligned with the heading
    line spacing and no space before/after; the text is written unchanged as
    a bold run in the configured heading typefaces at ``cfg.size_subsection``.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 3"]
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run(text), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_subsection, bold=True)
# ── body paragraph ──────────────────────────────────────────
def _add_body_para(self, text: str, bold: bool = False,
                   indent: bool = True):
    """Write one body paragraph.

    The paragraph is justified with the body line spacing and no space before
    or after.  ``indent`` controls whether the configured first-line indent
    is applied; ``bold`` forces every run to bold.  The text is rendered
    through the inline Markdown parser at the default body size.
    """
    cfg = self.config
    para = self.doc.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    _set_spacing(para, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)
    if indent:
        _set_indent(para, cfg.first_line_indent_chars)
    _add_inline(para, _parse_inline(text), cfg, bold=bold)
# ── code block ──────────────────────────────────────────────
def _process_code(self, blk: dict):
    """Write a fenced code block as one shaded, left-indented paragraph.

    ``blk["raw"]`` holds the code; blocks that are empty or whitespace-only
    are skipped.  Each non-empty source line becomes a run in the code
    typeface at ``cfg.size_code``; consecutive lines are separated by a line
    break.  No break is added after the last line, so the shaded box does
    not end with an empty line.
    """
    code = blk.get("raw", "")
    if not code.strip():
        return
    cfg = self.config
    p = self.doc.add_paragraph()
    _set_spacing(p, before=0, after=0,
                 line_spacing=cfg.line_spacing_code)
    p.paragraph_format.left_indent = Cm(0.75)

    # Light-grey background for the whole paragraph.
    pPr = p._element.get_or_add_pPr()
    shd = parse_xml(
        f'<w:shd {nsdecls("w")} w:fill="F2F2F2" w:val="clear"/>')
    pPr.append(shd)

    for index, line in enumerate(code.split("\n")):
        if index:
            # Break between lines only - never after the final one.
            p.add_run("\n")
        if line:
            run = p.add_run(line)
            _set_font(run, cfg.font_code, cfg.font_code,
                      size=cfg.size_code)