init
This commit is contained in:
795
docx_thesis/converter.py
Normal file
795
docx_thesis/converter.py
Normal file
@@ -0,0 +1,795 @@
|
||||
"""Convert Markdown graduation thesis → formatted Word .docx.
|
||||
|
||||
Parses markdown line-by-line and writes a python-docx document that
|
||||
complies with 桂林理工大学 理工类毕业设计(论文)格式要求.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
|
||||
from docx.oxml import parse_xml
|
||||
from docx.oxml.ns import nsdecls, qn
|
||||
from docx.shared import Cm, Pt, RGBColor
|
||||
from docx.text.paragraph import Paragraph
|
||||
from docx.text.run import Run
|
||||
|
||||
from .config import ThesisFormat
|
||||
|
||||
|
||||
# ── font helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _set_font(
|
||||
run: Run,
|
||||
cn_font: str,
|
||||
en_font: str | None = None,
|
||||
size: float | None = None,
|
||||
bold: bool | None = None,
|
||||
italic: bool | None = None,
|
||||
):
|
||||
if en_font:
|
||||
run.font.name = en_font
|
||||
if cn_font:
|
||||
rpr = run._element.get_or_add_rPr()
|
||||
rfonts = rpr.find(qn("w:rFonts"))
|
||||
if rfonts is None:
|
||||
rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
|
||||
rpr.insert(0, rfonts)
|
||||
rfonts.set(qn("w:eastAsia"), cn_font)
|
||||
if size is not None:
|
||||
run.font.size = Pt(size)
|
||||
if bold is not None:
|
||||
run.font.bold = bold
|
||||
if italic is not None:
|
||||
run.font.italic = italic
|
||||
|
||||
|
||||
def _set_spacing(p: Paragraph, before: int = 0, after: int = 0,
                 line_spacing: float = 1.0):
    """Set the spacing around and the line spacing of a paragraph.

    Args:
        p: Paragraph to adjust.
        before: Space before the paragraph, in points.
        after: Space after the paragraph, in points.
        line_spacing: Line-spacing value, applied with the MULTIPLE rule.
    """
    fmt = p.paragraph_format
    fmt.space_before, fmt.space_after = Pt(before), Pt(after)
    fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    fmt.line_spacing = line_spacing
|
||||
|
||||
|
||||
def _set_indent(p: Paragraph, chars: int = 2):
|
||||
if chars > 0:
|
||||
p.paragraph_format.first_line_indent = Cm(chars * 0.37)
|
||||
|
||||
|
||||
def _set_page_number_fmt(section, fmt: str):
    """Set the page-number format (``w:pgNumType/@w:fmt``) of a section.

    Args:
        section: Section whose ``w:sectPr`` element is updated.
        fmt: Value for the ``w:fmt`` attribute, e.g. ``"lowerRoman"`` or
            ``"decimal"``.
    """
    sect_pr = section._sectPr
    el = sect_pr.find(qn("w:pgNumType"))
    if el is None:
        el = parse_xml(f'<w:pgNumType {nsdecls("w")}/>')
        # w:sectPr children form an ordered sequence in the schema, so the
        # new element goes in front of everything that must follow it
        # rather than at the very end (after w:cols / w:docGrid).
        sect_pr.insert_element_before(
            el,
            "w:cols", "w:formProt", "w:vAlign", "w:noEndnote", "w:titlePg",
            "w:textDirection", "w:bidi", "w:rtlGutter", "w:docGrid",
            "w:printerSettings", "w:sectPrChange",
        )
    el.set(qn("w:fmt"), fmt)
|
||||
|
||||
|
||||
def _setup_footer(section, roman: bool):
    """Give *section* its own centred footer containing a PAGE field.

    Args:
        section: Section whose footer is (re)built. The footer is unlinked
            from the previous section so each section keeps its own.
        roman: ``True`` selects lower-case roman page numbers, ``False``
            selects decimal ones.
    """
    footer = section.footer
    footer.is_linked_to_previous = False

    paragraphs = footer.paragraphs
    # Guard against a footer without any paragraph instead of indexing blindly.
    p = paragraphs[0] if paragraphs else footer.add_paragraph()

    # Drop runs left by the template or by an earlier call so that repeated
    # calls do not accumulate empty runs or stale field markers.
    for para in paragraphs:
        for stale in para.runs:
            stale._element.getparent().remove(stale._element)

    p.alignment = WD_ALIGN_PARAGRAPH.CENTER
    p.paragraph_format.space_before = Pt(0)
    p.paragraph_format.space_after = Pt(0)

    field_parts = (
        f'<w:fldChar {nsdecls("w")} w:fldCharType="begin"/>',
        f'<w:instrText {nsdecls("w")} xml:space="preserve"> PAGE </w:instrText>',
        f'<w:fldChar {nsdecls("w")} w:fldCharType="end"/>',
    )
    for xml in field_parts:
        run = p.add_run()
        # Format every run of the field, not just the opening marker, so the
        # instruction run carries the same font settings as its neighbours.
        _set_font(run, "宋体", "Times New Roman", size=9)
        run._element.append(parse_xml(xml))

    _set_page_number_fmt(section, "lowerRoman" if roman else "decimal")
|
||||
|
||||
|
||||
# ── inline markdown parser ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def _parse_inline(text: str):
|
||||
"""Tokenise line → list of (text, attrs) tuples."""
|
||||
tokens: list[tuple[str, dict]] = []
|
||||
buf = ""
|
||||
i = 0
|
||||
n = len(text)
|
||||
|
||||
def flush():
|
||||
nonlocal buf
|
||||
if buf:
|
||||
tokens.append((buf, {}))
|
||||
buf = ""
|
||||
|
||||
while i < n:
|
||||
ch = text[i]
|
||||
# `code`
|
||||
if ch == "`":
|
||||
flush()
|
||||
j = text.find("`", i + 1)
|
||||
if j == -1:
|
||||
buf += ch
|
||||
i += 1
|
||||
continue
|
||||
tokens.append((text[i + 1:j], {"code": True}))
|
||||
i = j + 1
|
||||
continue
|
||||
# **bold**
|
||||
if text[i:i + 2] == "**":
|
||||
flush()
|
||||
j = text.find("**", i + 2)
|
||||
if j == -1:
|
||||
buf += ch
|
||||
i += 1
|
||||
continue
|
||||
inner = text[i + 2:j]
|
||||
sub = _parse_inline(inner)
|
||||
for t, a in sub:
|
||||
a["bold"] = True
|
||||
tokens.append((t, a))
|
||||
i = j + 2
|
||||
continue
|
||||
# *italic* (single star, not **)
|
||||
if ch == "*" and i + 1 < n and text[i + 1] != "*":
|
||||
flush()
|
||||
j = text.find("*", i + 1)
|
||||
if j == -1:
|
||||
buf += ch
|
||||
i += 1
|
||||
continue
|
||||
tokens.append((text[i + 1:j], {"italic": True}))
|
||||
i = j + 1
|
||||
continue
|
||||
buf += ch
|
||||
i += 1
|
||||
flush()
|
||||
return tokens
|
||||
|
||||
|
||||
def _add_inline(p: Paragraph, tokens: list, cfg: ThesisFormat,
                size: float | None = None, bold: bool = False):
    """Append inline tokens to *p* as formatted runs.

    Args:
        p: Paragraph receiving the runs.
        tokens: ``(text, attrs)`` tuples as produced by ``_parse_inline``.
        cfg: Format configuration supplying ``font_cn``, ``font_en``,
            ``font_code`` and ``size_body``.
        size: Font size in points; falls back to ``cfg.size_body`` when
            omitted (or falsy).
        bold: Force bold on every run in addition to per-token bold.
    """
    for text, attrs in tokens:
        run = p.add_run(text)
        is_code = attrs.get("code", False)
        is_bold = bold or attrs.get("bold", False)

        # Italic used to be dropped whenever the run was bold, so
        # bold+italic text lost its emphasis. Italic tokens are now always
        # italic; non-italic tokens keep the previous values (explicitly
        # off for regular runs, inherited for bold runs).
        if attrs.get("italic", False):
            italic = True
        else:
            italic = None if is_bold else False

        face_cn = cfg.font_code if is_code else cfg.font_cn
        face_en = cfg.font_code if is_code else cfg.font_en
        _set_font(run, face_cn, face_en, size=size or cfg.size_body,
                  bold=is_bold, italic=italic)
|
||||
|
||||
|
||||
# ── block-level parser ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _parse_blocks(text: str):
|
||||
lines = text.split("\n")
|
||||
blocks: list[dict] = []
|
||||
i, n = 0, len(lines)
|
||||
|
||||
while i < n:
|
||||
line = lines[i]
|
||||
|
||||
# thematic break
|
||||
if line.strip() == "---":
|
||||
blocks.append({"type": "thematic_break"})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# fenced code block
|
||||
if line.strip().startswith("```") or line.strip().startswith("~~~"):
|
||||
fence = line.strip()[:3]
|
||||
info = line.strip()[3:].strip()
|
||||
code_lines: list[str] = []
|
||||
i += 1
|
||||
while i < n and not lines[i].strip().startswith(fence):
|
||||
code_lines.append(lines[i])
|
||||
i += 1
|
||||
i += 1
|
||||
blocks.append({"type": "block_code", "info": info,
|
||||
"raw": "\n".join(code_lines)})
|
||||
continue
|
||||
|
||||
# heading
|
||||
m = re.match(r"^(#{1,6})\s+(.+)$", line)
|
||||
if m:
|
||||
blocks.append({"type": "heading",
|
||||
"level": len(m.group(1)),
|
||||
"text": m.group(2).strip()})
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# blockquote
|
||||
if line.strip().startswith(">"):
|
||||
ql: list[str] = []
|
||||
while i < n and (lines[i].strip().startswith(">")
|
||||
or lines[i].strip() == ""):
|
||||
ql.append(re.sub(r"^>\s?", "", lines[i]))
|
||||
i += 1
|
||||
blocks.append({"type": "block_quote",
|
||||
"text": "\n".join(ql).strip()})
|
||||
continue
|
||||
|
||||
# list
|
||||
if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", line):
|
||||
items: list[str] = []
|
||||
while i < n:
|
||||
if re.match(r"^(\s*)([-*+]\s|\d+\.\s)", lines[i]):
|
||||
t = re.sub(r"^(\s*)[-*+]\s|\d+\.\s", "", lines[i], 1)
|
||||
items.append(t)
|
||||
i += 1
|
||||
while i < n and lines[i].strip() \
|
||||
and not re.match(r"^(\s*)([-*+]\s|\d+\.\s)",
|
||||
lines[i]):
|
||||
if lines[i][0] in " \t":
|
||||
items[-1] += " " + lines[i].strip()
|
||||
i += 1
|
||||
else:
|
||||
break
|
||||
elif lines[i].strip() == "":
|
||||
i += 1
|
||||
else:
|
||||
break
|
||||
blocks.append({"type": "list", "items": items})
|
||||
continue
|
||||
|
||||
# blank
|
||||
if line.strip() == "":
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# paragraph (accumulate)
|
||||
para: list[str] = []
|
||||
while i < n and lines[i].strip():
|
||||
para.append(lines[i])
|
||||
i += 1
|
||||
t = "\n".join(para).strip()
|
||||
if t:
|
||||
blocks.append({"type": "paragraph", "text": t})
|
||||
|
||||
return blocks
|
||||
|
||||
|
||||
# ── converter ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class ThesisConverter:
|
||||
"""Markdown → 理工类毕业论文 Word 文档。
|
||||
|
||||
处理流程:
|
||||
1. 解析 MD → blocks
|
||||
2. 扫描 blocks 提取论文题目(H1)
|
||||
3. 按章节类别写入带正确格式的 Word
|
||||
4. 每章自动分页、页面网格、字体字号严格按学校要求
|
||||
"""
|
||||
|
||||
def __init__(self, config: ThesisFormat | None = None):
    """Create a converter with an empty document.

    Args:
        config: Format settings; a default ``ThesisFormat`` is created
            when none (or a falsy value) is supplied.
    """
    self.config = config if config else ThesisFormat()
    self.doc = Document()

    # Conversion state.
    self._section_break_added = False  # main-text section break inserted?
    self._has_title = False            # thesis title already stored?
    self._thesis_title: str = ""       # thesis title, taken from the H1
|
||||
|
||||
# ── public API ──────────────────────────────────────────────────
|
||||
|
||||
def convert(self, md_path: str | Path, docx_path: str | Path):
    """Convert the Markdown file at *md_path* and save it to *docx_path*.

    The source is read as UTF-8, a hand-written table of contents is
    stripped, the text is parsed into blocks, the first level-1 heading is
    remembered as the thesis title, and the blocks are rendered into
    ``self.doc`` before saving.

    Args:
        md_path: Path of the Markdown source file.
        docx_path: Destination path of the Word document.
    """
    source = self._strip_manual_toc(Path(md_path).read_text(encoding="utf-8"))
    blocks = _parse_blocks(source)

    # The first H1, if any, supplies the thesis title.
    title = next(
        (b["text"] for b in blocks
         if b["type"] == "heading" and b["level"] == 1),
        None,
    )
    if title is not None:
        self._thesis_title = title

    self._setup_document()
    self._process_blocks(blocks)
    self.doc.save(str(docx_path))
|
||||
|
||||
# ── strip manual TOC ────────────────────────────────────────────
|
||||
|
||||
@staticmethod
|
||||
def _strip_manual_toc(text: str) -> str:
|
||||
lines = text.split("\n")
|
||||
toc_start = -1
|
||||
sep_end = -1
|
||||
for i, line in enumerate(lines):
|
||||
if re.search(r"[目目]\s*[次次]", line) and line.startswith("#"):
|
||||
toc_start = i
|
||||
if toc_start >= 0 and line.strip() == "---" and i > toc_start:
|
||||
sep_end = i
|
||||
break
|
||||
if toc_start >= 0 and sep_end > toc_start:
|
||||
kept = lines[:toc_start + 1]
|
||||
kept.append("")
|
||||
kept.extend(lines[sep_end:])
|
||||
return "\n".join(kept)
|
||||
return text
|
||||
|
||||
# ── page setup ──────────────────────────────────────────────────
|
||||
|
||||
def _setup_document(self):
    """Prepare page layout, default fonts and heading styles.

    Applies the page setup (which also installs the roman-numbered footer)
    to the first section, sets the fonts, size and line spacing of the
    ``Normal`` style, then configures the built-in heading styles.
    """
    cfg = self.config

    # _apply_page_setup installs the footer itself, so no separate footer
    # call is needed here.
    self._apply_page_setup(self.doc.sections[0], roman=True)

    # default font
    normal = self.doc.styles["Normal"]
    rpr = normal.element.get_or_add_rPr()
    rfonts = rpr.find(qn("w:rFonts"))
    if rfonts is None:
        rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
        rpr.insert(0, rfonts)
    rfonts.set(qn("w:ascii"), cfg.font_en)
    rfonts.set(qn("w:hAnsi"), cfg.font_en)
    rfonts.set(qn("w:eastAsia"), cfg.font_cn)
    rfonts.set(qn("w:cs"), cfg.font_en)

    # Set the size through the font API: it updates an existing w:sz as
    # well as creating a missing one, and places the element correctly.
    normal.font.size = Pt(cfg.size_body)

    pf = normal.paragraph_format
    pf.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    pf.line_spacing = cfg.line_spacing_body

    self._config_heading_styles()
|
||||
|
||||
def _config_heading_styles(self):
    """Configure the built-in Heading 1/2/3 styles for the thesis format.

    The headings stay real ``Heading n`` styles so that a Word TOC field
    (which collects headings by outline level) can pick them up.

    Each style gets the configured heading fonts, bold black text, left
    alignment, zero spacing before/after and the heading line spacing.
    Sizes come from ``size_chapter``, ``size_section`` and
    ``size_subsection`` respectively. Heading 1 is additionally kept with
    the following paragraph.
    """
    cfg = self.config
    styles = self.doc.styles

    levels = (
        ("Heading 1", cfg.size_chapter),     # chapter
        ("Heading 2", cfg.size_section),     # section
        ("Heading 3", cfg.size_subsection),  # subsection
    )
    # Theme font references take precedence over explicit font names, so
    # they are removed to let the configured fonts take effect.
    theme_attrs = ("w:asciiTheme", "w:hAnsiTheme", "w:eastAsiaTheme",
                   "w:cstheme")

    for style_name, size in levels:
        style = styles[style_name]

        rpr = style.element.get_or_add_rPr()
        rfonts = rpr.find(qn("w:rFonts"))
        if rfonts is None:
            rfonts = parse_xml(f'<w:rFonts {nsdecls("w")}/>')
            rpr.insert(0, rfonts)
        for attr in theme_attrs:
            rfonts.attrib.pop(qn(attr), None)

        style.font.name = cfg.font_heading_en
        rfonts.set(qn("w:eastAsia"), cfg.font_cn_heading)
        style.font.size = Pt(size)
        style.font.bold = True
        style.font.color.rgb = RGBColor(0, 0, 0)

        fmt = style.paragraph_format
        fmt.alignment = WD_ALIGN_PARAGRAPH.LEFT
        fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        fmt.line_spacing = cfg.line_spacing_heading
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)

    # Use the paragraph-format API instead of appending a raw w:keepNext:
    # it reuses an element the style already has and keeps w:pPr children
    # in schema order.
    styles["Heading 1"].paragraph_format.keep_with_next = True
|
||||
|
||||
def _add_section_break_main(self):
|
||||
sec = self.doc.add_section()
|
||||
self._apply_page_setup(sec, roman=False)
|
||||
self._section_break_added = True
|
||||
|
||||
def _apply_page_setup(self, sec, roman: bool = True):
    """Apply page size, margins, document grid and footer to a section.

    Args:
        sec: Section to configure.
        roman: Passed to ``_setup_footer``; ``True`` for lower-case roman
            page numbers, ``False`` for decimal ones.
    """
    cfg = self.config
    sec.page_width = Cm(cfg.page_width)
    sec.page_height = Cm(cfg.page_height)

    # Margins go through the Section API, which updates w:pgMar in place:
    # the element keeps its schema position and existing attributes such
    # as w:gutter survive.
    sec.top_margin = Cm(cfg.margin_top)
    sec.bottom_margin = Cm(cfg.margin_bottom)
    sec.left_margin = Cm(cfg.margin_left)
    sec.right_margin = Cm(cfg.margin_right)
    sec.header_distance = Cm(0)
    sec.footer_distance = Cm(cfg.footer_distance)

    # Line pitch in twips (1 mm = 56.7 twips).
    text_height_mm = (cfg.page_height - cfg.margin_top
                      - cfg.margin_bottom) * 10
    line_pitch = int(text_height_mm / cfg.grid_lines_per_page * 56.7)

    # w:charSpace is not an absolute pitch: it is the difference between
    # the desired character pitch and the Normal font size, in 1/4096 pt.
    # NOTE(review): assumes cfg.size_body is the Normal-style size in points.
    text_width_pt = (cfg.page_width - cfg.margin_left
                     - cfg.margin_right) / 2.54 * 72
    char_pitch_pt = text_width_pt / cfg.grid_chars_per_line
    char_space = round((char_pitch_pt - cfg.size_body) * 4096)

    sect_pr = sec._sectPr
    grid = sect_pr.find(qn("w:docGrid"))
    if grid is None:
        grid = parse_xml(f'<w:docGrid {nsdecls("w")}/>')
        # Only these two elements may follow w:docGrid inside w:sectPr.
        sect_pr.insert_element_before(
            grid, "w:printerSettings", "w:sectPrChange")
    grid.set(qn("w:type"), "linesAndChars")
    grid.set(qn("w:linePitch"), str(line_pitch))
    grid.set(qn("w:charSpace"), str(char_space))

    _setup_footer(sec, roman=roman)
|
||||
|
||||
# ── block processing ────────────────────────────────────────────
|
||||
|
||||
def _process_blocks(self, blocks):
|
||||
# State machine:
|
||||
# before_abstract → abstract_cn → abstract_en → toc → main
|
||||
state = "before_abstract"
|
||||
self._seen_first_chapter = False
|
||||
|
||||
for blk in blocks:
|
||||
t = blk["type"]
|
||||
|
||||
if t == "heading" and blk["level"] == 1:
|
||||
# Skip H1 (thesis title) — not rendered on Chinese abstract
|
||||
continue
|
||||
|
||||
if t == "heading" and blk["level"] == 2:
|
||||
txt = blk["text"].strip()
|
||||
if txt.replace(" ", "") == "摘 要".replace(" ", ""):
|
||||
state = "abstract_cn"
|
||||
self._add_abstract_title("摘 要")
|
||||
continue
|
||||
if txt == "Abstract":
|
||||
self._add_abstract_title_en()
|
||||
state = "abstract_en"
|
||||
continue
|
||||
if "目" in txt and "次" in txt:
|
||||
state = "toc"
|
||||
self._add_toc("目 次")
|
||||
continue
|
||||
# Normal chapter
|
||||
if state in ("before_abstract", "abstract_cn", "abstract_en", "toc"):
|
||||
self._add_section_break_main()
|
||||
state = "main"
|
||||
self._add_page_break_if_not_first()
|
||||
self._add_chapter(txt)
|
||||
continue
|
||||
|
||||
if t == "heading" and blk["level"] == 3:
|
||||
self._ensure_main_section(state)
|
||||
state = "main"
|
||||
txt = blk["text"].strip()
|
||||
if re.match(r"^\d+\.\d+\.\d+\s", txt):
|
||||
self._add_subsection(txt)
|
||||
else:
|
||||
self._add_section(txt)
|
||||
continue
|
||||
|
||||
if t == "heading" and blk["level"] >= 4:
|
||||
self._ensure_main_section(state)
|
||||
state = "main"
|
||||
# headings below 3 → body-style bold
|
||||
self._add_body_para(blk["text"], bold=True, indent=False)
|
||||
continue
|
||||
|
||||
# paragraphs / code / blockquote / list / thematic_break
|
||||
|
||||
if t == "paragraph":
|
||||
txt = blk["text"]
|
||||
if not txt.strip():
|
||||
continue
|
||||
if state == "abstract_cn":
|
||||
if txt.startswith("关键词:"):
|
||||
self._add_keywords(txt, cn=True)
|
||||
else:
|
||||
self._add_abstract_body(txt)
|
||||
continue
|
||||
if state == "abstract_en":
|
||||
if txt.startswith("Key words:"):
|
||||
self._add_keywords(txt, cn=False)
|
||||
else:
|
||||
self._add_abstract_body(txt) # 英文摘要正文
|
||||
continue
|
||||
# Normal body
|
||||
self._ensure_main_section(state)
|
||||
state = "main"
|
||||
if txt.startswith("关键词:"):
|
||||
self._add_keywords(txt, cn=True)
|
||||
elif txt.startswith("Key words:"):
|
||||
self._add_keywords(txt, cn=False)
|
||||
else:
|
||||
self._add_body_para(txt)
|
||||
continue
|
||||
|
||||
if t == "block_code":
|
||||
# code can appear in abstract or main — skip abstract code
|
||||
if state in ("abstract_cn", "abstract_en", "toc"):
|
||||
continue
|
||||
self._ensure_main_section(state)
|
||||
state = "main"
|
||||
self._process_code(blk)
|
||||
continue
|
||||
|
||||
if t == "block_quote":
|
||||
txt = blk.get("text", "").strip()
|
||||
if not txt:
|
||||
continue
|
||||
self._ensure_main_section(state)
|
||||
state = "main"
|
||||
self._add_body_para(txt)
|
||||
continue
|
||||
|
||||
if t == "list":
|
||||
self._ensure_main_section(state)
|
||||
state = "main"
|
||||
for item in blk.get("items", []):
|
||||
self._add_body_para("• " + item)
|
||||
continue
|
||||
|
||||
if t == "thematic_break":
|
||||
# In front matter or already processed — handled by state
|
||||
continue
|
||||
|
||||
def _ensure_main_section(self, state: str):
|
||||
if state in ("before_abstract", "abstract_cn", "abstract_en", "toc"):
|
||||
if not self._section_break_added:
|
||||
self._add_section_break_main()
|
||||
|
||||
def _add_page_break_if_not_first(self):
|
||||
if self._seen_first_chapter:
|
||||
self.doc.add_page_break()
|
||||
else:
|
||||
self._seen_first_chapter = True
|
||||
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
# rendering methods
|
||||
# ══════════════════════════════════════════════════════════════
|
||||
|
||||
# ── abstract ──────────────────────────────────────────────────
|
||||
|
||||
def _add_abstract_title(self, text: str):
    """Write the abstract heading: centred, bold, in the Heading 1 style.

    An empty paragraph is added after the heading as a blank line.

    Args:
        text: Heading text.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 1"]
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run(text), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_abstract_title, bold=True)

    # Blank line after the title.
    doc.add_paragraph()
|
||||
|
||||
def _add_abstract_body(self, text: str):
    """Write one abstract paragraph: justified with a first-line indent.

    Args:
        text: Paragraph text; inline Markdown is rendered.
    """
    cfg = self.config
    para = self.doc.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    _set_spacing(para, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)
    _set_indent(para, cfg.first_line_indent_chars)
    _add_inline(para, _parse_inline(text), cfg)
|
||||
|
||||
def _add_abstract_title_en(self):
    """Write the head of the English abstract page.

    Output, in order: a centred "Abstract" heading in the Heading 1 style,
    a blank paragraph, the stored thesis title (only when one was found)
    and a centred "Student / Teacher" placeholder line.
    """
    cfg = self.config
    doc = self.doc
    centre = WD_ALIGN_PARAGRAPH.CENTER

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 1"]
    heading.alignment = centre
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run("Abstract"), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_abstract_title, bold=True)
    doc.add_paragraph()

    if self._thesis_title:
        # The stored title is written as-is; it is a placeholder for the
        # English title, which the user is expected to fill in.
        title_par = doc.add_paragraph()
        title_par.alignment = centre
        _set_spacing(title_par, before=0, after=0,
                     line_spacing=cfg.line_spacing_heading)
        _set_font(title_par.add_run(self._thesis_title),
                  cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_section, bold=True)

    # Author and supervisor line.
    names_par = doc.add_paragraph()
    names_par.alignment = centre
    _set_spacing(names_par, before=6, after=6,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(names_par.add_run("Student: \tTeacher: "),
              cfg.font_cn, cfg.font_en, size=cfg.size_body)
|
||||
|
||||
# ── keywords ──────────────────────────────────────────────────
|
||||
|
||||
def _add_keywords(self, text: str, cn: bool):
    """Write a keywords line: flush left with a bold label.

    The label (``cfg.keywords_label_cn`` or ``cfg.keywords_label_en``) is
    recognised at the start of *text* either plain or wrapped in ``**``.
    When found it is written as its own bold run in the heading fonts and
    the remainder is rendered as inline Markdown. Otherwise the whole text
    is rendered as inline Markdown.

    Args:
        text: Source paragraph containing the keywords.
        cn: ``True`` for the Chinese label, ``False`` for the English one.
    """
    cfg = self.config
    p = self.doc.add_paragraph()
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(p, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)

    label = cfg.keywords_label_cn if cn else cfg.keywords_label_en
    quoted = re.escape(label)
    # Accept "**label**rest" and "label rest". A closing "**" is consumed
    # only together with an opening one, so keywords that themselves start
    # with bold text stay intact. DOTALL keeps the text after a line break
    # instead of silently dropping it.
    m = re.match(
        r"\s*(?:\*\*\s*" + quoted + r"\s*\*\*|" + quoted + r")(.*)",
        text,
        re.DOTALL,
    )
    if m:
        run = p.add_run(label)
        _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
                  size=cfg.size_keyword_label, bold=True)
        rest = m.group(1).strip()
    else:
        rest = text

    _add_inline(p, _parse_inline(rest), cfg, size=cfg.size_keyword_label)
|
||||
|
||||
# ── TOC ────────────────────────────────────────────────────────
|
||||
|
||||
def _add_toc(self, title: str):
    """Write the table-of-contents heading and a Word TOC field.

    The field collects heading levels 1-3. Until the field is updated in
    Word it shows a placeholder text asking the user to right-click and
    update the field.

    Args:
        title: Heading text shown above the table of contents.
    """
    cfg = self.config
    doc = self.doc
    w_ns = nsdecls("w")

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 1"]
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run(title), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_abstract_title, bold=True)

    doc.add_paragraph()  # blank line

    # Field layout: begin, instruction, separate, placeholder result, end.
    field_par = doc.add_paragraph()
    _set_spacing(field_par, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)

    def add_marker(xml: str):
        field_par.add_run()._element.append(parse_xml(xml))

    add_marker(f'<w:fldChar {w_ns} w:fldCharType="begin"/>')
    add_marker(
        f'<w:instrText {w_ns} xml:space="preserve">'
        ' TOC \\o "1-3" \\h \\z \\u </w:instrText>')
    add_marker(f'<w:fldChar {w_ns} w:fldCharType="separate"/>')

    placeholder = field_par.add_run("(请右键此处 > 更新域)")
    _set_font(placeholder, cfg.font_cn, cfg.font_en, size=cfg.size_body)

    add_marker(f'<w:fldChar {w_ns} w:fldCharType="end"/>')
|
||||
|
||||
# ── chapter headings (第一层次) ──────────────────────────────
|
||||
|
||||
def _add_chapter(self, text: str):
    """Write a chapter heading: Heading 1 style, bold, flush left.

    When the text starts with a chapter number, the whitespace between the
    number and the title is normalised to two spaces.

    Args:
        text: Heading text, e.g. ``"1 Introduction"``.
    """
    cfg = self.config
    p = self.doc.add_paragraph()
    p.style = self.doc.styles["Heading 1"]
    p.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(p, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)

    # A double space between number and title is required here; the
    # replacement previously emitted only a single space.
    formatted = re.sub(r"^(\d+)\s+", r"\1  ", text)
    run = p.add_run(formatted)
    _set_font(run, cfg.font_cn_heading, cfg.font_heading_en,
              size=cfg.size_chapter, bold=True)
|
||||
|
||||
# ── section heading (第二层次) ───────────────────────────────
|
||||
|
||||
def _add_section(self, text: str):
    """Write a section heading: Heading 2 style, bold, flush left.

    When the text starts with an ``N.N`` number, the whitespace between the
    number and the title is normalised to a single space.

    Args:
        text: Heading text, e.g. ``"1.2 Background"``.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 2"]
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)

    # Single space between number and title.
    label = re.sub(r"^(\d+\.\d+)\s+", r"\1 ", text)
    _set_font(heading.add_run(label), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_section, bold=True)
|
||||
|
||||
# ── subsection heading (第三层次) ──────────────────────────
|
||||
|
||||
def _add_subsection(self, text: str):
    """Write a subsection heading: Heading 3 style, bold, flush left.

    Args:
        text: Heading text, written unchanged.
    """
    cfg = self.config
    doc = self.doc

    heading = doc.add_paragraph()
    heading.style = doc.styles["Heading 3"]
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    _set_spacing(heading, before=0, after=0,
                 line_spacing=cfg.line_spacing_heading)
    _set_font(heading.add_run(text), cfg.font_cn_heading,
              cfg.font_heading_en, size=cfg.size_subsection, bold=True)
|
||||
|
||||
# ── body paragraph ──────────────────────────────────────────
|
||||
|
||||
def _add_body_para(self, text: str, bold: bool = False,
                   indent: bool = True):
    """Write a justified body paragraph.

    Args:
        text: Paragraph text; inline Markdown is rendered.
        bold: Render every run in bold.
        indent: Apply the configured first-line indent.
    """
    cfg = self.config
    para = self.doc.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    _set_spacing(para, before=0, after=0,
                 line_spacing=cfg.line_spacing_body)
    if indent:
        _set_indent(para, cfg.first_line_indent_chars)
    _add_inline(para, _parse_inline(text), cfg, bold=bold)
|
||||
|
||||
# ── code block ──────────────────────────────────────────────
|
||||
|
||||
def _process_code(self, blk: dict):
    """Write a fenced code block as one shaded, indented paragraph.

    Lines are separated by line breaks inside a single paragraph; blank
    lines are preserved and no break is added after the last line. Blocks
    whose content is empty or whitespace-only produce no output.

    Args:
        blk: ``block_code`` dictionary; its ``"raw"`` entry holds the code.
    """
    code = blk.get("raw", "")
    if not code.strip():
        return

    cfg = self.config
    p = self.doc.add_paragraph()

    # Add the shading first: w:shd has to precede w:spacing and w:ind
    # inside w:pPr, and the formatting calls below place their elements
    # after it.
    pPr = p._element.get_or_add_pPr()
    pPr.append(parse_xml(
        f'<w:shd {nsdecls("w")} w:fill="F2F2F2" w:val="clear"/>'))

    _set_spacing(p, before=0, after=0,
                 line_spacing=cfg.line_spacing_code)
    p.paragraph_format.left_indent = Cm(0.75)

    for idx, line in enumerate(code.split("\n")):
        if idx:
            # Break *between* lines only, so the block does not end with
            # an empty extra line.
            p.add_run("\n")
        if line:
            run = p.add_run(line)
            _set_font(run, cfg.font_code, cfg.font_code,
                      size=cfg.size_code)
|
||||
Reference in New Issue
Block a user