diff --git a/transit/body.py b/transit/body.py
index d372879..5043ff4 100644
--- a/transit/body.py
+++ b/transit/body.py
@@ -7,6 +7,7 @@ Markdown 正文 → Word 段落转换。
 
 import re
 from copy import deepcopy
+from pathlib import Path
 
 from docx import Document
 from docx.oxml.ns import qn
@@ -15,12 +16,16 @@ from .images import make_image_paragraph, is_figure_caption, insert_image_paragr
 
 _PAT_HEADING = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
 
+# Citation markers like [1] / [1,2,3]; mandatory separators keep matching linear.
+_CITE_PATTERN = re.compile(r"\[(\d+(?:[,\uFF0C\s]+\d+)*)\]")
+
 
 def body_to_paragraphs(
     md_text: str,
     *,
     level_offset: int = 0,
     body_style: str = "Body Text Indent",
+    base_dir: str | Path | None = None,
 ) -> list[dict]:
     """将 Markdown 正文按标题和段落拆分为结构化列表。
 
@@ -32,6 +37,8 @@ def body_to_paragraphs(
         标题级别偏移量(正文从 ``##`` 开始时传 ``-1``,使其输出为 ``Heading 1``)。
     body_style : str
         正文段落的 Word 样式名。
+    base_dir : str | Path | None
+        Directory containing the Markdown file; used to resolve relative image paths.
     """
     paragraphs: list[dict] = []
     last_end = 0
@@ -41,7 +48,7 @@ def body_to_paragraphs(
         if not block:
             return
         # 图片段落
-        img = make_image_paragraph(block)
+        img = make_image_paragraph(block, base_dir)
         if img:
             paragraphs.append(img)
             return
@@ -103,6 +110,9 @@ def replace_placeholder(
 
             parent.remove(para._element)
 
+            # Next free bookmark id, used for the reference-entry bookmarks below.
+            bm_id = _max_bookmark_id(doc) + 1
+
             for pd_data in reversed(paragraphs):
                 if pd_data.get("type") == "image":
                     insert_image_paragraphs(
@@ -134,12 +144,159 @@ def replace_placeholder(
                     new_pPr.append(deepcopy(numPr))
 
                 parent.insert(idx, new_p._element)
+
+                # Bookmark reference entries so body citations can link to them.
+                ref_id = pd_data.get("ref_id")
+                if ref_id is not None:
+                    _add_bookmark(new_p, f"ref-{ref_id}", bm_id)
+                    bm_id += 1
             break
 
     if not placeholder_found:
         print(f"警告:未找到占位符 '{placeholder}',正文段落未注入。")
 
 
+def link_body_citations(doc: Document) -> None:
+    """Replace ``[N]`` citation markers with hyperlinks to ``ref-N`` bookmarks.
+
+    Every paragraph in ``doc.paragraphs`` is processed, except paragraphs that
+    already contain a ``w:hyperlink`` child, which are left untouched.
+    """
+    for para in doc.paragraphs:
+        # Skip paragraphs that already hold hyperlinks (e.g. a table of contents).
+        if para._element.findall(qn("w:hyperlink")):
+            continue
+        _link_paragraph(para)
+
+
+def _max_bookmark_id(doc: Document) -> int:
+    """Return the largest numeric bookmark ``w:id`` in the document body.
+
+    The whole body tree is scanned rather than only ``doc.paragraphs`` so that
+    bookmarks inside tables or directly under ``w:body`` are counted too;
+    otherwise a newly allocated id could collide with an existing one.
+    Returns ``0`` when no bookmark carries a numeric id.
+    """
+    max_id = 0
+    for bm in doc.element.body.iter(qn("w:bookmarkStart")):
+        try:
+            max_id = max(max_id, int(bm.get(qn("w:id"))))
+        except (ValueError, TypeError):
+            # Missing or non-numeric id: nothing to compare against.
+            continue
+    return max_id
+
+
+def _add_bookmark(paragraph, name: str, bm_id: int) -> None:
+    """Wrap the content of *paragraph* in a bookmark called *name*.
+
+    ``w:bookmarkStart`` is placed right after ``w:pPr`` (or first when the
+    paragraph has no properties) and ``w:bookmarkEnd`` is appended last.
+    """
+    p = paragraph._element
+    bm_start = p.makeelement(qn("w:bookmarkStart"), {})
+    bm_start.set(qn("w:id"), str(bm_id))
+    bm_start.set(qn("w:name"), name)
+
+    bm_end = p.makeelement(qn("w:bookmarkEnd"), {})
+    bm_end.set(qn("w:id"), str(bm_id))
+
+    pPr = p.find(qn("w:pPr"))
+    if pPr is not None:
+        # Anchor on pPr itself instead of assuming it sits at index 0.
+        pPr.addnext(bm_start)
+    else:
+        p.insert(0, bm_start)
+    p.append(bm_end)
+
+
+def _link_paragraph(para) -> None:
+    """Rewrite one paragraph so each ``[N]`` marker becomes a hyperlink.
+
+    The paragraph's direct ``w:r`` children are replaced by new runs that
+    alternate plain text and ``w:hyperlink`` elements.  All rebuilt text runs
+    reuse the formatting of the first original run.  A marker listing several
+    numbers (``[1,2]``) links to the first number only.
+
+    Paragraphs whose runs contain anything besides ``w:rPr`` and ``w:t``
+    (drawings, tabs, breaks, field codes, ...) are left untouched, because
+    rebuilding them from text alone would silently delete that content.
+    """
+    p = para._element
+    runs = p.findall(qn("w:r"))
+    if not runs:
+        return
+
+    tag_rPr, tag_t = qn("w:rPr"), qn("w:t")
+    # Collect every w:t of every run, not just the first one per run.
+    full_text = "".join(
+        t.text for r in runs for t in r.findall(tag_t) if t.text
+    )
+    matches = list(_CITE_PATTERN.finditer(full_text))
+    if not matches:
+        return
+    if any(child.tag not in (tag_rPr, tag_t) for r in runs for child in r):
+        return
+
+    first_rPr = runs[0].find(tag_rPr)
+    # Siblings after the last run (e.g. w:bookmarkEnd) must stay behind the text.
+    trailing = list(runs[-1].itersiblings())
+    for r in runs:
+        p.remove(r)
+
+    pos = 0
+    for m in matches:
+        if m.start() > pos:
+            _add_run(p, full_text[pos : m.start()], first_rPr)
+        first_num = re.match(r"\d+", m.group(1)).group()
+        _add_hlink(p, f"ref-{first_num}", m.group())
+        pos = m.end()
+    if pos < len(full_text):
+        _add_run(p, full_text[pos:], first_rPr)
+
+    # lxml's append() moves an element that is already in the tree.
+    for el in trailing:
+        p.append(el)
+
+
+def _make_text(owner, text: str):
+    """Build a ``w:t`` for *text* whose edge whitespace survives in Word."""
+    t = owner.makeelement(qn("w:t"), {})
+    # Without xml:space="preserve" leading/trailing spaces are discarded.
+    t.set(qn("xml:space"), "preserve")
+    t.text = text
+    return t
+
+
+def _add_run(parent, text: str, rPr) -> None:
+    """Append a ``w:r`` with *text* to *parent*, cloning *rPr* when given."""
+    r = parent.makeelement(qn("w:r"), {})
+    if rPr is not None:
+        r.append(deepcopy(rPr))
+    r.append(_make_text(r, text))
+    parent.append(r)
+
+
+def _add_hlink(parent, anchor: str, text: str) -> None:
+    """Append a superscript ``w:hyperlink`` to bookmark *anchor* showing *text*."""
+    hl = parent.makeelement(qn("w:hyperlink"), {})
+    hl.set(qn("w:anchor"), anchor)
+
+    r = parent.makeelement(qn("w:r"), {})
+    rPr = r.makeelement(qn("w:rPr"), {})
+    rStyle = rPr.makeelement(qn("w:rStyle"), {})
+    rStyle.set(qn("w:val"), "Hyperlink")
+    rPr.append(rStyle)
+    vertAlign = rPr.makeelement(qn("w:vertAlign"), {})
+    vertAlign.set(qn("w:val"), "superscript")
+    rPr.append(vertAlign)
+    r.append(rPr)
+
+    r.append(_make_text(r, text))
+    hl.append(r)
+    parent.append(hl)
+
+
 def _apply_style(paragraph, doc, style_name: str) -> bool:
     """尝试给段落应用样式,成功返回 ``True``。"""
     try:
diff --git a/transit/images.py b/transit/images.py
index 6c41a3b..63277cd 100644
--- a/transit/images.py
+++ b/transit/images.py
@@ -7,6 +7,7 @@
 
 import struct
 import re
+from pathlib import Path
 
 from docx import Document
 from docx.enum.text import WD_ALIGN_PARAGRAPH
@@ -25,15 +26,26 @@ _FIG_CAPTION = re.compile(
 )
 
 
-def make_image_paragraph(block: str) -> dict | None:
-    """若 *block* 包含 ``<img>`` 标签,返回图片段落字典;否则返回 ``None``。"""
+def make_image_paragraph(block: str, base_dir: str | Path | None = None) -> dict | None:
+    """Return an image-paragraph dict if *block* has an ``<img>`` tag, else ``None``.
+
+    Parameters
+    ----------
+    block : str
+        Text block to inspect.
+    base_dir : str | Path | None
+        Directory of the Markdown file, used to resolve a relative ``src``.
+    """
     m = _IMG_TAG.search(block)
     if not m:
         return None
     attrs = dict(_ATTR.findall(block))
+    src = attrs.get("src", m.group(2))
+    if base_dir and not Path(src).is_absolute():
+        src = str(Path(base_dir) / src)
     return {
         "type": "image",
-        "src": attrs.get("src", m.group(2)),
+        "src": src,
         "alt": attrs.get("alt", ""),
     }
 
diff --git a/transit/references.py b/transit/references.py
index 5dcb3ea..fc36eea 100644
--- a/transit/references.py
+++ b/transit/references.py
@@ -104,11 +104,15 @@ def references_to_paragraphs(
         ref = parse_reference_line(line)
         if ref:
             formatted = format_gb7714(ref)
+            ref_id = ref["number"]
         else:
             # 无法解析时,至少去掉 [N] 前缀
             fallback = re.sub(r"^\[\d+\]\s*", "", line)
             formatted = _normalize_period(fallback)
+            ref_id = None
 
-        paragraphs.append({"text": formatted, "level": 0, "style": ref_style})
+        paragraphs.append(
+            {"text": formatted, "level": 0, "style": ref_style, "ref_id": ref_id}
+        )
 
     return paragraphs
diff --git a/transit/renderer.py b/transit/renderer.py
index e7fdf73..92d3bc7 100644
--- a/transit/renderer.py
+++ b/transit/renderer.py
@@ -11,7 +11,7 @@ from docx import Document
 
 from .config import load_config, ThesisConfig
 from .parser import parse_markdown
-from .body import body_to_paragraphs, replace_placeholder
+from .body import body_to_paragraphs, replace_placeholder, link_body_citations
 from .references import references_to_paragraphs
 
 
@@ -88,7 +88,12 @@ def generate_thesis(
     # 5. 解析正文为段落列表
     body_md = ctx.get("body_md", "")
     body_paragraphs = (
-        body_to_paragraphs(body_md, level_offset=config.level_offset, body_style=config.body_style)
+        body_to_paragraphs(
+            body_md,
+            level_offset=config.level_offset,
+            body_style=config.body_style,
+            base_dir=data_path.parent,
+        )
         if body_md
         else []
     )
@@ -114,6 +119,10 @@ def generate_thesis(
         final_doc, "__CONTEXT_PLACEHOLDER__", body_paragraphs,
         default_body_style=config.body_style,
     )
+
+    # Turn [N] citations in the body into hyperlinks before references are added.
+    link_body_citations(final_doc)
+
     replace_placeholder(
         final_doc, "__REFERENCE_PLACEHOLDER__", ref_paragraphs,
         default_body_style=config.reference_style,