Files
md2word/transit/body.py
zzy 0b10e97e0c feat(transit): 支持Markdown图片转Word图片段落
- 新增 transit/images.py 模块处理图片解析和插入逻辑
- 实现 `<img>` 标签解析为独立图片段落
- 添加图片尺寸检测和自适应缩放功能(超出页面宽度时自动缩放)
- 支持图标题居中显示和图片居中对齐
- 优化 body_to_paragraphs 函数,添加图片处理逻辑
- 更新 README.md 使用说明,添加模板文件要求说明

BREAKING CHANGE: 图片处理方式变更,需要包含 sample.docx 模板文件
2026-05-09 16:17:51 +08:00

156 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Markdown 正文 → Word 段落转换。
将正文 Markdown 按标题层级拆分为带样式的段落序列,
再注入到渲染后 docx 文档的占位符位置。
"""
import re
from copy import deepcopy
from docx import Document
from docx.oxml.ns import qn
from .images import make_image_paragraph, is_figure_caption, insert_image_paragraphs
# Heading line: 1-6 ``#`` marks, horizontal whitespace, then text that starts
# with a non-blank character.  The separator deliberately excludes line
# breaks: a plain ``\s+`` also matches ``\n``, which lets a bare ``#`` line
# capture the *following* line as its heading text.
_PAT_HEADING = re.compile(r"^(#{1,6})[^\S\r\n]+(\S.*)$", re.MULTILINE)


def body_to_paragraphs(
    md_text: str,
    *,
    level_offset: int = 0,
    body_style: str = "Body Text Indent",
) -> list[dict]:
    """Split Markdown body text into an ordered list of paragraph records.

    Heading lines become heading records.  The text between headings is
    split on blank lines, and each non-empty block becomes either an image
    record (when ``make_image_paragraph`` returns a truthy value for it) or
    a body record.  A block accepted by ``is_figure_caption`` is dropped
    when it directly follows an image record.

    Parameters
    ----------
    md_text : str
        Body Markdown.
    level_offset : int
        Offset added to the number of ``#`` marks (pass ``-1`` when the body
        starts at ``##`` so that it is emitted as ``Heading 1``).  The
        resulting level is clamped to the range 1-9 so the generated style
        name is always one of ``Heading 1`` .. ``Heading 9``.
    body_style : str
        Word style name recorded on body paragraphs.

    Returns
    -------
    list[dict]
        Records in document order.  Heading records are
        ``{"text": ..., "level": n, "style": "Heading n"}``; body records
        are ``{"text": ..., "level": 0, "style": body_style}``; image
        records are whatever ``make_image_paragraph`` returned.
    """
    paragraphs: list[dict] = []

    def _add_block(block: str) -> None:
        """Append one blank-line-delimited block as an image or body record."""
        block = block.strip()
        if not block:
            return

        # Image paragraph.
        image = make_image_paragraph(block)
        if image:
            paragraphs.append(image)
            return

        # Skip a duplicate figure caption that immediately follows an image.
        follows_image = bool(paragraphs) and paragraphs[-1].get("type") == "image"
        if follows_image and is_figure_caption(block):
            return

        # Ordinary body paragraph.
        paragraphs.append({"text": block, "level": 0, "style": body_style})

    def _add_text(chunk: str) -> None:
        """Split non-heading text on blank lines and add every block."""
        for block in re.split(r"\n\s*\n", chunk.strip()):
            _add_block(block)

    cursor = 0
    for match in _PAT_HEADING.finditer(md_text):
        # Plain text that precedes this heading.
        _add_text(md_text[cursor : match.start()])

        # Keep the level inside 1-9: an out-of-range offset would otherwise
        # yield style names such as "Heading 0" that do not exist.
        level = min(max(len(match.group(1)) + level_offset, 1), 9)
        paragraphs.append(
            {
                "text": match.group(2).strip(),
                "level": level,
                "style": f"Heading {level}",
            }
        )
        cursor = match.end()

    # Trailing text after the last heading (or the whole input if none).
    _add_text(md_text[cursor:])
    return paragraphs
def replace_placeholder(
    doc: Document,
    placeholder: str,
    paragraphs: list[dict],
    *,
    default_body_style: str | None = None,
) -> None:
    """Replace the first paragraph of *doc* containing *placeholder*.

    The matching paragraph is removed and the entries of *paragraphs* are
    inserted at its position, in order.  Entries whose ``"type"`` is
    ``"image"`` are handed to ``insert_image_paragraphs``; every other entry
    becomes a new text paragraph built from its ``"text"`` and ``"style"``
    keys.

    Style resolution for a text paragraph:

    1. The style named by the entry's ``"style"`` key (via ``_apply_style``).
    2. If that fails and the name starts with ``"Heading"``: ``Normal``.
    3. If that fails for any other name: the placeholder paragraph's own
       style, when it has one.

    If the placeholder paragraph carries a ``w:numPr`` element, a copy of it
    is attached to every inserted text paragraph.

    When no paragraph contains *placeholder*, a warning is printed and the
    document is left unchanged.

    NOTE(review): ``default_body_style`` is accepted but never read in this
    function -- confirm whether it should take part in the fallback chain.
    """
    placeholder_found = False
    for para in doc.paragraphs:
        if placeholder in para.text:
            placeholder_found = True
            placeholder_style = para.style.name if para.style else None
            parent = para._element.getparent()
            # Position of the placeholder among its parent's children; the
            # replacement paragraphs are inserted at this same index.
            idx = list(parent).index(para._element)
            # Keep the placeholder's numbering properties (numPr) so the new
            # paragraphs can inherit its automatic numbering.
            orig_pPr = para._element.find(qn("w:pPr"))
            numPr = orig_pPr.find(qn("w:numPr")) if orig_pPr is not None else None
            parent.remove(para._element)
            # Walk the entries backwards: inserting each one at the same
            # index leaves them in their original order.
            for pd_data in reversed(paragraphs):
                if pd_data.get("type") == "image":
                    insert_image_paragraphs(
                        doc, [pd_data], idx=idx, parent=parent
                    )
                else:
                    new_p = doc.add_paragraph(pd_data["text"])
                    style_name = pd_data["style"]
                    # Try the requested style, then degrade step by step.
                    applied = _apply_style(new_p, doc, style_name)
                    if not applied and style_name.startswith("Heading"):
                        new_p.style = doc.styles["Normal"]
                    elif not applied:
                        if placeholder_style:
                            _apply_style(new_p, doc, placeholder_style)
                        # NOTE(review): this second check looks redundant
                        # with the call just above -- confirm that it is
                        # meant to sit inside this ``elif`` branch.
                        if new_p.style.name == "Normal" and placeholder_style:
                            new_p.style = doc.styles[placeholder_style]
                    # Inherit the placeholder's numbering properties
                    # (automatic numbering).
                    if numPr is not None:
                        new_pPr = new_p._element.find(qn("w:pPr"))
                        if new_pPr is None:
                            new_pPr = new_p._element.makeelement(qn("w:pPr"), {})
                            new_p._element.insert(0, new_pPr)
                        # Drop any numPr the new paragraph already has before
                        # attaching the copy.
                        existing = new_pPr.find(qn("w:numPr"))
                        if existing is not None:
                            new_pPr.remove(existing)
                        new_pPr.append(deepcopy(numPr))
                    # The paragraph was created through ``doc.add_paragraph``;
                    # place its element at the placeholder's position
                    # (presumably this relocates it rather than copying).
                    parent.insert(idx, new_p._element)
            # Only the first paragraph containing the placeholder is replaced.
            break
    if not placeholder_found:
        print(f"警告:未找到占位符 '{placeholder}',正文段落未注入。")
def _apply_style(paragraph, doc, style_name: str) -> bool:
"""尝试给段落应用样式,成功返回 ``True``。"""
try:
paragraph.style = doc.styles[style_name]
return True
except KeyError:
pass
# 大小写不敏感匹配
for s in doc.styles:
if s.name.lower() == style_name.lower():
paragraph.style = s
return True
return False