feat: 论文
This commit is contained in:
670
docs/_scripts/md_to_docx_henau.py
Normal file
670
docs/_scripts/md_to_docx_henau.py
Normal file
@@ -0,0 +1,670 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from docx import Document
|
||||
from docx.enum.section import WD_SECTION_START
|
||||
from docx.enum.style import WD_STYLE_TYPE
|
||||
from docx.enum.table import WD_TABLE_ALIGNMENT
|
||||
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK, WD_LINE_SPACING
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
from docx.shared import Cm, Pt
|
||||
|
||||
|
||||
# Repository root: the third ancestor of this script's resolved path
# (the script is expected to live in <root>/docs/_scripts/).
ROOT = Path(__file__).resolve().parents[2]
# Markdown draft that is read, and the .docx file that is written.
SOURCE = ROOT.joinpath('docs', '11_毕业论文正文初稿.md')
OUTPUT = ROOT.joinpath('docs', '11_毕业论文正文初稿_河农大格式.docx')

# Captions given to Markdown tables, in order of appearance.
TABLE_TITLES = [
    '表 6-1 测试环境',
    '表 6-2 功能测试结果',
    '表 6-3 主要接口测试情况',
    '表 6-4 各模型在测试集上的回归性能',
    '表 6-5 主要分类模型结果',
]

# Placeholder texts printed on the cover page.
CHINESE_TITLE_PLACEHOLDER_DATE = '二〇二六年〇月〇日'
ENGLISH_TITLE_PLACEHOLDER = 'ENGLISH TITLE PLACEHOLDER'

# Font family names used throughout the document.
FONT_SONG = '宋体'
FONT_HEI = '黑体'
FONT_KAI = '楷体'
FONT_TNR = 'Times New Roman'
FONT_MONO = 'Courier New'
|
||||
|
||||
|
||||
@dataclass
class Block:
    """One element parsed out of the Markdown source.

    Only ``kind`` is required; every other field defaults to ``None`` and is
    filled in depending on the element type.
    """

    # Element type tag (e.g. 'paragraph', 'heading', 'code', 'table', 'list_item').
    kind: str
    # Text payload, used by text-bearing elements such as paragraphs and headings.
    text: str | None = None
    # Heading depth, i.e. the number of leading '#' characters.
    level: int | None = None
    # Raw source lines, used by multi-line elements such as code blocks and tables.
    lines: list[str] | None = None
|
||||
|
||||
|
||||
def set_run_fonts(run, east_asia: str, ascii_font: str, size: float | int | None = None, bold: bool | None = None):
    """Set the font faces of *run*, and optionally its size and weight.

    Args:
        run: The python-docx run to format.
        east_asia: Font family written to the ``w:eastAsia`` attribute.
        ascii_font: Font family written to ``w:ascii``, ``w:hAnsi`` and ``w:cs``.
        size: Font size in points; left untouched when ``None``.
        bold: Bold flag; left untouched when ``None``.
    """
    if size is not None:
        run.font.size = Pt(size)
    if bold is not None:
        run.bold = bold

    run.font.name = ascii_font

    # The per-script font slots are written straight onto the w:rFonts element.
    run_props = run._element.get_or_add_rPr()
    fonts_el = run_props.rFonts
    if fonts_el is None:
        fonts_el = OxmlElement('w:rFonts')
        run_props.insert(0, fonts_el)

    for attr, face in (
        ('w:ascii', ascii_font),
        ('w:hAnsi', ascii_font),
        ('w:eastAsia', east_asia),
        ('w:cs', ascii_font),
    ):
        fonts_el.set(qn(attr), face)
|
||||
|
||||
|
||||
def set_style_fonts(style, east_asia: str, ascii_font: str, size: float | int, bold: bool = False):
    """Set the size, weight and font faces of a document style.

    Args:
        style: The python-docx style to modify.
        east_asia: Font family written to the ``w:eastAsia`` attribute.
        ascii_font: Font family written to ``w:ascii``, ``w:hAnsi`` and ``w:cs``.
        size: Font size in points.
        bold: Whether the style is bold.
    """
    font = style.font
    font.size = Pt(size)
    font.bold = bold
    font.name = ascii_font

    # Read w:rFonts only after the font name has been assigned above.
    fonts_el = style._element.rPr.rFonts
    for attr, face in (
        ('w:ascii', ascii_font),
        ('w:hAnsi', ascii_font),
        ('w:eastAsia', east_asia),
        ('w:cs', ascii_font),
    ):
        fonts_el.set(qn(attr), face)
|
||||
|
||||
|
||||
def configure_document(doc: Document):
    """Apply page layout and base styles to *doc*.

    Configures every existing section's page geometry, the ``Normal`` style,
    ``Heading 1``-``Heading 4``, any of ``TOC 1``-``TOC 3`` that exist, and a
    ``CodeBlock`` paragraph style (created when missing). Finally asks the
    word processor to refresh fields when the file is opened.
    """
    for section in doc.sections:
        apply_page_layout(section)

    styles = doc.styles

    # Body text: 10.5 pt, fixed 20 pt line pitch, 21 pt first-line indent, justified.
    body_style = styles['Normal']
    set_style_fonts(body_style, FONT_SONG, FONT_TNR, 10.5, False)
    body_fmt = body_style.paragraph_format
    body_fmt.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    body_fmt.line_spacing = Pt(20)
    body_fmt.first_line_indent = Pt(21)
    body_fmt.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    # (style name, font size, space before, space after) - all values in points.
    heading_specs = (
        ('Heading 1', 14, 12, 6),
        ('Heading 2', 12, 10, 4),
        ('Heading 3', 10.5, 8, 3),
        ('Heading 4', 10.5, 6, 3),
    )
    for style_name, font_size, gap_before, gap_after in heading_specs:
        heading_style = styles[style_name]
        set_style_fonts(heading_style, FONT_HEI, FONT_TNR, font_size, True)
        heading_fmt = heading_style.paragraph_format
        heading_fmt.alignment = WD_ALIGN_PARAGRAPH.LEFT
        heading_fmt.first_line_indent = Pt(0)
        heading_fmt.space_before = Pt(gap_before)
        heading_fmt.space_after = Pt(gap_after)
        heading_fmt.line_spacing_rule = WD_LINE_SPACING.EXACTLY
        heading_fmt.line_spacing = Pt(20)

    # TOC styles are only adjusted when the template already defines them.
    for toc_name in ('TOC 1', 'TOC 2', 'TOC 3'):
        if toc_name not in styles:
            continue
        toc_style = styles[toc_name]
        set_style_fonts(toc_style, FONT_SONG, FONT_TNR, 12, False)
        toc_style.paragraph_format.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
        toc_style.paragraph_format.line_spacing = 1.5

    if 'CodeBlock' in styles:
        code_style = styles['CodeBlock']
    else:
        code_style = styles.add_style('CodeBlock', WD_STYLE_TYPE.PARAGRAPH)
    set_style_fonts(code_style, FONT_MONO, FONT_MONO, 10.5, False)
    code_fmt = code_style.paragraph_format
    code_fmt.first_line_indent = Pt(0)
    code_fmt.left_indent = Pt(12)
    code_fmt.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    code_fmt.line_spacing = Pt(20)

    enable_update_fields_on_open(doc)
|
||||
|
||||
|
||||
def apply_page_layout(section):
    """Set *section* to a 21 cm x 29.7 cm page with 2.5 cm top/bottom and 3 cm side margins."""
    geometry_cm = (
        ('page_width', 21),
        ('page_height', 29.7),
        ('top_margin', 2.5),
        ('bottom_margin', 2.5),
        ('left_margin', 3),
        ('right_margin', 3),
    )
    for attribute, centimetres in geometry_cm:
        setattr(section, attribute, Cm(centimetres))
|
||||
|
||||
|
||||
def enable_update_fields_on_open(doc: Document):
    """Set ``w:updateFields`` to ``true`` in the document settings.

    The element is created and appended to the settings root when it does not
    exist yet.
    """
    settings_root = doc.settings.element
    flag = settings_root.find(qn('w:updateFields'))
    if flag is None:
        flag = OxmlElement('w:updateFields')
        settings_root.append(flag)
    flag.set(qn('w:val'), 'true')
|
||||
|
||||
|
||||
def add_field(paragraph, instruction: str, placeholder: str | None = None):
    """Append a complex field to *paragraph*.

    The field is emitted as the usual sequence begin -> instruction ->
    separate -> (optional result) -> end. Each field character and the
    instruction text is wrapped in its own run: ``w:fldChar`` and
    ``w:instrText`` are run-level content in WordprocessingML, so placing
    them directly under the paragraph element produces invalid markup.

    Args:
        paragraph: The python-docx paragraph that receives the field.
        instruction: Field code, e.g. ``' PAGE '``.
        placeholder: Optional text shown as the field result until the field
            is refreshed; when falsy, no result run is written.
    """

    def append_in_run(element):
        # add_run() appends a fresh, empty w:r at the end of the paragraph;
        # the field element then becomes that run's content.
        paragraph.add_run()._element.append(element)

    def field_char(char_type: str):
        element = OxmlElement('w:fldChar')
        element.set(qn('w:fldCharType'), char_type)
        return element

    instr = OxmlElement('w:instrText')
    # Preserve the leading/trailing spaces of the field code.
    instr.set(qn('xml:space'), 'preserve')
    instr.text = instruction

    append_in_run(field_char('begin'))
    append_in_run(instr)
    append_in_run(field_char('separate'))

    if placeholder:
        result_run = paragraph.add_run(placeholder)
        set_run_fonts(result_run, FONT_SONG, FONT_TNR, 12, False)

    append_in_run(field_char('end'))
|
||||
|
||||
|
||||
def configure_footer_page_number(section, fmt: str, start: int):
    """Give *section* its own centred page-number footer.

    Args:
        section: The python-docx section to configure.
        fmt: Value written to ``w:pgNumType/@w:fmt`` (e.g. ``'decimal'``).
        start: First page number, written to ``w:pgNumType/@w:start``.
    """
    sect_props = section._sectPr
    numbering = sect_props.find(qn('w:pgNumType'))
    if numbering is None:
        # NOTE(review): the element is appended at the end of w:sectPr;
        # confirm the target word processor accepts it in that position.
        numbering = OxmlElement('w:pgNumType')
        sect_props.append(numbering)
    numbering.set(qn('w:fmt'), fmt)
    numbering.set(qn('w:start'), str(start))

    # Unlink first so this section does not share the previous section's footer.
    footer = section.footer
    footer.is_linked_to_previous = False
    number_paragraph = footer.paragraphs[0]
    number_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    add_field(number_paragraph, ' PAGE ')
    for footer_run in number_paragraph.runs:
        set_run_fonts(footer_run, FONT_TNR, FONT_TNR, 10.5, False)
|
||||
|
||||
|
||||
def add_cover(doc: Document, title: str):
    """Append the cover page content to *doc*.

    Layout: four empty paragraphs, the thesis title, the English title
    placeholder, four more empty paragraphs, then the author/affiliation
    lines and the completion date.
    """

    def add_blank_paragraphs(count: int):
        for _ in range(count):
            doc.add_paragraph()

    def add_centered_line(text: str, gap_after, east_asia: str, size, bold: bool):
        paragraph = doc.add_paragraph()
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        paragraph.paragraph_format.space_after = Pt(gap_after)
        set_run_fonts(paragraph.add_run(text), east_asia, FONT_TNR, size, bold)

    add_blank_paragraphs(4)
    add_centered_line(title, 18, FONT_HEI, 18, True)
    add_centered_line(ENGLISH_TITLE_PLACEHOLDER, 30, FONT_TNR, 18, True)
    add_blank_paragraphs(4)

    cover_lines = [
        '作 者:________________________',
        '学 院:________________________',
        '专 业:________________________',
        '班 级:________________________',
        '学 号:________________________',
        '指导教师:________________________',
        f'完成日期:{CHINESE_TITLE_PLACEHOLDER_DATE}',
    ]
    for cover_line in cover_lines:
        add_centered_line(cover_line, 8, FONT_SONG, 15, False)
|
||||
|
||||
|
||||
def add_toc_page(doc: Document):
    """Append a table-of-contents page: centred title, TOC field, page break."""
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    heading_fmt = heading.paragraph_format
    heading_fmt.space_before = Pt(16)
    heading_fmt.space_after = Pt(12)
    heading_fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
    heading_fmt.line_spacing = 1.5
    set_run_fonts(heading.add_run('目 录'), FONT_HEI, FONT_TNR, 16, True)

    # The field covers outline levels 1-3; the placeholder text is shown as
    # the field result until the field is updated.
    field_paragraph = doc.add_paragraph()
    add_field(field_paragraph, r' TOC \o "1-3" \h \z \u ', '右键更新目录')
    doc.add_page_break()
|
||||
|
||||
|
||||
def add_heading_paragraph(doc: Document, text: str, style_name: str, align=WD_ALIGN_PARAGRAPH.LEFT, size: float | int | None = None):
    """Append a heading paragraph in *style_name* and return it.

    Args:
        doc: Target document.
        text: Heading text.
        style_name: Name of the paragraph style to apply.
        align: Paragraph alignment.
        size: Font size in points for the heading run; 10.5 is used when the
            value is ``None`` (or otherwise falsy).

    Returns:
        The newly added paragraph.
    """
    heading = doc.add_paragraph(style=style_name)
    heading.alignment = align

    if size is not None:
        for existing_run in heading.runs:
            existing_run.clear()

    # Every style name maps to the same font pair, so no per-style branching
    # is needed here.
    heading_run = heading.add_run(text)
    set_run_fonts(heading_run, FONT_HEI, FONT_TNR, size or 10.5, True)
    return heading
|
||||
|
||||
|
||||
def render_inline(paragraph, text: str, east_asia: str, ascii_font: str, size: float | int, default_bold: bool = False):
    """Append *text* to *paragraph*, honouring ``**bold**`` and `` `code` `` spans.

    Args:
        paragraph: The python-docx paragraph that receives the runs.
        text: Source text, possibly containing inline Markdown markers.
        east_asia: East-Asian font for normal and bold runs.
        ascii_font: Latin font for normal and bold runs.
        size: Font size in points for every run.
        default_bold: Weight of the plain (unmarked) text.
    """
    token_pattern = re.compile(r'(\*\*.*?\*\*|`[^`]+`)')

    # re.split() with a single capturing group alternates plain text (even
    # positions) and matched tokens (odd positions). Classifying by position
    # rather than by the fragment's first/last characters keeps literal text
    # such as '***' or a lone backtick from being mistaken for an (empty)
    # bold or code span and silently dropped.
    for position, fragment in enumerate(token_pattern.split(text)):
        if not fragment:
            continue
        is_token = position % 2 == 1
        if is_token and fragment.startswith('**'):
            run = paragraph.add_run(fragment[2:-2])
            set_run_fonts(run, east_asia, ascii_font, size, True)
        elif is_token:
            # The only other alternative in the pattern is the backtick span.
            run = paragraph.add_run(fragment[1:-1])
            set_run_fonts(run, FONT_MONO, FONT_MONO, size, False)
        else:
            run = paragraph.add_run(fragment)
            set_run_fonts(run, east_asia, ascii_font, size, default_bold)
|
||||
|
||||
|
||||
def add_body_paragraph(doc: Document, text: str, indent: bool = True, hanging: bool = False):
    """Append a justified body paragraph with a fixed 20 pt line pitch.

    Args:
        doc: Target document.
        text: Paragraph text; inline markers are handled by ``render_inline``.
        indent: Use a 21 pt first-line indent (ignored when *hanging* is truthy).
        hanging: Use a 21 pt hanging indent instead.

    Returns:
        The newly added paragraph.
    """
    paragraph = doc.add_paragraph()
    paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY

    fmt = paragraph.paragraph_format
    fmt.line_spacing_rule = WD_LINE_SPACING.EXACTLY
    fmt.line_spacing = Pt(20)

    if hanging:
        # Hanging indent: whole paragraph shifted right, first line pulled back.
        fmt.left_indent = Pt(21)
        fmt.first_line_indent = Pt(-21)
    elif indent:
        fmt.first_line_indent = Pt(21)
    else:
        fmt.first_line_indent = Pt(0)

    render_inline(paragraph, text, FONT_SONG, FONT_TNR, 10.5, False)
    return paragraph
|
||||
|
||||
|
||||
def add_code_paragraph(doc: Document, text: str):
    """Append one line of code as a left-aligned ``CodeBlock`` paragraph and return it."""
    code_paragraph = doc.add_paragraph(style='CodeBlock')
    code_paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
    code_paragraph.paragraph_format.first_line_indent = Pt(0)
    set_run_fonts(code_paragraph.add_run(text), FONT_MONO, FONT_MONO, 10.5, False)
    return code_paragraph
|
||||
|
||||
|
||||
def clean_title_text(text: str) -> str:
    """Rewrite a ``第N章 <name>`` heading as ``N <name>``.

    Only headings whose chapter number is written with digits are rewritten;
    any other text is returned unchanged.
    """
    chapter = re.match(r'^第(\d+)章\s+(.+)$', text)
    if chapter is None:
        return text
    number, name = chapter.groups()
    return f'{number} {name}'
|
||||
|
||||
|
||||
def split_cells(line: str) -> list[str]:
    """Split one Markdown table row into its trimmed cell texts.

    At most one leading and one trailing pipe are treated as the row's outer
    delimiters. Stripping every edge pipe (as ``str.strip('|')`` does) would
    swallow empty first or last cells, e.g. in ``|| a |``.

    Args:
        line: A table row such as ``| a | b |``.

    Returns:
        The cell texts with surrounding whitespace removed.
    """
    row = line.strip()
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    return [cell.strip() for cell in row.split('|')]
|
||||
|
||||
|
||||
def ensure_cell_has_paragraph(cell):
    """Return an empty paragraph inside *cell*.

    The cell's first paragraph is cleared and reused when one exists;
    otherwise a new paragraph is added.
    """
    existing = cell.paragraphs
    if not existing:
        return cell.add_paragraph()
    first = existing[0]
    first.clear()
    return first
|
||||
|
||||
|
||||
def set_cell_border(cell, **kwargs):
    """Set border attributes on individual edges of a table cell.

    Args:
        cell: The python-docx table cell.
        **kwargs: Optional ``top``, ``left``, ``bottom``, ``right``,
            ``insideH`` and ``insideV`` entries. Each is a mapping of
            attribute name (e.g. ``'val'``, ``'sz'``) to value; values are
            stringified and written in the ``w:`` namespace. Missing or empty
            entries leave that edge untouched.
    """
    cell_props = cell._tc.get_or_add_tcPr()
    borders = cell_props.first_child_found_in('w:tcBorders')
    if borders is None:
        borders = OxmlElement('w:tcBorders')
        cell_props.append(borders)

    for edge in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        attributes = kwargs.get(edge)
        if not attributes:
            continue
        tag = 'w:' + edge
        edge_element = borders.find(qn(tag))
        if edge_element is None:
            edge_element = OxmlElement(tag)
            borders.append(edge_element)
        for attr_name, attr_value in attributes.items():
            edge_element.set(qn(f'w:{attr_name}'), str(attr_value))
|
||||
|
||||
|
||||
def remove_all_table_borders(table):
    """Set every table-level border (outer and inside) of *table* to ``nil``."""
    table_props = table._tbl.tblPr
    borders = table_props.first_child_found_in('w:tblBorders')
    if borders is None:
        borders = OxmlElement('w:tblBorders')
        table_props.append(borders)

    for edge in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        tag = f'w:{edge}'
        edge_element = borders.find(qn(tag))
        if edge_element is None:
            edge_element = OxmlElement(tag)
            borders.append(edge_element)
        edge_element.set(qn('w:val'), 'nil')
|
||||
|
||||
|
||||
def apply_three_line_table(table):
    """Format *table* as a three-line table.

    All borders are removed first. The header row then gets a heavier top
    rule (sz 12) and a lighter bottom rule (sz 4), and the last row gets a
    heavier bottom rule (sz 12). Vertical borders stay hidden.
    """

    def rule(size):
        return {'val': 'single', 'sz': size, 'space': 0, 'color': '000000'}

    hidden = {'val': 'nil'}

    remove_all_table_borders(table)
    last_row_index = len(table.rows) - 1
    column_total = len(table.columns)

    for table_row in table.rows:
        for cell in table_row.cells:
            set_cell_border(cell, top=hidden, bottom=hidden, left=hidden, right=hidden)

    for column in range(column_total):
        set_cell_border(
            table.rows[0].cells[column],
            top=rule(12),
            bottom=rule(4),
            left=hidden,
            right=hidden,
        )
        # Applied after the header call, so for a single-row table the
        # heavier bottom rule is the one that remains.
        set_cell_border(
            table.rows[last_row_index].cells[column],
            bottom=rule(12),
            left=hidden,
            right=hidden,
        )
|
||||
|
||||
|
||||
def add_table_title(doc: Document, title: str):
    """Append a centred, bold 9 pt caption paragraph containing *title*."""
    caption = doc.add_paragraph()
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
    caption_fmt = caption.paragraph_format
    caption_fmt.first_line_indent = Pt(0)
    caption_fmt.space_before = Pt(6)
    caption_fmt.space_after = Pt(2)
    set_run_fonts(caption.add_run(title), FONT_HEI, FONT_TNR, 9, True)
|
||||
|
||||
|
||||
def _is_table_separator(line: str) -> bool:
    """Return True when every cell of *line* is a header separator such as ``---`` or ``:---:``."""
    return all(re.fullmatch(r':?-+:?', cell) for cell in split_cells(line))


def _fill_table_row(row, texts: list[str], bold: bool):
    """Write *texts* into the cells of *row*, left to right, as centred 9 pt text."""
    for cell, text in zip(row.cells, texts):
        p = ensure_cell_has_paragraph(cell)
        p.alignment = WD_ALIGN_PARAGRAPH.CENTER
        p.paragraph_format.first_line_indent = Pt(0)
        p.paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY
        p.paragraph_format.line_spacing = Pt(14)
        set_run_fonts(p.add_run(text), FONT_SONG, FONT_TNR, 9, bold)


def add_markdown_table(doc: Document, lines: list[str], title: str):
    """Render a Markdown pipe table as a captioned three-line table.

    Args:
        doc: Target document.
        lines: Raw table lines; the first line is the header row. A separator
            row directly after the header is skipped when present.
        title: Caption placed above the table.

    Nothing is written when *lines* is empty. The table is as wide as its
    widest row, so a body row with more cells than the header no longer
    raises ``IndexError``; the surplus header cells are simply left empty.
    """
    if not lines:
        return

    add_table_title(doc, title)

    header = split_cells(lines[0])
    body_lines = lines[1:]
    # Drop the separator row only when it really is one, so a table written
    # without it does not lose its first data row.
    if body_lines and _is_table_separator(body_lines[0]):
        body_lines = body_lines[1:]
    rows = [split_cells(line) for line in body_lines]

    column_total = max([len(header)] + [len(row) for row in rows])
    table = doc.add_table(rows=len(rows) + 1, cols=column_total)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.autofit = True

    for row_index, (table_row, texts) in enumerate(zip(table.rows, [header, *rows])):
        # Only the header row (index 0) is bold.
        _fill_table_row(table_row, texts, row_index == 0)

    apply_three_line_table(table)
    # Empty paragraph as spacing after the table.
    doc.add_paragraph()
|
||||
|
||||
|
||||
def parse_markdown_blocks(lines: list[str]) -> list[Block]:
    """Group Markdown source lines into ``Block`` objects.

    Recognised elements, in order of precedence:

    * fenced code (lines between ``` markers) -> ``kind='code'``
    * blank lines, which end the current paragraph
    * headings with two to four ``#`` characters -> ``kind='heading'``
    * consecutive lines starting with ``|`` -> one ``kind='table'`` block
    * ordered (``1. ``) and unordered (``- `` / ``* ``) items -> ``kind='list_item'``
    * anything else is accumulated into a ``kind='paragraph'`` block

    Args:
        lines: Source lines of the document body.

    Returns:
        The parsed blocks in document order.
    """
    blocks: list[Block] = []
    paragraph_lines: list[str] = []
    code_lines: list[str] = []
    in_code = False
    i = 0

    def flush_paragraph():
        # Consecutive text lines are concatenated without a separator.
        if paragraph_lines:
            blocks.append(Block(kind='paragraph', text=''.join(paragraph_lines).strip()))
            paragraph_lines.clear()

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        if stripped.startswith('```'):
            # A fence line toggles code mode; the fence itself is not kept.
            flush_paragraph()
            if in_code:
                blocks.append(Block(kind='code', lines=code_lines.copy()))
                code_lines.clear()
                in_code = False
            else:
                in_code = True
            i += 1
            continue

        if in_code:
            # Code keeps its leading whitespace; only the newline is removed.
            code_lines.append(line.rstrip('\n'))
            i += 1
            continue

        if stripped == '':
            flush_paragraph()
            i += 1
            continue

        heading_match = re.match(r'^(#{2,4})\s+(.+)$', stripped)
        if heading_match:
            flush_paragraph()
            blocks.append(
                Block(
                    kind='heading',
                    level=len(heading_match.group(1)),
                    text=heading_match.group(2).strip(),
                )
            )
            i += 1
            continue

        if stripped.startswith('|'):
            flush_paragraph()
            table_lines: list[str] = []
            # Consume the whole run of table rows in one go.
            while i < len(lines) and lines[i].strip().startswith('|'):
                table_lines.append(lines[i].strip())
                i += 1
            blocks.append(Block(kind='table', lines=table_lines))
            continue

        list_match = re.match(r'^(\d+\.\s+|[-*]\s+)(.+)$', stripped)
        if list_match:
            flush_paragraph()
            # The list marker is kept as part of the item text.
            blocks.append(Block(kind='list_item', text=stripped))
            i += 1
            continue

        paragraph_lines.append(stripped)
        i += 1

    flush_paragraph()
    if in_code and code_lines:
        # The closing fence is missing: keep what was collected instead of
        # silently dropping the code.
        blocks.append(Block(kind='code', lines=code_lines.copy()))
    return blocks
|
||||
|
||||
|
||||
def split_document_blocks(blocks: list[Block]) -> tuple[list[Block], list[Block]]:
    """Split *blocks* into front matter and main body.

    The body starts at the first heading whose text begins with ``第N章``
    followed by whitespace; everything before it is front matter.

    Returns:
        ``(abstract_blocks, body_blocks)``; the body list is empty when no
        chapter heading is found.
    """

    def starts_body(block) -> bool:
        return bool(block.kind == 'heading' and block.text and re.match(r'^第\d+章\s+', block.text))

    boundary = next(
        (position for position, block in enumerate(blocks) if starts_body(block)),
        len(blocks),
    )
    return list(blocks[:boundary]), list(blocks[boundary:])
|
||||
|
||||
|
||||
def read_source() -> tuple[str, list[Block]]:
    """Read ``SOURCE`` and return the thesis title and the parsed blocks.

    The first line must be a level-1 heading (``# ``) and supplies the title.
    Content from the line ``## 后续补强建议`` onwards is discarded.

    Raises:
        ValueError: If the file is empty or does not start with ``# ``.
    """
    raw_lines = SOURCE.read_text(encoding='utf-8').splitlines()
    if not (raw_lines and raw_lines[0].startswith('# ')):
        raise ValueError('源 Markdown 缺少一级标题作为论文题目。')

    title_line, *content_lines = raw_lines
    title = title_line[2:].strip()

    # Truncate at the first occurrence of the trailing notes section.
    for position, content_line in enumerate(content_lines):
        if content_line.strip() == '## 后续补强建议':
            content_lines = content_lines[:position]
            break

    return title, parse_markdown_blocks(content_lines)
|
||||
|
||||
|
||||
def add_section_heading(doc: Document, text: str, level: int):
    """Append a left-aligned section heading.

    Markdown level 2 maps to ``Heading 1`` (14 pt, with the chapter prefix
    normalised by ``clean_title_text``), level 3 to ``Heading 2`` (12 pt),
    and every other level to ``Heading 3`` (10.5 pt).
    """
    if level == 2:
        label = clean_title_text(text)
        style_name, font_size = 'Heading 1', 14
    elif level == 3:
        label = text
        style_name, font_size = 'Heading 2', 12
    else:
        label = text
        style_name, font_size = 'Heading 3', 10.5

    heading = doc.add_paragraph(style=style_name)
    heading.alignment = WD_ALIGN_PARAGRAPH.LEFT
    heading.paragraph_format.first_line_indent = Pt(0)
    heading.clear()
    set_run_fonts(heading.add_run(label), FONT_HEI, FONT_TNR, font_size, True)
|
||||
|
||||
|
||||
def render_blocks(doc: Document, blocks: list[Block], is_abstract: bool = False):
    """Render parsed blocks into *doc*.

    Args:
        doc: Target document.
        blocks: Blocks to render, in order.
        is_abstract: When true, a heading whose text is ``摘要`` is rendered
            as the centred abstract title.

    Table captions are taken from ``TABLE_TITLES`` by position, counted from
    zero within this call. Blocks of an unknown kind are ignored.
    """
    table_index = 0
    current_section = '摘要' if is_abstract else ''

    for block in blocks:
        kind = block.kind

        if kind == 'heading':
            if is_abstract and block.text == '摘要':
                current_section = '摘要'
                title_p = doc.add_paragraph(style='Heading 1')
                title_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
                title_fmt = title_p.paragraph_format
                title_fmt.first_line_indent = Pt(0)
                title_fmt.space_before = Pt(16)
                title_fmt.space_after = Pt(10)
                title_p.clear()
                set_run_fonts(title_p.add_run('摘 要'), FONT_HEI, FONT_TNR, 16, True)
                continue

            heading_text = block.text or ''
            current_section = heading_text
            if heading_text not in ('参考文献', '致谢'):
                add_section_heading(doc, heading_text, block.level or 3)
                continue

            # References and acknowledgements start on a new page with a
            # centred title.
            doc.add_page_break()
            back_p = doc.add_paragraph(style='Heading 1')
            back_p.alignment = WD_ALIGN_PARAGRAPH.CENTER
            back_fmt = back_p.paragraph_format
            back_fmt.first_line_indent = Pt(0)
            back_fmt.line_spacing_rule = WD_LINE_SPACING.MULTIPLE
            back_fmt.line_spacing = 1.5
            back_p.clear()
            set_run_fonts(back_p.add_run(heading_text), FONT_HEI, FONT_TNR, 14, True)

        elif kind == 'paragraph':
            text = block.text or ''
            # Keyword lines are not indented; numbered reference entries get
            # a hanging indent instead.
            no_indent = text.startswith(('**关键词', '关键词:'))
            is_reference = current_section == '参考文献' and re.match(r'^\[\d+\]', text)
            add_body_paragraph(doc, text, indent=not no_indent and not is_reference, hanging=is_reference)

        elif kind == 'list_item':
            add_body_paragraph(doc, block.text or '', indent=False)

        elif kind == 'code':
            # An empty code block still yields one empty code paragraph.
            for code_line in block.lines or ['']:
                add_code_paragraph(doc, code_line)

        elif kind == 'table':
            if table_index < len(TABLE_TITLES):
                title = TABLE_TITLES[table_index]
            else:
                title = f'表 {table_index + 1}'
            add_markdown_table(doc, block.lines or [], title)
            table_index += 1
|
||||
|
||||
|
||||
def build_document():
    """Build the thesis document from ``SOURCE`` and save it to ``OUTPUT``.

    Structure: cover (first section), then a front-matter section numbered
    with upper-case Roman numerals holding the table of contents and the
    abstract blocks, then a body section numbered with decimals. Both
    numbered sections start counting at 1.
    """
    title, blocks = read_source()
    abstract_blocks, body_blocks = split_document_blocks(blocks)

    doc = Document()
    configure_document(doc)
    add_cover(doc, title)

    def start_numbered_section(number_format: str):
        new_section = doc.add_section(WD_SECTION_START.NEW_PAGE)
        apply_page_layout(new_section)
        configure_footer_page_number(new_section, number_format, 1)

    start_numbered_section('upperRoman')
    add_toc_page(doc)
    render_blocks(doc, abstract_blocks, is_abstract=True)

    start_numbered_section('decimal')
    render_blocks(doc, body_blocks, is_abstract=False)

    doc.save(OUTPUT)
|
||||
|
||||
|
||||
def main():
    """Script entry point: generate the document and print the output path."""
    build_document()
    print(f'Generated: {OUTPUT}')


# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user