Commit 59b0b0c3 authored by 赵小蒙

make markdown时特殊符号转义

parent 8a2736a5
from magic_pdf.libs.markdown_utils import ocr_escape_special_markdown_char
from magic_pdf.libs.ocr_content_type import ContentType
......@@ -14,7 +15,7 @@ def ocr_mk_nlp_markdown(pdf_info_dict: dict):
for span in line['spans']:
if not span.get('content'):
continue
content = span['content'].replace('$', '\$') # 转义$
content = ocr_escape_special_markdown_char(span['content']) # 转义特殊符号
if span['type'] == ContentType.InlineEquation:
content = f"${content}$"
elif span['type'] == ContentType.InterlineEquation:
......@@ -43,7 +44,7 @@ def ocr_mk_mm_markdown(pdf_info_dict: dict):
else:
content = f"![]({span['image_path']})"
else:
content = span['content'].replace('$', '\$') # 转义$
content = ocr_escape_special_markdown_char(span['content']) # 转义特殊符号
if span['type'] == ContentType.InlineEquation:
content = f"${content}$"
elif span['type'] == ContentType.InterlineEquation:
......
......@@ -18,3 +18,14 @@ def escape_special_markdown_char(pymu_blocks):
span['text'] = span['text'].replace(char, "\\" + char)
return pymu_blocks
def ocr_escape_special_markdown_char(content):
    """
    Backslash-escape characters in body text that are special in Markdown.

    Each ``*``, backtick, ``~`` and ``$`` found in ``content`` is prefixed
    with a single backslash; every other character is returned unchanged.
    """
    # A single translation table handles all four characters in one pass.
    escape_table = str.maketrans({mark: "\\" + mark for mark in "*`~$"})
    return content.translate(escape_table)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment