Commit 0dbbf9c3 authored by 赵小蒙

解决'[]'括起来的文本被识别成链接的问题

parent 8e3beebd
......@@ -67,16 +67,17 @@ def ocr_mk_mm_markdown_with_para(pdf_info_dict: dict):
for span in line['spans']:
span_type = span.get('type')
if span_type == ContentType.Text:
para_text += span['content']
content = span['content']
elif span_type == ContentType.InlineEquation:
para_text += f" ${span['content']}$ "
content = f" ${span['content']}$ "
elif span_type == ContentType.InterlineEquation:
para_text += f"$$\n{span['content']}\n$$ "
elif span_type in [ ContentType.Image, ContentType.Table ]:
para_text += f"![]({join_path(s3_image_save_path, span['image_path'])})"
markdown.append(para_text)
content = f"$$\n{span['content']}\n$$ "
elif span_type in [ContentType.Image, ContentType.Table]:
content = f"![]({join_path(s3_image_save_path, span['image_path'])})"
para_text += content + ' '
markdown.append(para_text.strip() + ' ')
return '\n\n'.join(markdown)
return '\n'.join(markdown)
def make_standard_format_with_para(pdf_info_dict: dict):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment