Commit d3ee9abb authored by 赵小蒙

更新ocr_mk_mm_markdown_with_para_core逻辑

parent 07e4f115
...@@ -107,6 +107,7 @@ def ocr_mk_mm_markdown_with_para_core(paras_of_layout, mode): ...@@ -107,6 +107,7 @@ def ocr_mk_mm_markdown_with_para_core(paras_of_layout, mode):
for line in para: for line in para:
for span in line['spans']: for span in line['spans']:
span_type = span.get('type') span_type = span.get('type')
content = ''
if span_type == ContentType.Text: if span_type == ContentType.Text:
content = split_long_words(span['content']) content = split_long_words(span['content'])
# content = span['content'] # content = span['content']
...@@ -119,7 +120,7 @@ def ocr_mk_mm_markdown_with_para_core(paras_of_layout, mode): ...@@ -119,7 +120,7 @@ def ocr_mk_mm_markdown_with_para_core(paras_of_layout, mode):
content = f"\n![]({join_path(s3_image_save_path, span['image_path'])})\n" content = f"\n![]({join_path(s3_image_save_path, span['image_path'])})\n"
elif mode == 'nlp': elif mode == 'nlp':
pass pass
if content: if content != '':
para_text += content + ' ' para_text += content + ' '
page_markdown.append(para_text.strip() + ' ') page_markdown.append(para_text.strip() + ' ')
return page_markdown return page_markdown
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment