Commit dbe79ba1 authored by 赵小蒙

ocr_mk_mm_markdown_with_para_and_pagination逻辑更新

parent f36c2656
......@@ -99,9 +99,10 @@ def ocr_mk_mm_markdown_with_para_and_pagination(pdf_info_dict: dict):
markdown_with_para_and_pagination = []
for page_no, page_info in pdf_info_dict.items():
page_markdown = []
paras = page_info.get("para_blocks")
if not paras:
paras_of_layout = page_info.get("para_blocks")
if not paras_of_layout:
continue
for paras in paras_of_layout:
for para in paras:
para_text = ''
for line in para:
......@@ -120,7 +121,7 @@ def ocr_mk_mm_markdown_with_para_and_pagination(pdf_info_dict: dict):
page_markdown.append(para_text.strip() + ' ')
markdown_with_para_and_pagination.append({
'page_no': page_no,
'md': '\n\n'.join(page_markdown)
'md_content': '\n\n'.join(page_markdown)
})
return markdown_with_para_and_pagination
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment