Unverified Commit c23883b6 authored by myhloli's avatar myhloli Committed by GitHub

Merge pull request #46 from papayalove/master

更新了para_split
parents 82421f7c 94776e5e
......@@ -697,3 +697,9 @@ def para_split(pdf_info_dict, debug_mode, lang="en"):
new_layout_bbox = new_layout_of_pages[page_num]
__connect_middle_align_text(page_paras, new_layout_bbox, page_num, lang, debug_mode=debug_mode)
__merge_signle_list_text(page_paras, new_layout_bbox, page_num, lang)
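# Flatten layouts: replace each page's nested para_blocks (one list of blocks per layout) with a single flat list of blocks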
for page_num, page in enumerate(pdf_info_dict.values()):
page_paras = page['para_blocks']
page_blocks = [block for layout in page_paras for block in layout]
page["para_blocks"] = page_blocks
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment