Commit 37483f0a authored by liukaiwen

更新了para_split

parent 4cc88d2b
...@@ -696,4 +696,10 @@ def para_split(pdf_info_dict, debug_mode, lang="en"):
page_paras = page['para_blocks']
new_layout_bbox = new_layout_of_pages[page_num]
__connect_middle_align_text(page_paras, new_layout_bbox, page_num, lang, debug_mode=debug_mode)
__merge_signle_list_text(page_paras, new_layout_bbox, page_num, lang)
\ No newline at end of file
# layout展平
for page_num, page in enumerate(pdf_info_dict.values()):
page_paras = page['para_blocks']
page_blocks = [block for layout in page_paras for block in layout]
page["para_blocks"] = page_blocks
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment