Unverified commit b39f3a8e, authored by myhloli and committed by GitHub

Merge pull request #35 from myhloli/master

将ocr_parse逻辑切换到v2,并解决几个parse过程中的error
parents e8544335 dcf6e712
......@@ -252,7 +252,7 @@ def fix_image_block(block, img_blocks):
break
# 根据list长度,判断img_block中是否有img_caption
if len(img_block['img_caption_bbox']) > 0:
if img_block['img_caption_bbox'] is not None:
img_caption_block, img_caption_spans = merge_spans_to_block(
block['spans'], img_block['img_caption_bbox'], BlockType.ImageCaption
)
......@@ -280,7 +280,7 @@ def fix_table_block(block, table_blocks):
break
# 根据list长度,判断table_block中是否有caption
if len(table_block['table_caption_bbox']) > 0:
if table_block['table_caption_bbox'] is not None:
table_caption_block, table_caption_spans = merge_spans_to_block(
block['spans'], table_block['table_caption_bbox'], BlockType.TableCaption
)
......@@ -293,7 +293,7 @@ def fix_table_block(block, table_blocks):
block['spans'].remove(span)
# 根据list长度,判断table_block中是否有table_note
if len(table_block['table_footnote_bbox']) > 0:
if table_block['table_footnote_bbox'] is not None:
table_footnote_block, table_footnote_spans = merge_spans_to_block(
block['spans'], table_block['table_footnote_bbox'], BlockType.TableFootnote
)
......
......@@ -222,10 +222,10 @@ def get_qa_need_list_v2(blocks):
interline_equations = []
for block in blocks:
if block["type"] == BlockType.Image:
if block["block_type"] == BlockType.Image:
images.append(block)
elif block["type"] == BlockType.Table:
elif block["block_type"] == BlockType.Table:
tables.append(block)
elif block["type"] == BlockType.InterlineEquation:
elif block["block_type"] == BlockType.InterlineEquation:
interline_equations.append(block)
return images, tables, interline_equations
......@@ -15,7 +15,7 @@
from loguru import logger
from magic_pdf.rw import AbsReaderWriter
from magic_pdf.pdf_parse_by_ocr import parse_pdf_by_ocr
from magic_pdf.pdf_parse_by_ocr_v2 import parse_pdf_by_ocr
from magic_pdf.pdf_parse_by_txt import parse_pdf_by_txt
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment