Commit f1252e71 authored by 赵小蒙's avatar 赵小蒙

fix lost image bug

parent 875b5db1
from loguru import logger
from magic_pdf.libs.boxbase import __is_overlaps_y_exceeds_threshold, get_minbox_if_overlap_by_ratio, \
calculate_overlap_area_in_bbox1_area_ratio
calculate_overlap_area_in_bbox1_area_ratio, _is_in_or_part_overlap_with_area_ratio
from magic_pdf.libs.drop_tag import DropTag
from magic_pdf.libs.ocr_content_type import ContentType, BlockType
from magic_pdf.pre_proc.ocr_span_list_modify import modify_y_axis, modify_inline_equation
......@@ -247,10 +247,11 @@ def fix_image_block(block, img_blocks):
block['blocks'] = []
# 遍历img_blocks,找到与当前block匹配的img_block
for img_block in img_blocks:
if img_block['bbox'] == block['bbox']:
if _is_in_or_part_overlap_with_area_ratio(block['bbox'], img_block['bbox'], 0.95):
# 创建img_body_block
for span in block['spans']:
if span['type'] == ContentType.Image and span['bbox'] == img_block['img_body_bbox']:
if span['type'] == ContentType.Image and img_block['img_body_bbox'] == span['bbox']:
# 创建img_body_block
img_body_block = make_body_block(span, img_block['img_body_bbox'], BlockType.ImageBody)
block['blocks'].append(img_body_block)
......@@ -275,11 +276,11 @@ def fix_table_block(block, table_blocks):
block['blocks'] = []
# 遍历table_blocks,找到与当前block匹配的table_block
for table_block in table_blocks:
if table_block['bbox'] == block['bbox']:
if _is_in_or_part_overlap_with_area_ratio(block['bbox'], table_block['bbox'], 0.95):
# 创建table_body_block
for span in block['spans']:
if span['type'] == ContentType.Table and span['bbox'] == table_block['table_body_bbox']:
if span['type'] == ContentType.Table and table_block['table_body_bbox'] == span['bbox']:
# 创建table_body_block
table_body_block = make_body_block(span, table_block['table_body_bbox'], BlockType.TableBody)
block['blocks'].append(table_body_block)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment