Commit 0c279ffc authored by 赵小蒙

更新remove_spans_by_bboxes中选择被删除的span的逻辑

parent f9bd0040
...@@ -177,6 +177,27 @@ def calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2): ...@@ -177,6 +177,27 @@ def calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2):
else: else:
return intersection_area / min_box_area return intersection_area / min_box_area
def calculate_overlap_area_in_bbox1_area_ratio(bbox1, bbox2):
    """Return the fraction of bbox1's area that overlaps with bbox2.

    Each box is indexed as ``[0], [1], [2], [3]`` and treated as
    (x_left, y_top, x_right, y_bottom).
    NOTE(review): assumes [0] <= [2] and [1] <= [3] for both boxes -- confirm
    against callers; inverted boxes are not normalised here.

    Returns:
        A float: ``0.0`` when the boxes do not intersect or when bbox1 has
        zero area, otherwise ``intersection_area / bbox1_area``.
    """
    # Corners of the intersection rectangle.
    x_left = max(bbox1[0], bbox2[0])
    y_top = max(bbox1[1], bbox2[1])
    x_right = min(bbox1[2], bbox2[2])
    y_bottom = min(bbox1[3], bbox2[3])

    # An empty intersection means no overlap at all.
    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])

    # Guard against division by zero for a degenerate bbox1; return a float
    # so every code path yields the same type.
    if bbox1_area == 0:
        return 0.0
    return intersection_area / bbox1_area
def get_minbox_if_overlap_by_ratio(bbox1, bbox2, ratio): def get_minbox_if_overlap_by_ratio(bbox1, bbox2, ratio):
""" """
......
from magic_pdf.libs.boxbase import _is_in_or_part_overlap from magic_pdf.libs.boxbase import calculate_overlap_area_in_bbox1_area_ratio
def remove_spans_by_bboxes(spans, need_remove_spans_bboxes): def remove_spans_by_bboxes(spans, need_remove_spans_bboxes):
# 遍历spans, 判断是否在removed_span_block_bboxes中 # 遍历spans, 判断是否在removed_span_block_bboxes中
# 如果是, 则删除该span # 如果是, 则删除该span 否则, 保留该span
# 否则, 保留该span
need_remove_spans = [] need_remove_spans = []
for span in spans: for span in spans:
for bbox in need_remove_spans_bboxes: for removed_bbox in need_remove_spans_bboxes:
if _is_in_or_part_overlap(span['bbox'], bbox): if calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], removed_bbox) > 0.5:
need_remove_spans.append(span) need_remove_spans.append(span)
break break
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment