Commit 5f992de4 authored by myhloli

fix(magic_pdf): prevent removal of low-confidence spans already dropped

parent ba07ebce
...@@ -5,12 +5,17 @@ from magic_pdf.libs.boxbase import calculate_overlap_area_in_bbox1_area_ratio, g ...@@ -5,12 +5,17 @@ from magic_pdf.libs.boxbase import calculate_overlap_area_in_bbox1_area_ratio, g
from magic_pdf.libs.drop_tag import DropTag from magic_pdf.libs.drop_tag import DropTag
from magic_pdf.libs.ocr_content_type import ContentType, BlockType from magic_pdf.libs.ocr_content_type import ContentType, BlockType
def remove_overlaps_low_confidence_spans(spans): def remove_overlaps_low_confidence_spans(spans):
dropped_spans = [] dropped_spans = []
# 删除重叠spans中置信度低的的那些 # 删除重叠spans中置信度低的的那些
for span1 in spans: for span1 in spans:
for span2 in spans: for span2 in spans:
if span1 != span2: if span1 != span2:
# span1 或 span2 任何一个都不应该在 dropped_spans 中
if span1 in dropped_spans or span2 in dropped_spans:
continue
else:
if calculate_iou(span1['bbox'], span2['bbox']) > 0.9: if calculate_iou(span1['bbox'], span2['bbox']) > 0.9:
if span1['score'] < span2['score']: if span1['score'] < span2['score']:
span_need_remove = span1 span_need_remove = span1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment