Commit 3955a3b3 authored by 赵小蒙

update some annotation

parent 3a0a08e4
......@@ -335,6 +335,19 @@ def find_right_nearest_text_bbox(pymu_blocks, obj_bbox):
def bbox_relative_pos(bbox1, bbox2):
"""
判断两个矩形框的相对位置关系
Args:
bbox1: 一个四元组,表示第一个矩形框的左上角和右下角的坐标,格式为(x1, y1, x1b, y1b)
bbox2: 一个四元组,表示第二个矩形框的左上角和右下角的坐标,格式为(x2, y2, x2b, y2b)
Returns:
一个四元组,表示矩形框1相对于矩形框2的位置关系,格式为(left, right, bottom, top)
其中,left表示矩形框1是否在矩形框2的左侧,right表示矩形框1是否在矩形框2的右侧,
bottom表示矩形框1是否在矩形框2的下方,top表示矩形框1是否在矩形框2的上方
"""
x1, y1, x1b, y1b = bbox1
x2, y2, x2b, y2b = bbox2
......@@ -345,6 +358,17 @@ def bbox_relative_pos(bbox1, bbox2):
return left, right, bottom, top
def bbox_distance(bbox1, bbox2):
"""
计算两个矩形框的距离。
Args:
bbox1 (tuple): 第一个矩形框的坐标,格式为 (x1, y1, x2, y2),其中 (x1, y1) 为左上角坐标,(x2, y2) 为右下角坐标。
bbox2 (tuple): 第二个矩形框的坐标,格式为 (x1, y1, x2, y2),其中 (x1, y1) 为左上角坐标,(x2, y2) 为右下角坐标。
Returns:
float: 矩形框之间的距离。
"""
def dist(point1, point2):
return math.sqrt((point1[0]-point2[0])**2 + (point1[1]-point2[1])**2)
......
......@@ -61,7 +61,7 @@ def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config):
) # Draw the rectangle
page.insert_text(
(x0, y0 + 10), str(j + 1), fontsize=10, color=new_rgb
) # Insert the index at the top left corner of the rectangle
) # Insert the index in the top left corner of the rectangle
def draw_layout_bbox(pdf_info, pdf_bytes, out_path):
......
......@@ -32,7 +32,7 @@ def remove_horizontal_overlap_block_which_smaller(all_bboxes):
is_useful_block_horz_overlap, smaller_bbox = check_useful_block_horizontal_overlap(useful_blocks)
if is_useful_block_horz_overlap:
logger.warning(
f"skip this page, reason: {DropReason.TEXT_BLCOK_HOR_OVERLAP}")
f"skip this page, reason: {DropReason.USEFUL_BLOCK_HOR_OVERLAP}")
for bbox in all_bboxes.copy():
if smaller_bbox == bbox[:4]:
all_bboxes.remove(bbox)
......
......@@ -57,7 +57,7 @@ class AbsPipe(ABC):
@staticmethod
def classify(pdf_bytes: bytes) -> str:
"""
根据pdf的元数据,判断是否是文本pdf,还是ocr pdf
根据pdf的元数据,判断是文本pdf,还是ocr pdf
"""
pdf_meta = pdf_meta_scan(pdf_bytes)
if pdf_meta.get("_need_drop", False): # 如果返回了需要丢弃的标志,则抛出异常
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment