Unverified Commit faa8a267 authored by myhloli's avatar myhloli Committed by GitHub

Merge pull request #91 from icecraft/fix/table_footnote

fix: table and footnote relations
parents ef0e779c bd1ca92a
...@@ -89,6 +89,25 @@ class MagicModel: ...@@ -89,6 +89,25 @@ class MagicModel:
ret = [] ret = []
MAX_DIS_OF_POINT = 10**9 + 7 MAX_DIS_OF_POINT = 10**9 + 7
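# The subject bbox and the object bbox are merged into one enclosing bbox (the "merged bbox"). Select every other subject that overlaps the merged bbox and whose area is at least the object's area.
# Then find the shortest distance between those selected subjects and the object.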
def may_find_other_nearest_bbox(subject_idx, object_idx):
    """Return the shortest distance from a competing subject to the object.

    The bboxes of ``all_bboxes[subject_idx]`` and ``all_bboxes[object_idx]``
    are merged into one enclosing bbox. Every other entry of ``all_bboxes``
    whose ``category_id`` equals ``subject_category_id`` is a competitor
    when it satisfies ``_is_part_overlap(merged, bbox)`` or
    ``_is_in(bbox, merged)`` and its area is at least the object's area.

    Args:
        subject_idx: Index into ``all_bboxes`` of the subject being matched.
        object_idx: Index into ``all_bboxes`` of the candidate object.

    Returns:
        The minimum ``dis[i][object_idx]`` over all competitors ``i``, or
        ``float("inf")`` when there is no competitor.

    NOTE(review): ``all_bboxes``, ``subject_category_id``, ``dis``,
    ``_is_part_overlap`` and ``_is_in`` come from the enclosing scope;
    ``dis`` is presumably the pairwise bbox distance matrix - confirm.
    """
    subject_bbox = all_bboxes[subject_idx]["bbox"]
    object_bbox = all_bboxes[object_idx]["bbox"]

    # Smallest axis-aligned box enclosing both the subject and the object.
    merged_bbox = [
        min(subject_bbox[0], object_bbox[0]),
        min(subject_bbox[1], object_bbox[1]),
        max(subject_bbox[2], object_bbox[2]),
        max(subject_bbox[3], object_bbox[3]),
    ]
    object_area = abs(object_bbox[2] - object_bbox[0]) * abs(
        object_bbox[3] - object_bbox[1]
    )

    ret = float("inf")
    for i, candidate in enumerate(all_bboxes):
        # Only other bboxes of the subject category can compete.
        if i == subject_idx or candidate["category_id"] != subject_category_id:
            continue

        bbox = candidate["bbox"]
        if not (_is_part_overlap(merged_bbox, bbox) or _is_in(bbox, merged_bbox)):
            continue

        candidate_area = abs(bbox[2] - bbox[0]) * abs(bbox[3] - bbox[1])
        if candidate_area >= object_area:
            # Keep the running minimum. The previous code compared against
            # float("inf") each time, so the last competitor always won
            # regardless of its distance.
            ret = min(ret, dis[i][object_idx])
    return ret
subjects = self.__reduct_overlap( subjects = self.__reduct_overlap(
list( list(
map( map(
...@@ -170,8 +189,10 @@ class MagicModel: ...@@ -170,8 +189,10 @@ class MagicModel:
arr.sort(key=lambda x: x[0]) arr.sort(key=lambda x: x[0])
if len(arr) > 0: if len(arr) > 0:
candidates.append(arr[0][1]) # bug: the object nearest to this subject may lie beyond another subject, e.g. [this subject] [some subject] [the nearest object of subject]
seen.add(arr[0][1]) if may_find_other_nearest_bbox(i, j) >= arr[0][0]:
candidates.append(arr[0][1])
seen.add(arr[0][1])
# 已经获取初始种子 # 已经获取初始种子
for j in set(candidates): for j in set(candidates):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment