Unverified commit 65c3ac66, authored by Kaiwen Liu, committed by GitHub

<fix>(para_split_v2): index out of range issue of span_text first char (#396)

Co-authored-by: liukaiwen <liukaiwen@pjlab.org.cn>
parent 0b764d59
...@@ -100,7 +100,7 @@ def __detect_list_lines(lines, new_layout_bboxes, lang): ...@@ -100,7 +100,7 @@ def __detect_list_lines(lines, new_layout_bboxes, lang):
if lang != 'en': if lang != 'en':
return lines, None return lines, None
else:
total_lines = len(lines) total_lines = len(lines)
line_fea_encode = [] line_fea_encode = []
""" """
...@@ -114,6 +114,9 @@ def __detect_list_lines(lines, new_layout_bboxes, lang): ...@@ -114,6 +114,9 @@ def __detect_list_lines(lines, new_layout_bboxes, lang):
x_map_tag_dict, min_x_tag = cluster_line_x(lines) x_map_tag_dict, min_x_tag = cluster_line_x(lines)
for l in lines: for l in lines:
span_text = __get_span_text(l['spans'][0]) span_text = __get_span_text(l['spans'][0])
if not span_text:
line_fea_encode.append(0)
continue
first_char = span_text[0] first_char = span_text[0]
layout = __find_layout_bbox_by_line(l['bbox'], new_layout_bboxes) layout = __find_layout_bbox_by_line(l['bbox'], new_layout_bboxes)
if not layout: if not layout:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment