Commit 7dcf1b7c authored by 赵小蒙

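Fix equation replacement span type: tag equation spans with `type` (instead of `_type`) and store the raw LaTeX under `latex` (instead of pre-formatted `text`)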

parent db7b829d
...@@ -21,11 +21,7 @@ from magic_pdf.pre_proc.ocr_span_list_modify import ( ...@@ -21,11 +21,7 @@ from magic_pdf.pre_proc.ocr_span_list_modify import (
remove_overlaps_min_spans, remove_overlaps_min_spans,
get_qa_need_list_v2, get_qa_need_list_v2,
) )
from magic_pdf.pre_proc.equations_replace import (
combine_chars_to_pymudict,
remove_chars_in_text_blocks,
replace_equations_in_textblock,
)
from magic_pdf.pre_proc.equations_replace import ( from magic_pdf.pre_proc.equations_replace import (
combine_chars_to_pymudict, combine_chars_to_pymudict,
remove_chars_in_text_blocks, remove_chars_in_text_blocks,
...@@ -55,16 +51,34 @@ def txt_spans_extract(pdf_page, inline_equations, interline_equations): ...@@ -55,16 +51,34 @@ def txt_spans_extract(pdf_page, inline_equations, interline_equations):
bbox = span["bbox"] bbox = span["bbox"]
if float_equal(bbox[0], bbox[2]) or float_equal(bbox[1], bbox[3]): if float_equal(bbox[0], bbox[2]) or float_equal(bbox[1], bbox[3]):
continue continue
spans.append( if span.get('type') == ContentType.InlineEquation:
{ spans.append(
"bbox": list(span["bbox"]), {
"content": span["text"], "bbox": list(span["bbox"]),
"type": ContentType.Text, "content": span["latex"],
} "type": ContentType.InlineEquation,
) }
)
elif span.get('type') == ContentType.InterlineEquation:
spans.append(
{
"bbox": list(span["bbox"]),
"content": span["latex"],
"type": ContentType.InterlineEquation,
}
)
else:
spans.append(
{
"bbox": list(span["bbox"]),
"content": span["text"],
"type": ContentType.Text,
}
)
return spans return spans
def replace_text_span(pymu_spans, ocr_spans): def replace_text_span(pymu_spans, ocr_spans):
return list(filter(lambda x: x["type"] != ContentType.Text, ocr_spans)) + pymu_spans return list(filter(lambda x: x["type"] != ContentType.Text, ocr_spans)) + pymu_spans
......
...@@ -191,13 +191,13 @@ def insert_interline_equations_textblock(interline_eq_bboxes, pymu_block_list): ...@@ -191,13 +191,13 @@ def insert_interline_equations_textblock(interline_eq_bboxes, pymu_block_list):
"spans": [ "spans": [
{ {
"size": 9.962599754333496, "size": 9.962599754333496,
"_type": TYPE_INTERLINE_EQUATION, "type": TYPE_INTERLINE_EQUATION,
"flags": 4, "flags": 4,
"font": TYPE_INTERLINE_EQUATION, "font": TYPE_INTERLINE_EQUATION,
"color": 0, "color": 0,
"ascender": 0.9409999847412109, "ascender": 0.9409999847412109,
"descender": -0.3050000071525574, "descender": -0.3050000071525574,
"text": f"\n$$\n{latex_content}\n$$\n", "latex": latex_content,
"origin": [bbox[0], bbox[1]], "origin": [bbox[0], bbox[1]],
"bbox": bbox, "bbox": bbox,
} }
...@@ -309,13 +309,13 @@ def replace_line_v2(eqinfo, line): ...@@ -309,13 +309,13 @@ def replace_line_v2(eqinfo, line):
equation_span = { equation_span = {
"size": 9.962599754333496, "size": 9.962599754333496,
"_type": TYPE_INLINE_EQUATION, "type": TYPE_INLINE_EQUATION,
"flags": 4, "flags": 4,
"font": TYPE_INLINE_EQUATION, "font": TYPE_INLINE_EQUATION,
"color": 0, "color": 0,
"ascender": 0.9409999847412109, "ascender": 0.9409999847412109,
"descender": -0.3050000071525574, "descender": -0.3050000071525574,
"text": "", "latex": "",
"origin": [337.1410153102337, 216.0205245153934], "origin": [337.1410153102337, 216.0205245153934],
"bbox": [ "bbox": [
337.1410153102337, 337.1410153102337,
...@@ -325,11 +325,11 @@ def replace_line_v2(eqinfo, line): ...@@ -325,11 +325,11 @@ def replace_line_v2(eqinfo, line):
], ],
} }
# equation_span = line['spans'][0].copy() # equation_span = line['spans'][0].copy()
equation_span["text"] = f" ${eqinfo['latex']}$ " equation_span["latex"] = eqinfo['latex']
equation_span["bbox"] = [x0, equation_span["bbox"][1], x1, equation_span["bbox"][3]] equation_span["bbox"] = [x0, equation_span["bbox"][1], x1, equation_span["bbox"][3]]
equation_span["origin"] = [equation_span["bbox"][0], equation_span["bbox"][1]] equation_span["origin"] = [equation_span["bbox"][0], equation_span["bbox"][1]]
equation_span["chars"] = delete_chars equation_span["chars"] = delete_chars
equation_span["_type"] = TYPE_INLINE_EQUATION equation_span["type"] = TYPE_INLINE_EQUATION
equation_span["_eq_bbox"] = eqinfo["bbox"] equation_span["_eq_bbox"] = eqinfo["bbox"]
line["spans"].insert(first_overlap_span_idx + 1, equation_span) # 放入公式 line["spans"].insert(first_overlap_span_idx + 1, equation_span) # 放入公式
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment