Unverified commit 016f871a, authored by myhloli and committed by GitHub

Merge pull request #86 from myhloli/master

fix equation replace type
parents c8ccc390 232964d0
......@@ -21,11 +21,7 @@ from magic_pdf.pre_proc.ocr_span_list_modify import (
remove_overlaps_min_spans,
get_qa_need_list_v2,
)
from magic_pdf.pre_proc.equations_replace import (
combine_chars_to_pymudict,
remove_chars_in_text_blocks,
replace_equations_in_textblock,
)
from magic_pdf.pre_proc.equations_replace import (
combine_chars_to_pymudict,
remove_chars_in_text_blocks,
......@@ -55,6 +51,23 @@ def txt_spans_extract(pdf_page, inline_equations, interline_equations):
bbox = span["bbox"]
if float_equal(bbox[0], bbox[2]) or float_equal(bbox[1], bbox[3]):
continue
if span.get('type') == ContentType.InlineEquation:
spans.append(
{
"bbox": list(span["bbox"]),
"content": span["latex"],
"type": ContentType.InlineEquation,
}
)
elif span.get('type') == ContentType.InterlineEquation:
spans.append(
{
"bbox": list(span["bbox"]),
"content": span["latex"],
"type": ContentType.InterlineEquation,
}
)
else:
spans.append(
{
"bbox": list(span["bbox"]),
......@@ -65,6 +78,7 @@ def txt_spans_extract(pdf_page, inline_equations, interline_equations):
return spans
def replace_text_span(pymu_spans, ocr_spans):
    """Replace the text spans found by OCR with the PyMuPDF-extracted spans.

    Every span in ``ocr_spans`` whose ``"type"`` equals ``ContentType.Text``
    is dropped; the remaining OCR spans keep their original order and are
    followed by all of ``pymu_spans``.

    Args:
        pymu_spans: List of span dicts to append after the kept OCR spans.
        ocr_spans: Iterable of span dicts, each carrying a ``"type"`` key.

    Returns:
        A new list: the non-text OCR spans followed by ``pymu_spans``.
        Neither input is modified.

    Raises:
        KeyError: If a span in ``ocr_spans`` has no ``"type"`` key.
    """
    non_text_spans = [
        span for span in ocr_spans if span["type"] != ContentType.Text
    ]
    return non_text_spans + pymu_spans
......
......@@ -191,13 +191,13 @@ def insert_interline_equations_textblock(interline_eq_bboxes, pymu_block_list):
"spans": [
{
"size": 9.962599754333496,
"_type": TYPE_INTERLINE_EQUATION,
"type": TYPE_INTERLINE_EQUATION,
"flags": 4,
"font": TYPE_INTERLINE_EQUATION,
"color": 0,
"ascender": 0.9409999847412109,
"descender": -0.3050000071525574,
"text": f"\n$$\n{latex_content}\n$$\n",
"latex": latex_content,
"origin": [bbox[0], bbox[1]],
"bbox": bbox,
}
......@@ -309,13 +309,13 @@ def replace_line_v2(eqinfo, line):
equation_span = {
"size": 9.962599754333496,
"_type": TYPE_INLINE_EQUATION,
"type": TYPE_INLINE_EQUATION,
"flags": 4,
"font": TYPE_INLINE_EQUATION,
"color": 0,
"ascender": 0.9409999847412109,
"descender": -0.3050000071525574,
"text": "",
"latex": "",
"origin": [337.1410153102337, 216.0205245153934],
"bbox": [
337.1410153102337,
......@@ -325,11 +325,11 @@ def replace_line_v2(eqinfo, line):
],
}
# equation_span = line['spans'][0].copy()
equation_span["text"] = f" ${eqinfo['latex']}$ "
equation_span["latex"] = eqinfo['latex']
equation_span["bbox"] = [x0, equation_span["bbox"][1], x1, equation_span["bbox"][3]]
equation_span["origin"] = [equation_span["bbox"][0], equation_span["bbox"][1]]
equation_span["chars"] = delete_chars
equation_span["_type"] = TYPE_INLINE_EQUATION
equation_span["type"] = TYPE_INLINE_EQUATION
equation_span["_eq_bbox"] = eqinfo["bbox"]
line["spans"].insert(first_overlap_span_idx + 1, equation_span) # 放入公式
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment