Commit 1340a97a authored by 赵小蒙

统一使用ocr组装markdown

parent f6d8f6ca
...
@@ -106,8 +106,9 @@ class AbsPipe(ABC):
         parse_type = pdf_mid_data["_parse_type"]
         pdf_info_list = pdf_mid_data["pdf_info"]
         if parse_type == AbsPipe.PIP_TXT:
-            content_list = mk_universal_format(pdf_info_list, img_buket_path)
-            md_content = mk_mm_markdown(content_list)
+            # content_list = mk_universal_format(pdf_info_list, img_buket_path)
+            # md_content = mk_mm_markdown(content_list)
+            md_content = ocr_mk_mm_markdown_with_para(pdf_info_list, img_buket_path)
         elif parse_type == AbsPipe.PIP_OCR:
             md_content = ocr_mk_mm_markdown_with_para(pdf_info_list, img_buket_path)
         return md_content
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment