Commit ffc20db7 authored by kernel.h@qq.com

修改包引用问题

parent 31f3d4cc
......@@ -54,7 +54,7 @@ def prepare_env():
def _do_parse(pdf_bytes, model_list, parse_method, image_writer, md_writer, image_dir):
uni_pipe = UNIPipe()
uni_pipe = UNIPipe(pdf_bytes, model_list, image_writer, image_dir)
jso_useful_key = {
"_pdf_type": "txt",
"model_list": model_list,
......@@ -62,7 +62,7 @@ def _do_parse(pdf_bytes, model_list, parse_method, image_writer, md_writer, imag
if parse_method == "ocr":
jso_useful_key["_pdf_type"] = "ocr"
pdf_mid_data = uni_pipe.parse(pdf_bytes, image_writer, jso_useful_key)
pdf_mid_data = uni_pipe.pipe_parse()
md_content = UNIPipe.mk_markdown(pdf_mid_data, image_dir)
part_file_name = datetime.now().strftime("%H-%M-%S")
md_writer.write(content=md_content, path=f"{part_file_name}.md", mode=MODE_TXT)
......
......@@ -4,7 +4,7 @@ from magic_pdf.dict2md.mkcontent import mk_universal_format, mk_mm_markdown
from magic_pdf.dict2md.ocr_mkcontent import make_standard_format_with_para, ocr_mk_mm_markdown_with_para
from magic_pdf.filter.pdf_classify_by_type import classify
from magic_pdf.filter.pdf_meta_scan import pdf_meta_scan
from magic_pdf.io.AbsReaderWriter import AbsReaderWriter
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.libs.drop_reason import DropReason
from magic_pdf.libs.json_compressor import JsonCompressor
......
from magic_pdf.io.AbsReaderWriter import AbsReaderWriter
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.libs.json_compressor import JsonCompressor
from magic_pdf.pipe.AbsPipe import AbsPipe
from magic_pdf.user_api import parse_ocr_pdf
......
from magic_pdf.io.AbsReaderWriter import AbsReaderWriter
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.libs.json_compressor import JsonCompressor
from magic_pdf.pipe.AbsPipe import AbsPipe
from magic_pdf.user_api import parse_txt_pdf
......
......@@ -29,9 +29,9 @@ class UNIPipe(AbsPipe):
def pipe_parse(self):
if self.pdf_type == "txt":
self.pdf_mid_data = parse_union_pdf(pdf_bytes, self.model_list, self.image_writer)
self.pdf_mid_data = parse_union_pdf(self.pdf_bytes, self.model_list, self.image_writer)
elif self.pdf_type == "ocr":
self.pdf_mid_data = parse_ocr_pdf(pdf_bytes, self.model_list, self.image_writer)
self.pdf_mid_data = parse_ocr_pdf(self.pdf_bytes, self.model_list, self.image_writer)
self.compressed_pdf_mid_data = JsonCompressor.compress_json(self.pdf_mid_data)
def pipe_mk_uni_format(self):
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment