Commit f65be6e0 authored by 赵小蒙

pdf_parse_by_model.py ---> pdf_parse_by_txt.py

parent 0f3bfa10
...@@ -8,7 +8,7 @@ from loguru import logger ...@@ -8,7 +8,7 @@ from loguru import logger
from magic_pdf.libs.commons import join_path, read_file from magic_pdf.libs.commons import join_path, read_file
from magic_pdf.dict2md.mkcontent import mk_mm_markdown, mk_universal_format from magic_pdf.dict2md.mkcontent import mk_mm_markdown, mk_universal_format
from magic_pdf.pipeline import parse_pdf_by_model from magic_pdf.pdf_parse_by_txt import parse_pdf_by_txt
...@@ -25,7 +25,7 @@ def main(s3_pdf_path: str, s3_pdf_profile: str, pdf_model_path: str, pdf_model_p ...@@ -25,7 +25,7 @@ def main(s3_pdf_path: str, s3_pdf_profile: str, pdf_model_path: str, pdf_model_p
pdf_bytes = read_file(s3_pdf_path, s3_pdf_profile) pdf_bytes = read_file(s3_pdf_path, s3_pdf_profile)
try: try:
paras_dict = parse_pdf_by_model( paras_dict = parse_pdf_by_txt(
pdf_bytes, pdf_model_path, save_path, book_name, pdf_model_profile, start_page_num, debug_mode=debug_mode pdf_bytes, pdf_model_path, save_path, book_name, pdf_model_profile, start_page_num, debug_mode=debug_mode
) )
parent_dir = os.path.dirname(text_content_save_path) parent_dir = os.path.dirname(text_content_save_path)
......
...@@ -70,7 +70,7 @@ paraMergeException_msg = ParaMergeException().message ...@@ -70,7 +70,7 @@ paraMergeException_msg = ParaMergeException().message
def parse_pdf_by_model( def parse_pdf_by_txt(
pdf_bytes, pdf_bytes,
pdf_model_output, pdf_model_output,
save_path, save_path,
......
...@@ -13,7 +13,7 @@ from magic_pdf.libs.commons import ( ...@@ -13,7 +13,7 @@ from magic_pdf.libs.commons import (
from magic_pdf.libs.drop_reason import DropReason from magic_pdf.libs.drop_reason import DropReason
from magic_pdf.libs.json_compressor import JsonCompressor from magic_pdf.libs.json_compressor import JsonCompressor
from magic_pdf.dict2md.mkcontent import mk_universal_format from magic_pdf.dict2md.mkcontent import mk_universal_format
from magic_pdf.pdf_parse_by_model import parse_pdf_by_model from magic_pdf.pdf_parse_by_txt import parse_pdf_by_txt
from magic_pdf.filter.pdf_classify_by_type import classify from magic_pdf.filter.pdf_classify_by_type import classify
from magic_pdf.filter.pdf_meta_scan import pdf_meta_scan from magic_pdf.filter.pdf_meta_scan import pdf_meta_scan
from loguru import logger from loguru import logger
...@@ -310,7 +310,7 @@ def parse_pdf(jso: dict, start_page_id=0, debug_mode=False) -> dict: ...@@ -310,7 +310,7 @@ def parse_pdf(jso: dict, start_page_id=0, debug_mode=False) -> dict:
f"book_name is:{book_name},start_time is:{formatted_time(start_time)}", f"book_name is:{book_name},start_time is:{formatted_time(start_time)}",
file=sys.stderr, file=sys.stderr,
) )
pdf_info_dict = parse_pdf_by_model( pdf_info_dict = parse_pdf_by_txt(
pdf_bytes, pdf_bytes,
model_output_json_list, model_output_json_list,
save_path, save_path,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment