Commit c4fc4d5c authored by 赵小蒙

format

parent 8efd3de3
...@@ -83,9 +83,7 @@ def do_parse( ...@@ -83,9 +83,7 @@ def do_parse(
): ):
local_image_dir, local_md_dir = prepare_env(pdf_file_name, parse_method) local_image_dir, local_md_dir = prepare_env(pdf_file_name, parse_method)
image_writer, md_writer = DiskReaderWriter(local_image_dir), DiskReaderWriter( image_writer, md_writer = DiskReaderWriter(local_image_dir), DiskReaderWriter(local_md_dir)
local_md_dir
)
image_dir = (os.path.basename(local_image_dir),) image_dir = (os.path.basename(local_image_dir),)
if parse_method == "auto": if parse_method == "auto":
...@@ -96,7 +94,7 @@ def do_parse( ...@@ -96,7 +94,7 @@ def do_parse(
elif parse_method == "ocr": elif parse_method == "ocr":
pipe = OCRPipe(pdf_bytes, model_list, image_writer, is_debug=True) pipe = OCRPipe(pdf_bytes, model_list, image_writer, is_debug=True)
else: else:
print("unknow parse method") print("unknown parse method")
sys.exit(1) sys.exit(1)
pipe.pipe_classify() pipe.pipe_classify()
...@@ -115,8 +113,7 @@ def do_parse( ...@@ -115,8 +113,7 @@ def do_parse(
# write_to_csv(r"D:\project\20231108code-clean\linshixuqiu\pdf_dev\新模型\新建文件夹\luanma.csv", # write_to_csv(r"D:\project\20231108code-clean\linshixuqiu\pdf_dev\新模型\新建文件夹\luanma.csv",
# [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']]) # [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']])
md_content = pipe.pipe_mk_markdown(image_dir, drop_mode=DropMode.NONE) md_content = pipe.pipe_mk_markdown(str(image_dir), drop_mode=DropMode.NONE)
if f_dump_md: if f_dump_md:
"""写markdown""" """写markdown"""
md_writer.write( md_writer.write(
...@@ -148,8 +145,8 @@ def do_parse( ...@@ -148,8 +145,8 @@ def do_parse(
path=f"{pdf_file_name}_origin.pdf", path=f"{pdf_file_name}_origin.pdf",
mode=AbsReaderWriter.MODE_BIN, mode=AbsReaderWriter.MODE_BIN,
) )
content_list = pipe.pipe_mk_uni_format(image_dir, drop_mode=DropMode.NONE)
content_list = pipe.pipe_mk_uni_format(str(image_dir), drop_mode=DropMode.NONE)
if f_dump_content_list: if f_dump_content_list:
"""写content_list""" """写content_list"""
md_writer.write( md_writer.write(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment