Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
c4fc4d5c
Commit
c4fc4d5c
authored
Jun 20, 2024
by
赵小蒙
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
format
parent
8efd3de3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
7 deletions
+4
-7
magicpdf.py
magic_pdf/cli/magicpdf.py
+4
-7
No files found.
magic_pdf/cli/magicpdf.py
View file @
c4fc4d5c
...
@@ -83,9 +83,7 @@ def do_parse(
...
@@ -83,9 +83,7 @@ def do_parse(
):
):
local_image_dir
,
local_md_dir
=
prepare_env
(
pdf_file_name
,
parse_method
)
local_image_dir
,
local_md_dir
=
prepare_env
(
pdf_file_name
,
parse_method
)
image_writer
,
md_writer
=
DiskReaderWriter
(
local_image_dir
),
DiskReaderWriter
(
image_writer
,
md_writer
=
DiskReaderWriter
(
local_image_dir
),
DiskReaderWriter
(
local_md_dir
)
local_md_dir
)
image_dir
=
(
os
.
path
.
basename
(
local_image_dir
),)
image_dir
=
(
os
.
path
.
basename
(
local_image_dir
),)
if
parse_method
==
"auto"
:
if
parse_method
==
"auto"
:
...
@@ -96,7 +94,7 @@ def do_parse(
...
@@ -96,7 +94,7 @@ def do_parse(
elif
parse_method
==
"ocr"
:
elif
parse_method
==
"ocr"
:
pipe
=
OCRPipe
(
pdf_bytes
,
model_list
,
image_writer
,
is_debug
=
True
)
pipe
=
OCRPipe
(
pdf_bytes
,
model_list
,
image_writer
,
is_debug
=
True
)
else
:
else
:
print
(
"unknow parse method"
)
print
(
"unknow
n
parse method"
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
pipe
.
pipe_classify
()
pipe
.
pipe_classify
()
...
@@ -115,8 +113,7 @@ def do_parse(
...
@@ -115,8 +113,7 @@ def do_parse(
# write_to_csv(r"D:\project\20231108code-clean\linshixuqiu\pdf_dev\新模型\新建文件夹\luanma.csv",
# write_to_csv(r"D:\project\20231108code-clean\linshixuqiu\pdf_dev\新模型\新建文件夹\luanma.csv",
# [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']])
# [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']])
md_content
=
pipe
.
pipe_mk_markdown
(
image_dir
,
drop_mode
=
DropMode
.
NONE
)
md_content
=
pipe
.
pipe_mk_markdown
(
str
(
image_dir
),
drop_mode
=
DropMode
.
NONE
)
if
f_dump_md
:
if
f_dump_md
:
"""写markdown"""
"""写markdown"""
md_writer
.
write
(
md_writer
.
write
(
...
@@ -148,8 +145,8 @@ def do_parse(
...
@@ -148,8 +145,8 @@ def do_parse(
path
=
f
"{pdf_file_name}_origin.pdf"
,
path
=
f
"{pdf_file_name}_origin.pdf"
,
mode
=
AbsReaderWriter
.
MODE_BIN
,
mode
=
AbsReaderWriter
.
MODE_BIN
,
)
)
content_list
=
pipe
.
pipe_mk_uni_format
(
image_dir
,
drop_mode
=
DropMode
.
NONE
)
content_list
=
pipe
.
pipe_mk_uni_format
(
str
(
image_dir
),
drop_mode
=
DropMode
.
NONE
)
if
f_dump_content_list
:
if
f_dump_content_list
:
"""写content_list"""
"""写content_list"""
md_writer
.
write
(
md_writer
.
write
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment