Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
ef03c906
Commit
ef03c906
authored
Apr 16, 2024
by
kernel.h@qq.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
io --> rw
parent
1b8ea610
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
11 additions
and
11 deletions
+11
-11
magicpdf.py
magic_pdf/cli/magicpdf.py
+3
-3
pdf_image_tools.py
magic_pdf/libs/pdf_image_tools.py
+1
-1
para_split.py
magic_pdf/para/para_split.py
+2
-2
UNIPipe.py
magic_pdf/pipe/UNIPipe.py
+2
-2
AbsReaderWriter.py
magic_pdf/rw/AbsReaderWriter.py
+0
-0
DiskReaderWriter.py
magic_pdf/rw/DiskReaderWriter.py
+1
-1
S3ReaderWriter.py
magic_pdf/rw/S3ReaderWriter.py
+1
-1
__init__.py
magic_pdf/rw/__init__.py
+0
-0
user_api.py
magic_pdf/user_api.py
+1
-1
No files found.
magic_pdf/cli/magicpdf.py
View file @
ef03c906
...
...
@@ -33,8 +33,8 @@ from magic_pdf.libs.path_utils import (
remove_non_official_s3_args
,
)
from
magic_pdf.libs.config_reader
import
get_local_dir
from
magic_pdf.
io
.S3ReaderWriter
import
S3ReaderWriter
,
MODE_BIN
,
MODE_TXT
from
magic_pdf.
io
.DiskReaderWriter
import
DiskReaderWriter
from
magic_pdf.
rw
.S3ReaderWriter
import
S3ReaderWriter
,
MODE_BIN
,
MODE_TXT
from
magic_pdf.
rw
.DiskReaderWriter
import
DiskReaderWriter
from
magic_pdf.libs.json_compressor
import
JsonCompressor
...
...
@@ -119,7 +119,7 @@ def json_command(json, method):
_do_parse
(
pdf_data
,
jso
,
jso
[
'doc_layout_result'
]
,
method
,
local_image_rw
,
local_md_rw
,
...
...
magic_pdf/libs/pdf_image_tools.py
View file @
ef03c906
from
loguru
import
logger
from
magic_pdf.
io
.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.
rw
.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.libs.commons
import
fitz
from
magic_pdf.libs.commons
import
join_path
from
magic_pdf.libs.hash_utils
import
compute_sha256
...
...
magic_pdf/para/para_split.py
View file @
ef03c906
...
...
@@ -341,8 +341,8 @@ def __connect_list_inter_layout(layout_paras, new_layout_bbox, layout_list_info,
"""
if
len
(
layout_paras
)
==
0
or
len
(
layout_list_info
)
==
0
:
# 0的时候最后的return 会出错
return
layout_paras
,
[
False
,
False
]
if
page_num
==
343
:
pass
#
if page_num==343:
#
pass
for
i
in
range
(
1
,
len
(
layout_paras
)):
pre_layout_list_info
=
layout_list_info
[
i
-
1
]
...
...
magic_pdf/pipe/UNIPipe.py
View file @
ef03c906
...
...
@@ -6,8 +6,8 @@ from magic_pdf.dict2md.mkcontent import mk_universal_format, mk_mm_markdown
from
magic_pdf.dict2md.ocr_mkcontent
import
make_standard_format_with_para
,
ocr_mk_mm_markdown_with_para
from
magic_pdf.filter.pdf_classify_by_type
import
classify
from
magic_pdf.filter.pdf_meta_scan
import
pdf_meta_scan
from
magic_pdf.
io
.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.
io
.DiskReaderWriter
import
DiskReaderWriter
from
magic_pdf.
rw
.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.
rw
.DiskReaderWriter
import
DiskReaderWriter
from
magic_pdf.libs.commons
import
join_path
from
magic_pdf.libs.detect_language_from_model
import
get_language_from_model
from
magic_pdf.libs.drop_reason
import
DropReason
...
...
magic_pdf/
io
/AbsReaderWriter.py
→
magic_pdf/
rw
/AbsReaderWriter.py
View file @
ef03c906
File moved
magic_pdf/
io
/DiskReaderWriter.py
→
magic_pdf/
rw
/DiskReaderWriter.py
View file @
ef03c906
import
os
from
magic_pdf.
io
.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.
rw
.AbsReaderWriter
import
AbsReaderWriter
from
loguru
import
logger
...
...
magic_pdf/
io
/S3ReaderWriter.py
→
magic_pdf/
rw
/S3ReaderWriter.py
View file @
ef03c906
from
magic_pdf.
io
.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.
rw
.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.libs.commons
import
parse_aws_param
,
parse_bucket_key
import
boto3
from
loguru
import
logger
...
...
magic_pdf/
io
/__init__.py
→
magic_pdf/
rw
/__init__.py
View file @
ef03c906
File moved
magic_pdf/user_api.py
View file @
ef03c906
...
...
@@ -14,7 +14,7 @@
"""
from
loguru
import
logger
from
magic_pdf.
io
import
AbsReaderWriter
from
magic_pdf.
rw
import
AbsReaderWriter
from
magic_pdf.pdf_parse_by_ocr
import
parse_pdf_by_ocr
from
magic_pdf.pdf_parse_by_txt
import
parse_pdf_by_txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment