Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
a0eff3be
Commit
a0eff3be
authored
Oct 28, 2024
by
liukaiwen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: table model update with paddle recognition v4
parent
51f56aa3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
3 deletions
+19
-3
Constants.py
magic_pdf/libs/Constants.py
+6
-0
pdf_extract_kit.py
magic_pdf/model/pdf_extract_kit.py
+13
-3
No files found.
magic_pdf/libs/Constants.py
View file @
a0eff3be
...
...
@@ -37,4 +37,10 @@ REC_MODEL_DIR = "ch_PP-OCRv4_rec_infer"
# pp rec char dict path
REC_CHAR_DICT
=
"ppocr_keys_v1.txt"
# pp rec copy rec directory
PP_REC_DIRECTORY
=
".paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer"
# pp rec copy det directory
PP_DET_DIRECTORY
=
".paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer"
magic_pdf/model/pdf_extract_kit.py
View file @
a0eff3be
from
loguru
import
logger
import
os
import
time
from
pathlib
import
Path
import
shutil
from
magic_pdf.libs.Constants
import
*
from
magic_pdf.libs.clean_memory
import
clean_memory
from
magic_pdf.model.model_list
import
AtomicModel
...
...
@@ -271,6 +272,17 @@ class CustomPEKModel:
device
=
self
.
device
)
home_directory
=
Path
.
home
()
det_source
=
os
.
path
.
join
(
models_dir
,
table_model_dir
,
DETECT_MODEL_DIR
)
rec_source
=
os
.
path
.
join
(
models_dir
,
table_model_dir
,
REC_MODEL_DIR
)
det_dest_dir
=
os
.
path
.
join
(
home_directory
,
PP_DET_DIRECTORY
)
rec_dest_dir
=
os
.
path
.
join
(
home_directory
,
PP_REC_DIRECTORY
)
if
not
os
.
path
.
exists
(
det_dest_dir
):
shutil
.
copytree
(
det_source
,
det_dest_dir
)
if
not
os
.
path
.
exists
(
rec_dest_dir
):
shutil
.
copytree
(
rec_source
,
rec_dest_dir
)
logger
.
info
(
'DocAnalysis init done!'
)
def
__call__
(
self
,
image
):
...
...
@@ -433,7 +445,5 @@ class CustomPEKModel:
logger
.
info
(
f
"-----page total time: {round(time.time() - page_start, 2)}-----"
)
return
layout_res
if
__name__
==
'__main__'
:
print
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment