Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
b18496b0
Commit
b18496b0
authored
Aug 07, 2024
by
liukaiwen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add table recognition success detect
parent
cae215bb
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
5 deletions
+15
-5
Constants.py
magic_pdf/libs/Constants.py
+4
-1
pdf_extract_kit.py
magic_pdf/model/pdf_extract_kit.py
+11
-4
No files found.
magic_pdf/libs/Constants.py
View file @
b18496b0
...
...
@@ -9,3 +9,6 @@ block维度自定义字段
"""
# block中lines是否被删除
LINES_DELETED
=
"lines_deleted"
# table recognition max time default value
TABLE_MAX_TIME_VALUE
=
400
\ No newline at end of file
magic_pdf/model/pdf_extract_kit.py
View file @
b18496b0
...
...
@@ -2,6 +2,7 @@ from loguru import logger
import
os
import
time
from
magic_pdf.libs.Constants
import
TABLE_MAX_TIME_VALUE
os
.
environ
[
'NO_ALBUMENTATIONS_UPDATE'
]
=
'1'
# 禁止albumentations检查更新
try
:
...
...
@@ -105,6 +106,7 @@ class CustomPEKModel:
self
.
apply_formula
=
kwargs
.
get
(
"apply_formula"
,
self
.
configs
[
"config"
][
"formula"
])
self
.
table_config
=
kwargs
.
get
(
"table_config"
,
self
.
configs
[
"config"
][
"table_config"
])
self
.
apply_table
=
self
.
table_config
.
get
(
"is_table_recog_enable"
,
False
)
self
.
table_max_time
=
self
.
table_config
.
get
(
"max_time"
,
TABLE_MAX_TIME_VALUE
)
self
.
apply_ocr
=
ocr
logger
.
info
(
"DocAnalysis init, this may take some times. apply_layout: {}, apply_formula: {}, apply_ocr: {}, apply_table: {}"
.
format
(
...
...
@@ -141,9 +143,8 @@ class CustomPEKModel:
# init structeqtable
if
self
.
apply_table
:
max_time
=
self
.
table_config
.
get
(
"max_time"
,
400
)
self
.
table_model
=
table_model_init
(
str
(
os
.
path
.
join
(
models_dir
,
self
.
configs
[
"weights"
][
"table"
])),
max_time
=
max_time
,
_device_
=
self
.
device
)
max_time
=
self
.
table_
max_time
,
_device_
=
self
.
device
)
logger
.
info
(
'DocAnalysis init done!'
)
def
__call__
(
self
,
image
):
...
...
@@ -290,6 +291,12 @@ class CustomPEKModel:
end_time
=
time
.
time
()
run_time
=
end_time
-
start_time
logger
.
info
(
f
"------------table recognition processing ends within {run_time}s-----"
)
if
run_time
>
self
.
table_max_time
:
logger
.
warning
(
f
"------------table recognition processing exceeds max time {self.table_max_time}s----------"
)
# 判断是否返回正常
if
latex_code
and
latex_code
.
strip
()
.
endswith
(
'end{tabular}'
):
layout
[
"latex"
]
=
latex_code
else
:
print
(
latex_code
)
logger
.
warning
(
f
"------------table recognition processing fails----------"
)
return
layout_res
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment