Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
1ba1f1aa
Unverified
Commit
1ba1f1aa
authored
Jul 18, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Jul 18, 2024
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #169 from dt-yy/master
parents
1fe56d05
8b714854
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
82 additions
and
0 deletions
+82
-0
test_bench_gpu.py
tests/test_cli/test_bench_gpu.py
+82
-0
No files found.
tests/test_cli/test_bench_gpu.py
0 → 100644
View file @
1ba1f1aa
import
pytest
import
os
from
conf
import
conf
import
os
import
json
from
magic_pdf.pipe.UNIPipe
import
UNIPipe
from
magic_pdf.rw.DiskReaderWriter
import
DiskReaderWriter
from
lib
import
calculate_score
# Directory settings for this benchmark, looked up in the shared test
# configuration mapping (`conf.conf`) in one pass.
pdf_res_path, code_path, pdf_dev_path = (
    conf.conf[key] for key in ("pdf_res_path", "code_path", "pdf_dev_path")
)
class TestCliCuda:
    """CLI benchmark regression test (CUDA run, going by the class name)."""

    def test_pdf_sdk_cuda(self):
        """Re-run the PDF conversion and compare scores with the last baseline.

        Steps:
          1. Remove ``pdf_res_path`` and convert the PDFs again.
          2. Read the last non-blank record of ``<pdf_dev_path>/result.json``
             as the baseline.
          3. Run ``pre_clean.py`` and compute the current scores.
          4. Write the current scores to ``<pdf_dev_path>/ci/result.json``.
          5. Assert that none of the three averages is below the baseline.
        """
        clean_magicpdf(pdf_res_path)
        pdf_to_markdown()

        # Read the baseline and close the file before get_score() is called:
        # get_score() is handed the same result.json path (presumably it may
        # update that file -- TODO confirm), so no handle should stay open.
        baseline_path = os.path.join(pdf_dev_path, "result.json")
        with open(baseline_path, "r", encoding="utf-8") as fr:
            # Skip blank lines so a trailing newline cannot hide the record.
            records = [line.strip() for line in fr if line.strip()]
        assert records, f"no baseline score record found in {baseline_path}"
        last_score = json.loads(records[-1])
        last_simscore = last_score["average_sim_score"]
        last_editdistance = last_score["average_edit_distance"]
        last_bleu = last_score["average_bleu_score"]

        os.system(f"python tests/test_cli/lib/pre_clean.py --tool_name mineru --download_dir {pdf_dev_path}")
        now_score = get_score()
        print("now_score:", now_score)

        # Persist the fresh scores; the context manager guarantees the data
        # is flushed and the handle closed even if an assertion below fails.
        ci_dir = os.path.join(pdf_dev_path, "ci")
        os.makedirs(ci_dir, exist_ok=True)
        with open(os.path.join(ci_dir, "result.json"), "w", encoding="utf-8") as fw:
            fw.write(json.dumps(now_score) + "\n")

        now_simscore = now_score["average_sim_score"]
        now_editdistance = now_score["average_edit_distance"]
        now_bleu = now_score["average_bleu_score"]

        assert last_simscore <= now_simscore
        # NOTE(review): if "average_edit_distance" is a lower-is-better
        # metric this comparison is inverted -- confirm against the scorer.
        assert last_editdistance <= now_editdistance
        assert last_bleu <= now_bleu
def pdf_to_markdown():
    """Run the ``magic-pdf`` CLI on every ``*.pdf`` in ``<pdf_dev_path>/pdf``.

    For each PDF the command
    ``magic-pdf pdf-command --pdf <path> --inside_model true`` is executed
    through the shell; its exit status is ignored, as before. The directory
    ``<pdf_dev_path>/mineru`` is created after a conversion has been
    launched, so nothing is created when no PDF is present.
    """
    import shlex  # function-scope import: only this function needs it

    pdf_dir = os.path.join(pdf_dev_path, "pdf")
    out_dir = os.path.join(pdf_dev_path, "mineru")

    # Keep the real file name instead of rebuilding it from the text before
    # the first dot, which pointed at a non-existent file for "a.b.pdf".
    # Sorting makes the processing order deterministic.
    pdf_files = sorted(name for name in os.listdir(pdf_dir) if name.endswith(".pdf"))

    for pdf_file in pdf_files:
        pdf_path = os.path.join(pdf_dir, pdf_file)
        # Quote the path (POSIX shell rules) so spaces or shell
        # metacharacters in it cannot split or alter the command.
        cmd = "magic-pdf pdf-command --pdf %s --inside_model true" % shlex.quote(pdf_path)
        os.system(cmd)
        # Same ordering as before: the directory is ensured after the
        # command has run; exist_ok makes the repeated call a no-op.
        os.makedirs(out_dir, exist_ok=True)
def get_score():
    """Compute and return the score summary for the ``mineru`` output.

    Builds a ``calculate_score.Scoring`` object for
    ``<pdf_dev_path>/result.json``, runs ``calculate_similarity_total`` for
    the ``"mineru"`` tool over ``pdf_dev_path`` and returns whatever
    ``summary_scores()`` yields.
    """
    result_file = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()
def clean_magicpdf(pdf_res_path):
    """Delete ``pdf_res_path`` (a directory tree, file or symlink) if present.

    Replaces the former ``rm -rf <path>`` shell call: no shell is involved,
    so paths containing spaces or shell metacharacters are handled
    correctly. Like ``rm -rf``, the call is best-effort: a missing path or
    an empty argument is a no-op and removal errors are not raised.

    Args:
        pdf_res_path: Path to remove.
    """
    import contextlib  # function-scope imports: only needed here
    import shutil

    if not pdf_res_path:
        # "rm -rf" with no operand does nothing; keep that behaviour.
        return

    if os.path.islink(pdf_res_path) or os.path.isfile(pdf_res_path):
        # Remove the link or file itself; rmtree refuses to act on symlinks.
        with contextlib.suppress(OSError):
            os.remove(pdf_res_path)
    elif os.path.isdir(pdf_res_path):
        shutil.rmtree(pdf_res_path, ignore_errors=True)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment