Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
275feb5c
Commit
275feb5c
authored
Jun 26, 2024
by
quyuan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update ci
parent
715531c4
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
16 deletions
+21
-16
benchmark.py
tools/benchmark.py
+21
-16
No files found.
tools/benchmark.py
View file @
275feb5c
...
@@ -2,31 +2,36 @@ import zipfile
...
@@ -2,31 +2,36 @@ import zipfile
import
os
import
os
import
shutil
import
shutil
code_path
=
os
.
environ
.
get
(
'GITHUB_WORKSPACE'
)
code_path
=
os
.
environ
.
get
(
'GITHUB_WORKSPACE'
)
#code_path = "/home/quyuan/actions-runner/_work/Magic-PDF/Magic-PDF.bk"
#评测集存放路径
pdf_dev_path
=
"/home/quyuan/data"
pdf_dev_path
=
"/home/quyuan/data"
#magicpdf跑测结果
pdf_res_path
=
"/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
pdf_res_path
=
"/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
file_types
=
[
"academic_literature"
,
"atlas"
,
"courseware"
,
"colorful_textbook"
,
"historical_documents"
,
"notes"
,
"ordinary_books"
,
"ordinary_exam_paper"
,
"ordinary_textbook"
,
"research_report"
,
"special_exam_paper"
]
#file_types = ["academic_literature"]
def
test_cli
():
def
test_cli
():
magicpdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"output"
)
magicpdf_path
=
os
.
path
.
join
(
pdf_dev_path
,
"output"
)
if
not
os
.
path
.
exists
(
magicpdf_path
):
os
.
makedirs
(
magicpdf_path
)
cmd
=
'cd
%
s && export PYTHONPATH=. && find
%
s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py pdf-command --pdf {}'
%
(
code_path
,
magicpdf_path
)
cmd
=
'cd
%
s && export PYTHONPATH=. && find
%
s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py pdf-command --pdf {}'
%
(
code_path
,
magicpdf_path
)
os
.
system
(
cmd
)
os
.
system
(
cmd
)
for
root
,
dirs
,
files
in
os
.
walk
(
pdf_res_path
):
for
annotaion_name
in
os
.
walk
(
os
.
path
.
join
(
pdf_dev_path
,
"ci"
))
:
for
magic_file
in
files
:
if
annotaion_name
.
endswith
(
'.md'
)
:
for
file_type
in
file_types
:
for
pdf_res_path
in
os
.
listdir
(
pdf_res_path
):
target_dir
=
os
.
path
.
join
(
pdf_dev_path
,
"ci"
,
file_type
,
"magicpdf"
)
if
annotaion_name
in
os
.
path
.
join
(
pdf_res_path
,
annotaion_name
,
"auto"
):
if
magic_file
.
endswith
(
".md"
)
and
magic_file
.
startswith
(
file_type
):
prefix
=
annotaion_name
.
split
(
'_'
)[
-
2
]
source_file
=
os
.
path
.
join
(
root
,
magic_file
)
if
not
os
.
path
.
exists
(
os
.
join
(
pdf_dev_path
,
prefix
)):
target_file
=
os
.
path
.
join
(
pdf_dev_path
,
"ci"
,
file_type
,
"magicpdf"
,
magic_file
)
#os.makedirs(os.path.join(pdf_dev_path, prefix))
if
not
os
.
path
.
exists
(
target_dir
):
shutil
.
copy
(
os
.
path
.
join
(
pdf_res_path
,
annotaion_name
.
strip
(
".md"
),
"auto"
,
annotaion_name
),
os
.
join
(
pdf_dev_path
,
"ci"
,
prefix
,
annotaion_name
))
os
.
makedirs
(
target_dir
)
shutil
.
copy
(
source_file
,
target_file
)
def
calculate_score
():
def
calculate_score
():
cmd
=
"cd
%
s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir
%
s"
%
(
code_path
,
pdf_dev_path
)
data_path
=
os
.
path
.
join
(
pdf_dev_path
,
"ci"
)
cmd
=
"cd
%
s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir
%
s"
%
(
code_path
,
data_path
)
os
.
system
(
cmd
)
os
.
system
(
cmd
)
cmd
=
"cd
%
s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir
%
s"
%
(
code_path
,
pdf_dev
_path
)
cmd
=
"cd
%
s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir
%
s"
%
(
code_path
,
data
_path
)
os
.
system
(
cmd
)
os
.
system
(
cmd
)
cmd
=
"cd
%
s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name
pdf-command --download_dir
%
s --results
%
s"
%
(
code_path
,
pdf_dev_path
,
os
.
path
.
join
(
pdf_dev
_path
,
"result.json"
))
cmd
=
"cd
%
s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name
magicpdf --download_dir
%
s --results
%
s"
%
(
code_path
,
data_path
,
os
.
path
.
join
(
data
_path
,
"result.json"
))
os
.
system
(
cmd
)
os
.
system
(
cmd
)
...
@@ -40,6 +45,6 @@ def extrat_zip(zip_file_path, extract_to_path):
...
@@ -40,6 +45,6 @@ def extrat_zip(zip_file_path, extract_to_path):
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
extrat_zip
(
os
.
path
.
join
(
pdf_dev_path
,
'output.zip'
),
os
.
path
.
join
(
pdf_dev_path
,
'datasets'
))
extrat_zip
(
os
.
path
.
join
(
pdf_dev_path
,
'output.zip'
),
os
.
path
.
join
(
pdf_dev_path
))
test_cli
()
test_cli
()
calculate_score
()
calculate_score
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment