Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
b5b58d64
Unverified
Commit
b5b58d64
authored
Apr 10, 2024
by
Kaiwen Liu
Committed by
GitHub
Apr 10, 2024
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'magicpdf:master' into master
parents
6bc9df82
db54796a
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
269 additions
and
16 deletions
+269
-16
benchmark.yml
.github/workflows/benchmark.yml
+5
-3
base_data.json
tools/base_data.json
+87
-0
ocr_badcase.py
tools/ocr_badcase.py
+177
-13
No files found.
.github/workflows/benchmark.yml
View file @
b5b58d64
...
@@ -18,14 +18,16 @@ jobs:
...
@@ -18,14 +18,16 @@ jobs:
fail-fast
:
true
fail-fast
:
true
steps
:
steps
:
-
name
:
config-net
run
:
|
export http_proxy=http://bigdata_open_proxy:H89k5qwQRDYfz@10.140.90.20:10811
export https_proxy=http://bigdata_open_proxy:H89k5qwQRDYfz@10.140.90.20:10811
-
name
:
PDF benchmark
-
name
:
PDF benchmark
uses
:
actions/checkout@v3
uses
:
actions/checkout@v3
with
:
with
:
fetch-depth
:
2
fetch-depth
:
2
-
name
:
check-requirements
-
name
:
check-requirements
run
:
|
run
:
|
export http_proxy=http://bigdata_open_proxy:H89k5qwQRDYfz@10.140.90.20:10811
export https_proxy=http://bigdata_open_proxy:H89k5qwQRDYfz@10.140.90.20:10811
changed_files=$(git diff --name-only -r HEAD~1 HEAD)
changed_files=$(git diff --name-only -r HEAD~1 HEAD)
echo $changed_files
echo $changed_files
if [[ $changed_files =~ "requirements.txt" ]]; then
if [[ $changed_files =~ "requirements.txt" ]]; then
...
@@ -36,7 +38,7 @@ jobs:
...
@@ -36,7 +38,7 @@ jobs:
-
name
:
benchmark
-
name
:
benchmark
run
:
|
run
:
|
echo "start test"
echo "start test"
cd tools && python ocr_badcase.py pdf_json_label_0306.json ocr_dataset.json json_files.zip
output
.json
cd tools && python ocr_badcase.py pdf_json_label_0306.json ocr_dataset.json json_files.zip
badcase.json overall.json base_data
.json
notify_to_feishu
:
notify_to_feishu
:
if
:
${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
if
:
${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
needs
:
[
pdf-test
]
needs
:
[
pdf-test
]
...
...
tools/base_data.json
0 → 100644
View file @
b5b58d64
{
"accuracy"
:
1.0
,
"precision"
:
1.0
,
"recall"
:
1.0
,
"f1_score"
:
1.0
,
"pdf间的平均编辑距离"
:
133.10256410256412
,
"pdf间的平均bleu"
:
0.28838311595434046
,
"分段准确率"
:
0.07220216606498195
,
"行内公式准确率"
:
{
"accuracy"
:
0.004835727492533068
,
"precision"
:
0.008790072388831437
,
"recall"
:
0.010634970284641852
,
"f1_score"
:
0.009624911535739562
},
"行内公式编辑距离"
:
1.6176470588235294
,
"行内公式bleu"
:
0.17154724654721457
,
"行间公式准确率"
:
{
"accuracy"
:
0.08490566037735849
,
"precision"
:
0.1836734693877551
,
"recall"
:
0.13636363636363635
,
"f1_score"
:
0.1565217391304348
},
"行间公式编辑距离"
:
113.22222222222223
,
"行间公式bleu"
:
0.2531053359913409
,
"丢弃文本准确率"
:
{
"accuracy"
:
0.00035398230088495576
,
"precision"
:
0.0006389776357827476
,
"recall"
:
0.0007930214115781126
,
"f1_score"
:
0.0007077140835102619
},
"丢弃文本标签准确率"
:
{
"color_background_header_txt_block"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
41.0
},
"header"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
4.0
},
"footnote"
:
{
"precision"
:
1.0
,
"recall"
:
0.009708737864077669
,
"f1-score"
:
0.019230769230769232
,
"support"
:
103.0
},
"on-table"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
665.0
},
"rotate"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
63.0
},
"on-image"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
380.0
},
"micro avg"
:
{
"precision"
:
1.0
,
"recall"
:
0.0007961783439490446
,
"f1-score"
:
0.0015910898965791568
,
"support"
:
1256.0
}
},
"丢弃图片准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
},
"丢弃表格准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
}
}
\ No newline at end of file
tools/ocr_badcase.py
View file @
b5b58d64
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment