Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
474f16bc
Commit
474f16bc
authored
Apr 10, 2024
by
quyuan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
CI yaml
parent
2dcf477d
Changes
8
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
8363 additions
and
2 deletions
+8363
-2
badcase.json_2024-04-10_17-03-36.json
tools/badcase.json_2024-04-10_17-03-36.json
+2700
-0
badcase.json_2024-04-10_17-06-39.json
tools/badcase.json_2024-04-10_17-06-39.json
+2700
-0
badcase.json_2024-04-10_17-07-54.json
tools/badcase.json_2024-04-10_17-07-54.json
+2700
-0
base_data.json
tools/base_data.json
+1
-1
ocr_badcase.py
tools/ocr_badcase.py
+1
-1
overall.json_2024-04-10_17-03-36.json
tools/overall.json_2024-04-10_17-03-36.json
+87
-0
overall.json_2024-04-10_17-06-39.json
tools/overall.json_2024-04-10_17-06-39.json
+87
-0
overall.json_2024-04-10_17-07-54.json
tools/overall.json_2024-04-10_17-07-54.json
+87
-0
No files found.
tools/badcase.json_2024-04-10_17-03-36.json
0 → 100644
View file @
474f16bc
This diff is collapsed.
Click to expand it.
tools/badcase.json_2024-04-10_17-06-39.json
0 → 100644
View file @
474f16bc
This diff is collapsed.
Click to expand it.
tools/badcase.json_2024-04-10_17-07-54.json
0 → 100644
View file @
474f16bc
This diff is collapsed.
Click to expand it.
tools/base_data.json
View file @
474f16bc
...
...
@@ -3,7 +3,7 @@
"precision"
:
1.0
,
"recall"
:
1.0
,
"f1_score"
:
1.0
,
"pdf间的平均编辑距离"
:
13
3
.10256410256412
,
"pdf间的平均编辑距离"
:
13
1
.10256410256412
,
"pdf间的平均bleu"
:
0.28838311595434046
,
"分段准确率"
:
0.07220216606498195
,
"行内公式准确率"
:
{
...
...
tools/ocr_badcase.py
View file @
474f16bc
...
...
@@ -843,8 +843,8 @@ def main(standard_file, test_file, zip_file, badcase_path, overall_path,base_dat
save_results
(
result_dict
,
overall_report_dict
,
badcase_file
,
overall_file
)
result
=
compare_edit_distance
(
base_data_path
,
overall_report_dict
)
print
(
result
)
assert
result
==
1
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
description
=
"主函数,执行整个评估流程。"
)
...
...
tools/overall.json_2024-04-10_17-03-36.json
0 → 100644
View file @
474f16bc
{
"accuracy"
:
1.0
,
"precision"
:
1.0
,
"recall"
:
1.0
,
"f1_score"
:
1.0
,
"pdf间的平均编辑距离"
:
10.10256410256412
,
"pdf间的平均bleu"
:
0.28838311595434046
,
"分段准确率"
:
0.07220216606498195
,
"行内公式准确率"
:
{
"accuracy"
:
0.004835727492533068
,
"precision"
:
0.008790072388831437
,
"recall"
:
0.010634970284641852
,
"f1_score"
:
0.009624911535739562
},
"行内公式编辑距离"
:
1.6176470588235294
,
"行内公式bleu"
:
0.17154724654721457
,
"行间公式准确率"
:
{
"accuracy"
:
0.08490566037735849
,
"precision"
:
0.1836734693877551
,
"recall"
:
0.13636363636363635
,
"f1_score"
:
0.1565217391304348
},
"行间公式编辑距离"
:
113.22222222222223
,
"行间公式bleu"
:
0.2531053359913409
,
"丢弃文本准确率"
:
{
"accuracy"
:
0.00035398230088495576
,
"precision"
:
0.0006389776357827476
,
"recall"
:
0.0007930214115781126
,
"f1_score"
:
0.0007077140835102619
},
"丢弃文本标签准确率"
:
{
"rotate"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
63
},
"color_background_header_txt_block"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
41
},
"footnote"
:
{
"precision"
:
1.0
,
"recall"
:
0.009708737864077669
,
"f1-score"
:
0.01923076923076923
,
"support"
:
103
},
"header"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
4
},
"on-image"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
380
},
"on-table"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
665
},
"micro avg"
:
{
"precision"
:
1.0
,
"recall"
:
0.0007961783439490446
,
"f1-score"
:
0.001591089896579157
,
"support"
:
1256
}
},
"丢弃图片准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
},
"丢弃表格准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
}
}
\ No newline at end of file
tools/overall.json_2024-04-10_17-06-39.json
0 → 100644
View file @
474f16bc
{
"accuracy"
:
1.0
,
"precision"
:
1.0
,
"recall"
:
1.0
,
"f1_score"
:
1.0
,
"pdf间的平均编辑距离"
:
133.10256410256412
,
"pdf间的平均bleu"
:
0.28838311595434046
,
"分段准确率"
:
0.07220216606498195
,
"行内公式准确率"
:
{
"accuracy"
:
0.004835727492533068
,
"precision"
:
0.008790072388831437
,
"recall"
:
0.010634970284641852
,
"f1_score"
:
0.009624911535739562
},
"行内公式编辑距离"
:
1.6176470588235294
,
"行内公式bleu"
:
0.17154724654721457
,
"行间公式准确率"
:
{
"accuracy"
:
0.08490566037735849
,
"precision"
:
0.1836734693877551
,
"recall"
:
0.13636363636363635
,
"f1_score"
:
0.1565217391304348
},
"行间公式编辑距离"
:
113.22222222222223
,
"行间公式bleu"
:
0.2531053359913409
,
"丢弃文本准确率"
:
{
"accuracy"
:
0.00035398230088495576
,
"precision"
:
0.0006389776357827476
,
"recall"
:
0.0007930214115781126
,
"f1_score"
:
0.0007077140835102619
},
"丢弃文本标签准确率"
:
{
"rotate"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
63
},
"footnote"
:
{
"precision"
:
1.0
,
"recall"
:
0.009708737864077669
,
"f1-score"
:
0.01923076923076923
,
"support"
:
103
},
"on-image"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
380
},
"color_background_header_txt_block"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
41
},
"on-table"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
665
},
"header"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
4
},
"micro avg"
:
{
"precision"
:
1.0
,
"recall"
:
0.0007961783439490446
,
"f1-score"
:
0.001591089896579157
,
"support"
:
1256
}
},
"丢弃图片准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
},
"丢弃表格准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
}
}
\ No newline at end of file
tools/overall.json_2024-04-10_17-07-54.json
0 → 100644
View file @
474f16bc
{
"accuracy"
:
1.0
,
"precision"
:
1.0
,
"recall"
:
1.0
,
"f1_score"
:
1.0
,
"pdf间的平均编辑距离"
:
133.10256410256412
,
"pdf间的平均bleu"
:
0.28838311595434046
,
"分段准确率"
:
0.07220216606498195
,
"行内公式准确率"
:
{
"accuracy"
:
0.004835727492533068
,
"precision"
:
0.008790072388831437
,
"recall"
:
0.010634970284641852
,
"f1_score"
:
0.009624911535739562
},
"行内公式编辑距离"
:
1.6176470588235294
,
"行内公式bleu"
:
0.17154724654721457
,
"行间公式准确率"
:
{
"accuracy"
:
0.08490566037735849
,
"precision"
:
0.1836734693877551
,
"recall"
:
0.13636363636363635
,
"f1_score"
:
0.1565217391304348
},
"行间公式编辑距离"
:
113.22222222222223
,
"行间公式bleu"
:
0.2531053359913409
,
"丢弃文本准确率"
:
{
"accuracy"
:
0.00035398230088495576
,
"precision"
:
0.0006389776357827476
,
"recall"
:
0.0007930214115781126
,
"f1_score"
:
0.0007077140835102619
},
"丢弃文本标签准确率"
:
{
"header"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
4
},
"on-image"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
380
},
"rotate"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
63
},
"color_background_header_txt_block"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
41
},
"on-table"
:
{
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1-score"
:
0.0
,
"support"
:
665
},
"footnote"
:
{
"precision"
:
1.0
,
"recall"
:
0.009708737864077669
,
"f1-score"
:
0.01923076923076923
,
"support"
:
103
},
"micro avg"
:
{
"precision"
:
1.0
,
"recall"
:
0.0007961783439490446
,
"f1-score"
:
0.001591089896579157
,
"support"
:
1256
}
},
"丢弃图片准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
},
"丢弃表格准确率"
:
{
"accuracy"
:
0.0
,
"precision"
:
0.0
,
"recall"
:
0.0
,
"f1_score"
:
0.0
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment