Commit 229dc3c7 authored by quyuan's avatar quyuan

add tool link

parent d2e250ce
......@@ -47,8 +47,7 @@ jobs:
- name: get-benchmark-result
run: |
echo "start test"
cd tools && python text_badcase.py pdf_json_label_0306.json pdf_json_label_0229.json json_files.zip text_overall base_data_text.json --badcase_path text_badcase --s3_bucket_name llm-process-pperf --s3_file_directory qa-validate/pdf-datasets/badcase --AWS_ACCESS_KEY 7X9CWNHIVOHH3LXRD5WK --AWS_SECRET_KEY IHLyTsv7h4ArzReLWUGZNKvwqB7CMrRi6e7ZyUt0 --END_POINT_URL http://p-ceph-norm-inside.pjlab.org.cn:80
python ocr_badcase.py pdf_json_label_0306.json ocr_dataset.json json_files.zip ocr_overall base_data_ocr.json --badcase_path ocr_badcase --s3_bucket_name llm-process-pperf --s3_file_directory qa-validate/pdf-datasets/badcase --AWS_ACCESS_KEY 7X9CWNHIVOHH3LXRD5WK --AWS_SECRET_KEY IHLyTsv7h4ArzReLWUGZNKvwqB7CMrRi6e7ZyUt0 --END_POINT_URL http://p-ceph-norm-inside.pjlab.org.cn:80
cd tools && python benchmark.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
......
......@@ -7,22 +7,24 @@ pdf_res_path = "/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci/magic-pdf"
def test_cli():
cmd = 'cd %s && export PYTHONPATH=. && find %s -type f -name "*.pdf" | xargs -I{} python magic_pdf/cli/magicpdf.py pdf-command --pdf {}' % (code_path, pdf_dev_path)
os.system(cmd)
for annotaion_name in os.listdir(os.join(pdf_dev_path, "output")):
if not os.path.exists(os.path.join(pdf_dev_path, "output")):
os.makedirs(os.path.join(pdf_dev_path, "output"))
for annotaion_name in os.listdir(os.path.join(pdf_dev_path, "output")):
if annotaion_name.endswith('.pdf'):
for pdf_res_path in os.listdir(pdf_res_path):
if ".md" in os.join(pdf_res_path, annotaion_name, "auto"):
if ".md" in os.path.join(pdf_res_path, annotaion_name, "auto"):
prefix = annotaion_name.split('_')[-2]
if not os.path.exists(os.join(pdf_dev_path, prefix)):
os.makedirs(os.join(pdf_dev_path, prefix))
shutil.copy(os.join(pdf_res_path, annotaion_name, "auto", annotaion_name + ".md"), os.join(pdf_dev_path, prefix, annotaion_name + ".md"))
os.makedirs(os.path.join(pdf_dev_path, prefix))
shutil.copy(os.path.join(pdf_res_path, annotaion_name, "auto", annotaion_name + ".md"), os.join(pdf_dev_path, prefix, annotaion_name + ".md"))
def calculate_score():
cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s" % pdf_dev_path
cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name annotations --download_dir %s" % (code_path, pdf_dev_path)
os.system(cmd)
cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s" % (pdf_dev_path)
cmd = "cd %s && export PYTHONPATH=. && python tools/clean_photo.py --tool_name magicpdf --download_dir %s" % (code_path, pdf_dev_path)
os.system(cmd)
cmd = "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name pdf-command --download_dir %s --results %s" % (pdf_dev_path, os.join(pdf_dev_path, "result.json"))
cmd = "cd %s && export PYTHONPATH=. && python tools/markdown_calculate.py --tool_name pdf-command --download_dir %s --results %s" % (code_path, pdf_dev_path, os.path.join(pdf_dev_path, "result.json"))
os.system(cmd)
......@@ -36,6 +38,6 @@ def extrat_zip(zip_file_path, extract_to_path):
if __name__ == "__main__":
extrat_zip(os.join(pdf_dev_path, 'output.zip'), os.join(pdf_dev_path,'datasets'))
extrat_zip(os.path.join(pdf_dev_path, 'output.zip'), os.path.join(pdf_dev_path,'datasets'))
test_cli()
calculate_score()
\ No newline at end of file
calculate_score()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment