Unverified Commit 0aa45778 authored by yyy's avatar yyy Committed by GitHub

feat: add test case (#645)

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

* feat: add table case

---------
Co-authored-by: quyuan <quyuan@pjlab.org>
parent 24c143fe
......@@ -10,7 +10,6 @@ on:
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
pull_request:
branches:
- "master"
......@@ -18,12 +17,11 @@ on:
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
workflow_dispatch:
jobs:
cli-test:
runs-on: pdf
timeout-minutes: 120
timeout-minutes: 240
strategy:
fail-fast: true
......@@ -33,17 +31,16 @@ jobs:
with:
fetch-depth: 2
- name: install
run: |
echo $GITHUB_WORKSPACE && sh tests/retry_env.sh
- name: unit test
- name: install&test
run: |
source activate mineru
conda env list
pip show coverage
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && coverage run -m pytest tests/unittest --cov=magic_pdf/ --cov-report term-missing --cov-report html
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
- name: cli test
run: |
source ~/.bashrc && cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
cd $GITHUB_WORKSPACE && pytest -m P0 -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
......
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: mineru
on:
  schedule:
    # Runs daily at 22:00. GitHub Actions evaluates cron schedules in UTC.
    - cron: '0 22 * * *'
jobs:
  # Runs the unit tests with coverage, then the CLI/SDK test suite.
  cli-test:
    runs-on: pdf
    timeout-minutes: 240
    strategy:
      fail-fast: true
    steps:
      - name: PDF cli
        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: install&test
        run: |
          source activate mineru
          conda env list
          pip show coverage
          # cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
          cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
          cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
          cd $GITHUB_WORKSPACE && python tests/get_coverage.py
          cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
  # Posts a failure message to the webhook when cli-test failed on master.
  notify_to_feishu:
    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
    needs: cli-test
    runs-on: pdf
    steps:
      - name: get_actor
        run: |
          metion_list="dt-yy"
          echo "$GITHUB_ACTOR"
          if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
            metion_list="xuchao"
          elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
            metion_list="zhaoxiaomeng"
          elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
            metion_list="xurui1"
          fi
          echo "$metion_list"
          # Values appended to GITHUB_ENV only become visible in later steps,
          # so METIONS is not echoed back from env here.
          echo "METIONS=$metion_list" >> "$GITHUB_ENV"
      - name: notify
        run: |
          # Do not echo secrets into the job log; the URL is quoted so the shell passes it as one word.
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' "${{ secrets.WEBHOOK_URL }}"
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: mineru
on:
  push:
    branches:
      - "master"
      - "dev"
    paths-ignore:
      - "cmds/**"
      - "**.md"
  workflow_dispatch:
jobs:
  # Runs the unit tests with coverage, then the CLI/SDK test suite.
  cli-test:
    runs-on: pdf
    timeout-minutes: 240
    strategy:
      fail-fast: true
    steps:
      - name: PDF cli
        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: install&test
        run: |
          source activate mineru
          conda env list
          pip show coverage
          # cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
          cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
          cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
          cd $GITHUB_WORKSPACE && python tests/get_coverage.py
          cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
  # Posts a failure message to the webhook when cli-test failed on master.
  notify_to_feishu:
    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
    needs: cli-test
    runs-on: pdf
    steps:
      - name: get_actor
        run: |
          metion_list="dt-yy"
          echo "$GITHUB_ACTOR"
          if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
            metion_list="xuchao"
          elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
            metion_list="zhaoxiaomeng"
          elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
            metion_list="xurui1"
          fi
          echo "$metion_list"
          # Values appended to GITHUB_ENV only become visible in later steps,
          # so METIONS is not echoed back from env here.
          echo "METIONS=$metion_list" >> "$GITHUB_ENV"
      - name: notify
        run: |
          # Do not echo secrets into the job log; the URL is quoted so the shell passes it as one word.
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' "${{ secrets.WEBHOOK_URL }}"
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: update-base
on:
  # Triggered by pushing a tag whose name ends in "released", or manually.
  push:
    tags:
      - '*released'
  workflow_dispatch:
jobs:
  pdf-test:
    runs-on: pdf
    timeout-minutes: 40
    steps:
      - name: update-base
        uses: actions/checkout@v3
      # Currently this step only prints a marker line.
      - name: start-update
        run: |
          echo "start test"
*.tar
*.tar.gz
*.zip
venv*/
envs/
slurm_logs/
......@@ -31,7 +32,7 @@ tmp
.vscode
.vscode/
ocr_demo
.coveragerc
/app/common/__init__.py
/magic_pdf/config/__init__.py
source.dev.env
......
......@@ -17,3 +17,4 @@ pyopenssl==24.0.0
struct-eqtable==0.1.0
pytest-cov
beautifulsoup4
coverage
\ No newline at end of file
......@@ -21,4 +21,5 @@ def delete_file(path):
print(f"Error deleting directory '{path}': {e}")
if __name__ == "__main__":
delete_file("htmlcov")
\ No newline at end of file
delete_file("htmlcov/")
#delete_file(".coverage")
#!/bin/bash
# 定义最大重试次数
max_retries=5
retry_count=0
while true; do
# prepare env
source activate MinerU
pip install -r requirements-qa.txt
pip uninstall magic-pdf
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
#python -m pip install -r requirements-qa.txt
python -m pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
exit_code=$?
if [ $exit_code -eq 0 ]; then
echo "test.sh 成功执行!"
......@@ -22,6 +19,6 @@ while true; do
exit 1
fi
echo "test.sh 执行失败 (退出码: $exit_code)。尝试第 $retry_count 次重试..."
sleep 5 # 等待 5 秒后重试
sleep 5
fi
done
......@@ -4,5 +4,5 @@ conf = {
"pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev",
"pdf_res_path": "/tmp/magic-pdf",
"jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl",
"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test.pdf"
"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test_rearch_report.pdf"
}
\ No newline at end of file
import pytest
import torch
def clear_gpu_memory():
    """Call ``torch.cuda.empty_cache()`` and print a confirmation line."""
    torch.cuda.empty_cache()
    print("GPU memory cleared.")
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_teardown(item, nextitem):
    """Hook wrapper that clears GPU memory after each test's teardown.

    Args:
        item: The test item being torn down (unused here).
        nextitem: The next scheduled test item (unused here).
    """
    # Let the wrapped teardown hooks run first.
    yield
    # Then release GPU memory before the next test starts.
    clear_gpu_memory()
\ No newline at end of file
"""common definitions."""
import os
import shutil
import re
import json
def check_shell(cmd):
    """Run ``cmd`` through ``os.system`` and assert that it succeeded.

    Args:
        cmd: Shell command line to execute.

    Raises:
        AssertionError: If ``os.system`` returns a non-zero status.
    """
    res = os.system(cmd)
    # Report the command and status so a CI failure can be diagnosed from the log.
    assert res == 0, f"shell command failed (status {res}): {cmd}"
def update_config_file(file_path, key, value):
    """Set ``key`` to ``value`` in the JSON file at ``file_path``.

    The file is read as UTF-8 JSON, the entry is assigned, and the whole
    document is written back to the same path.

    Args:
        file_path: Path of the JSON config file to rewrite.
        key: Top-level key to assign.
        value: JSON-serialisable value stored under ``key``.
    """
    with open(file_path, 'r', encoding="utf-8") as src:
        settings = json.load(src)
    settings[key] = value
    with open(file_path, 'w', encoding="utf-8") as dst:
        json.dump(settings, dst)
def cli_count_folders_and_check_contents(file_path):
"""" count cli files."""
......@@ -41,3 +48,32 @@ def delete_file(path):
print(f"Directory '{path}' and its contents deleted.")
except TypeError as e:
print(f"Error deleting directory '{path}': {e}")
def check_latex_table_exists(file_path):
    """Report whether the file contains a LaTeX ``tabular`` environment.

    Args:
        file_path: Path of a UTF-8 text file to scan.

    Returns:
        True if at least one ``\\begin{tabular}`` ... ``\\end{tabular}`` span
        is found (it may cross line breaks), otherwise False.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()
    tabular_re = re.compile(r'\\begin\{tabular\}.*?\\end\{tabular\}', re.DOTALL)
    return tabular_re.search(text) is not None
def check_html_table_exists(file_path):
    """Report whether the file contains an HTML ``<table>`` element.

    Args:
        file_path: Path of a UTF-8 text file to scan.

    Returns:
        True if at least one ``<table ...>`` ... ``</table>`` span is found
        (it may cross line breaks), otherwise False.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()
    table_re = re.compile(r'<table.*?>.*?</table>', re.DOTALL)
    return table_re.search(text) is not None
def check_close_tables(file_path):
    """Report whether the file contains no table at all.

    This function only inspects the file; it does not modify or delete it.

    Args:
        file_path: Path of a UTF-8 text file to scan.

    Returns:
        True if the text contains neither a LaTeX ``tabular`` environment nor
        an HTML ``<table>`` element, otherwise False.
    """
    latex_pattern = r'\\begin\{tabular\}.*?\\end\{tabular\}'
    html_pattern = r'<table.*?>.*?</table>'
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    # One hit of either kind is enough, so search() replaces collecting every match.
    has_latex = re.search(latex_pattern, content, re.DOTALL) is not None
    has_html = re.search(html_pattern, content, re.DOTALL) is not None
    return not (has_latex or has_html)
\ No newline at end of file
"""test cli and sdk."""
import logging
import os
import pytest
from conf import conf
from lib import common
import time
import magic_pdf.model as model_config
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
......@@ -57,6 +56,7 @@ class TestCli:
@pytest.mark.P0
def test_pdf_ocr_sdk(self):
"""pdf sdk ocr test."""
time.sleep(2)
demo_names = list()
pdf_path = os.path.join(pdf_dev_path, 'pdf')
for pdf_file in os.listdir(pdf_path):
......@@ -92,6 +92,7 @@ class TestCli:
@pytest.mark.P0
def test_pdf_txt_sdk(self):
"""pdf sdk txt test."""
time.sleep(2)
demo_names = list()
pdf_path = os.path.join(pdf_dev_path, 'pdf')
for pdf_file in os.listdir(pdf_path):
......@@ -99,7 +100,6 @@ class TestCli:
demo_names.append(pdf_file.split('.')[0])
for demo_name in demo_names:
pdf_path = os.path.join(pdf_dev_path, 'pdf', f'{demo_name}.pdf')
print(pdf_path)
pdf_bytes = open(pdf_path, 'rb').read()
local_image_dir = os.path.join(pdf_dev_path, 'pdf', 'images')
image_dir = str(os.path.basename(local_image_dir))
......@@ -127,6 +127,7 @@ class TestCli:
@pytest.mark.P0
def test_pdf_cli_auto(self):
"""magic_pdf cli test auto."""
time.sleep(2)
demo_names = []
pdf_path = os.path.join(pdf_dev_path, 'pdf')
for pdf_file in os.listdir(pdf_path):
......@@ -143,8 +144,9 @@ class TestCli:
os.path.join(res_path, demo_name, 'auto'))
@pytest.mark.P0
def test_pdf_clit_txt(self):
def test_pdf_cli_txt(self):
"""magic_pdf cli test txt."""
time.sleep(2)
demo_names = []
pdf_path = os.path.join(pdf_dev_path, 'pdf')
for pdf_file in os.listdir(pdf_path):
......@@ -161,8 +163,9 @@ class TestCli:
os.path.join(res_path, demo_name, 'txt'))
@pytest.mark.P0
def test_pdf_clit_ocr(self):
def test_pdf_cli_ocr(self):
"""magic_pdf cli test ocr."""
time.sleep(2)
demo_names = []
pdf_path = os.path.join(pdf_dev_path, 'pdf')
for pdf_file in os.listdir(pdf_path):
......@@ -178,84 +181,101 @@ class TestCli:
common.cli_count_folders_and_check_contents(
os.path.join(res_path, demo_name, 'ocr'))
@pytest.mark.skip(reason='out-of-date api')
@pytest.mark.P1
def test_pdf_dev_cli_local_jsonl_txt(self):
"""magic_pdf_dev cli local txt."""
time.sleep(2)
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, "txt")
logging.info(cmd)
os.system(cmd)
@pytest.mark.skip(reason='out-of-date api')
@pytest.mark.P1
def test_pdf_dev_cli_local_jsonl_ocr(self):
"""magic_pdf_dev cli local ocr."""
time.sleep(2)
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'ocr')
logging.info(cmd)
os.system(cmd)
@pytest.mark.skip(reason='out-of-date api')
@pytest.mark.P1
def test_pdf_dev_cli_local_jsonl_auto(self):
"""magic_pdf_dev cli local auto."""
time.sleep(2)
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'auto')
logging.info(cmd)
os.system(cmd)
@pytest.mark.skip(reason='out-of-date api')
@pytest.mark.P1
def test_pdf_dev_cli_s3_jsonl_txt(self):
"""magic_pdf_dev cli s3 txt."""
time.sleep(2)
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, "txt")
logging.info(cmd)
os.system(cmd)
@pytest.mark.skip(reason='out-of-date api')
@pytest.mark.P1
def test_pdf_dev_cli_s3_jsonl_ocr(self):
"""magic_pdf_dev cli s3 ocr."""
time.sleep(2)
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'ocr')
logging.info(cmd)
os.system(cmd)
@pytest.mark.skip(reason='out-of-date api')
@pytest.mark.P1
def test_pdf_dev_cli_s3_jsonl_auto(self):
"""magic_pdf_dev cli s3 auto."""
time.sleep(2)
jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'auto')
logging.info(cmd)
os.system(cmd)
@pytest.mark.P1
def test_pdf_dev_cli_pdf_json_auto(self):
"""magic_pdf_dev cli pdf+json auto."""
time.sleep(2)
json_path = os.path.join(pdf_dev_path, 'test_model.json')
pdf_path = os.path.join(pdf_dev_path, 'pdf', 'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
pdf_path = os.path.join(pdf_dev_path, 'pdf', 'test_rearch_report.pdf')
cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % (pdf_path, json_path, 'auto')
logging.info(cmd)
os.system(cmd)
@pytest.mark.skip(reason='out-of-date api')
@pytest.mark.P1
def test_pdf_dev_cli_pdf_json_ocr(self):
"""magic_pdf_dev cli pdf+json ocr."""
time.sleep(2)
json_path = os.path.join(pdf_dev_path, 'test_model.json')
pdf_path = os.path.join(pdf_dev_path, 'pdf', 'research_report_1f978cd81fb7260c8f7644039ec2c054.pdf')
pdf_path = os.path.join(pdf_dev_path, 'pdf', 'test_rearch_report.pdf')
cmd = 'magic-pdf-dev --pdf %s --json %s --method %s' % (pdf_path, json_path, 'auto')
logging.info(cmd)
os.system(cmd)
@pytest.mark.P1
def test_s3_sdk_suto(self):
pdf_ak = os.environ.get('pdf_ak', "")
"""
test s3 sdk auto.
"""
time.sleep(2)
pdf_ak = os.getenv('pdf_ak')
print (pdf_ak)
pdf_sk = os.environ.get('pdf_sk', "")
pdf_bucket = os.environ.get('bucket', "")
pdf_endpoint = os.environ.get('pdf_endpoint', "")
s3_pdf_path = conf.conf["s3_pdf_path"]
image_dir = "s3://" + pdf_bucket + "/mineru/test/test.md"
image_dir = "s3://" + pdf_bucket + "/mineru/test/output"
print (image_dir)
s3pdf_cli = S3ReaderWriter(pdf_ak, pdf_sk, pdf_endpoint)
s3image_cli = S3ReaderWriter(pdf_ak, pdf_sk, pdf_endpoint, parent_path=image_dir)
pdf_bytes = s3pdf_cli.read(s3_pdf_path, mode=s3pdf_cli.MODE_BIN)
......@@ -267,6 +287,60 @@ class TestCli:
md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
assert len(md_content) > 0
@pytest.mark.P1
def test_local_magic_pdf_open_st_table(self):
    """magic-pdf CLI with ``~/magic_pdf_st.json``: output must contain a LaTeX table."""
    time.sleep(2)
    # Activate this case's config. Fail here if the copy fails, otherwise the
    # CLI would run with whatever config an earlier test left behind.
    pre_cmd = "cp ~/magic_pdf_st.json ~/magic-pdf.json"
    print(pre_cmd)
    common.check_shell(pre_cmd)
    pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
    # Start from a clean output directory so stale results cannot satisfy the check.
    common.delete_file(pdf_res_path)
    cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
    common.check_shell(cli_cmd)
    md_path = os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
    res = common.check_latex_table_exists(md_path)
    assert res is True
@pytest.mark.P1
def test_local_magic_pdf_open_html_table(self):
    """magic-pdf CLI with ``~/magic_pdf_html.json``: output must contain an HTML table."""
    time.sleep(2)
    # Activate this case's config. Fail here if the copy fails, otherwise the
    # CLI would run with whatever config an earlier test left behind.
    pre_cmd = "cp ~/magic_pdf_html.json ~/magic-pdf.json"
    common.check_shell(pre_cmd)
    pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
    # Start from a clean output directory so stale results cannot satisfy the check.
    common.delete_file(pdf_res_path)
    cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
    common.check_shell(cli_cmd)
    md_path = os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
    res = common.check_html_table_exists(md_path)
    assert res is True
@pytest.mark.P1
def test_magic_pdf_close_html_table_cpu(self):
    """magic-pdf CLI with ``~/magic_pdf_html_table_cpu.json``: output must contain an HTML table."""
    # NOTE(review): the method name says "close", but the config name and the
    # assertion both expect an HTML table to be produced - confirm the intent.
    time.sleep(2)
    # Activate this case's config. Fail here if the copy fails, otherwise the
    # CLI would run with whatever config an earlier test left behind.
    pre_cmd = "cp ~/magic_pdf_html_table_cpu.json ~/magic-pdf.json"
    common.check_shell(pre_cmd)
    pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
    # Start from a clean output directory so stale results cannot satisfy the check.
    common.delete_file(pdf_res_path)
    cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
    common.check_shell(cli_cmd)
    md_path = os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
    res = common.check_html_table_exists(md_path)
    assert res is True
@pytest.mark.P1
def test_local_magic_pdf_close_html_table(self):
    """magic-pdf CLI with ``~/magic_pdf_close_table.json``: output must contain no table."""
    time.sleep(2)
    # Activate this case's config. Fail here if the copy fails, otherwise the
    # CLI would run with whatever config an earlier test left behind.
    pre_cmd = "cp ~/magic_pdf_close_table.json ~/magic-pdf.json"
    common.check_shell(pre_cmd)
    pdf_path = os.path.join(pdf_dev_path, "pdf", "test_rearch_report.pdf")
    # Start from a clean output directory so stale results cannot satisfy the check.
    common.delete_file(pdf_res_path)
    cli_cmd = "magic-pdf -p %s -o %s" % (pdf_path, pdf_res_path)
    common.check_shell(cli_cmd)
    md_path = os.path.join(pdf_res_path, "test_rearch_report", "auto", "test_rearch_report.md")
    # check_close_tables is True only when neither a LaTeX nor an HTML table is present.
    res = common.check_close_tables(md_path)
    assert res is True
if __name__ == '__main__':
pytest.main()
"""
test performance
"""
import os
import shutil
import json
from lib import calculate_score
import pytest
from conf import conf
code_path = os.environ.get('GITHUB_WORKSPACE')
pdf_dev_path = conf.conf["pdf_dev_path"]
pdf_res_path = conf.conf["pdf_res_path"]
class TestTable:
    """Performance test cases."""

    def test_perf_close_table(self):
        """Placeholder for the performance case with table handling closed.

        The method has no body beyond this docstring, so it passes trivially.
        """
def get_score():
    """Score the results described by ``result.json`` under ``pdf_dev_path``.

    Returns:
        Whatever ``Scoring.summary_scores()`` returns after
        ``calculate_similarity_total("mineru", pdf_dev_path)`` has been run.
    """
    result_json = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_json)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()
"""
test table case
"""
import os
import shutil
import json
from lib import calculate_score
import pytest
from conf import conf
code_path = os.environ.get('GITHUB_WORKSPACE')
pdf_dev_path = conf.conf["pdf_dev_path"]
pdf_res_path = conf.conf["pdf_res_path"]
class TestTable:
    """Table test cases.

    Every method below is a placeholder containing only a docstring, so each
    one currently passes trivially.
    """

    def test_paddle_table_master_cuda(self):
        """Table model: paddle table master, device mode: cuda."""

    def test_paddle_table_master_cpu(self):
        """Table model: paddle table master, device mode: cpu."""

    def test_st_table_cuda(self):
        """Table model: ST, device mode: cuda."""

    def test_st_table_cpu(self):
        """Table model: ST, device mode: cpu."""

    def test_close_table_cuda(self):
        """Table handling closed, device mode: cuda."""
def get_score():
    """Score the results described by ``result.json`` under ``pdf_dev_path``.

    Returns:
        Whatever ``Scoring.summary_scores()`` returns after
        ``calculate_similarity_total("mineru", pdf_dev_path)`` has been run.
    """
    result_json = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_json)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()
......@@ -7,7 +7,7 @@ class TestppTableModel:
img = Image.open("tests/unittest/test_table/assets/table.jpg")
# 修改table模型路径
config = {"device": "cuda",
"model_dir": "/home/quyuan/PDF-Extract-Kit/models/TabRec/TableMaster"}
"model_dir": "/home/quyuan/.cache/modelscope/hub/opendatalab/PDF-Extract-Kit/models/TabRec/TableMaster"}
table_model = ppTableModel(config)
res = table_model.img2html(img)
true_value = """<td><table border="1"><thead><tr><td><b>Methods</b></td><td><b>R</b></td><td><b>P</b></td><td><b>F</b></td><td><b>FPS</b></td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88.</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></td>\n"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment