Commit a5ff8ace authored by 赵小蒙

update paddleocr to 2.8+ and add layout score output

parent f80560ff
...@@ -90,7 +90,10 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False): ...@@ -90,7 +90,10 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False):
line['category_id'] = 2 line['category_id'] = 2
else: else:
logger.warning(f"unknown type: {line['type']}") logger.warning(f"unknown type: {line['type']}")
line['score'] = 0.5 + random.random() * 0.5
# 兼容不输出score的paddleocr版本
if line.get("score") is None:
line['score'] = 0.5 + random.random() * 0.5
res = line.pop('res', None) res = line.pop('res', None)
if res is not None and len(res) > 0: if res is not None and len(res) > 0:
......
...@@ -16,4 +16,4 @@ nltk==3.8.1 ...@@ -16,4 +16,4 @@ nltk==3.8.1
s3pathlib>=2.1.1 s3pathlib>=2.1.1
pytest pytest
paddlepaddle paddlepaddle
paddleocr>=2.6.0.3 paddleocr @ https://github.com/myhloli/PaddleOCR/releases/download/paddleocr-2.8.2-released/paddleocr-2.8.2-py3-none-any.whl
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment