Commit b18496b0 authored by liukaiwen

Add detection of table recognition success

parent cae215bb
...@@ -9,3 +9,6 @@ block维度自定义字段 ...@@ -9,3 +9,6 @@ block维度自定义字段
""" """
# block中lines是否被删除 # block中lines是否被删除
LINES_DELETED = "lines_deleted" LINES_DELETED = "lines_deleted"
# table recognition max time default value
TABLE_MAX_TIME_VALUE = 400
\ No newline at end of file
...@@ -2,6 +2,7 @@ from loguru import logger ...@@ -2,6 +2,7 @@ from loguru import logger
import os import os
import time import time
from magic_pdf.libs.Constants import TABLE_MAX_TIME_VALUE
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新 os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新
try: try:
...@@ -105,6 +106,7 @@ class CustomPEKModel: ...@@ -105,6 +106,7 @@ class CustomPEKModel:
self.apply_formula = kwargs.get("apply_formula", self.configs["config"]["formula"]) self.apply_formula = kwargs.get("apply_formula", self.configs["config"]["formula"])
self.table_config = kwargs.get("table_config", self.configs["config"]["table_config"]) self.table_config = kwargs.get("table_config", self.configs["config"]["table_config"])
self.apply_table = self.table_config.get("is_table_recog_enable", False) self.apply_table = self.table_config.get("is_table_recog_enable", False)
self.table_max_time = self.table_config.get("max_time", TABLE_MAX_TIME_VALUE)
self.apply_ocr = ocr self.apply_ocr = ocr
logger.info( logger.info(
"DocAnalysis init, this may take some times. apply_layout: {}, apply_formula: {}, apply_ocr: {}, apply_table: {}".format( "DocAnalysis init, this may take some times. apply_layout: {}, apply_formula: {}, apply_ocr: {}, apply_table: {}".format(
...@@ -141,9 +143,8 @@ class CustomPEKModel: ...@@ -141,9 +143,8 @@ class CustomPEKModel:
# init structeqtable # init structeqtable
if self.apply_table: if self.apply_table:
max_time = self.table_config.get("max_time", 400)
self.table_model = table_model_init(str(os.path.join(models_dir, self.configs["weights"]["table"])), self.table_model = table_model_init(str(os.path.join(models_dir, self.configs["weights"]["table"])),
max_time=max_time, _device_=self.device) max_time = self.table_max_time, _device_=self.device)
logger.info('DocAnalysis init done!') logger.info('DocAnalysis init done!')
def __call__(self, image): def __call__(self, image):
...@@ -290,6 +291,12 @@ class CustomPEKModel: ...@@ -290,6 +291,12 @@ class CustomPEKModel:
end_time = time.time() end_time = time.time()
run_time = end_time - start_time run_time = end_time - start_time
logger.info(f"------------table recognition processing ends within {run_time}s-----") logger.info(f"------------table recognition processing ends within {run_time}s-----")
if run_time > self.table_max_time:
logger.warning(f"------------table recognition processing exceeds max time {self.table_max_time}s----------")
# 判断是否返回正常
if latex_code and latex_code.strip().endswith('end{tabular}'):
layout["latex"] = latex_code layout["latex"] = latex_code
else:
print(latex_code)
logger.warning(f"------------table recognition processing fails----------")
return layout_res return layout_res
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment