Commit b9667fd3 authored by liukaiwen

add table recognition and conversion to LaTeX

parent dbe628ee
......@@ -124,7 +124,6 @@ def ocr_mk_markdown_with_para_core_v2(paras_of_layout, mode, img_buket_path=""):
for block in para_block['blocks']: # 1st.拼table_caption
if block['type'] == BlockType.TableCaption:
table_caption = merge_para_with_text(block)
para_text += table_caption
for block in para_block['blocks']: # 2nd.拼table_body
if block['type'] == BlockType.TableBody:
for line in block['lines']:
......
......@@ -104,6 +104,7 @@ class CustomPEKModel:
self.apply_layout = kwargs.get("apply_layout", self.configs["config"]["layout"])
self.apply_formula = kwargs.get("apply_formula", self.configs["config"]["formula"])
self.table_config = kwargs.get("table_config", self.configs["config"]["table_config"])
self.apply_table = self.table_config.get("is_table_recog_enable", False)
self.apply_ocr = ocr
logger.info(
"DocAnalysis init, this may take some times. apply_layout: {}, apply_formula: {}, apply_ocr: {}".format(
......@@ -139,7 +140,7 @@ class CustomPEKModel:
self.ocr_model = ModifiedPaddleOCR(show_log=show_log)
# init structeqtable
- if self.table_config.get("is_table_recog_enable", False):
+ if self.apply_table:
max_time = self.table_config.get("max_time", 400)
self.table_model = table_model_init(str(os.path.join(models_dir, self.configs["weights"]["table"])),
max_time=max_time, _device_=self.device)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment