Commit b9667fd3 authored by liukaiwen

add table recognition and conversion to LaTeX

parent dbe628ee
...@@ -124,7 +124,6 @@ def ocr_mk_markdown_with_para_core_v2(paras_of_layout, mode, img_buket_path=""): ...@@ -124,7 +124,6 @@ def ocr_mk_markdown_with_para_core_v2(paras_of_layout, mode, img_buket_path=""):
for block in para_block['blocks']: # 1st.拼table_caption for block in para_block['blocks']: # 1st.拼table_caption
if block['type'] == BlockType.TableCaption: if block['type'] == BlockType.TableCaption:
table_caption = merge_para_with_text(block) table_caption = merge_para_with_text(block)
para_text += table_caption
for block in para_block['blocks']: # 2nd.拼table_body for block in para_block['blocks']: # 2nd.拼table_body
if block['type'] == BlockType.TableBody: if block['type'] == BlockType.TableBody:
for line in block['lines']: for line in block['lines']:
......
...@@ -104,6 +104,7 @@ class CustomPEKModel: ...@@ -104,6 +104,7 @@ class CustomPEKModel:
self.apply_layout = kwargs.get("apply_layout", self.configs["config"]["layout"]) self.apply_layout = kwargs.get("apply_layout", self.configs["config"]["layout"])
self.apply_formula = kwargs.get("apply_formula", self.configs["config"]["formula"]) self.apply_formula = kwargs.get("apply_formula", self.configs["config"]["formula"])
self.table_config = kwargs.get("table_config", self.configs["config"]["table_config"]) self.table_config = kwargs.get("table_config", self.configs["config"]["table_config"])
self.apply_table = self.table_config.get("is_table_recog_enable", False)
self.apply_ocr = ocr self.apply_ocr = ocr
logger.info( logger.info(
"DocAnalysis init, this may take some times. apply_layout: {}, apply_formula: {}, apply_ocr: {}".format( "DocAnalysis init, this may take some times. apply_layout: {}, apply_formula: {}, apply_ocr: {}".format(
...@@ -139,7 +140,7 @@ class CustomPEKModel: ...@@ -139,7 +140,7 @@ class CustomPEKModel:
self.ocr_model = ModifiedPaddleOCR(show_log=show_log) self.ocr_model = ModifiedPaddleOCR(show_log=show_log)
# init structeqtable # init structeqtable
if self.table_config.get("is_table_recog_enable", False): if self.apply_table:
max_time = self.table_config.get("max_time", 400) max_time = self.table_config.get("max_time", 400)
self.table_model = table_model_init(str(os.path.join(models_dir, self.configs["weights"]["table"])), self.table_model = table_model_init(str(os.path.join(models_dir, self.configs["weights"]["table"])),
max_time=max_time, _device_=self.device) max_time=max_time, _device_=self.device)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment