Commit 4c096443 authored by liukaiwen

Add table recognition and conversion to LaTeX

parent d04f3f22
...@@ -35,8 +35,8 @@ from magic_pdf.model.pek_sub_modules.self_modify import ModifiedPaddleOCR ...@@ -35,8 +35,8 @@ from magic_pdf.model.pek_sub_modules.self_modify import ModifiedPaddleOCR
from magic_pdf.model.pek_sub_modules.structeqtable.StructTableModel import StructTableModel from magic_pdf.model.pek_sub_modules.structeqtable.StructTableModel import StructTableModel
def table_model_init(model_path): def table_model_init(model_path, _device_ = 'cpu'):
table_model = StructTableModel(model_path) table_model = StructTableModel(model_path, device = _device_)
return table_model return table_model
...@@ -140,7 +140,7 @@ class CustomPEKModel: ...@@ -140,7 +140,7 @@ class CustomPEKModel:
# init structeqtable # init structeqtable
if self.apply_table: if self.apply_table:
self.table_model = table_model_init(str(os.path.join(models_dir, self.configs["weights"]["table"]))) self.table_model = table_model_init(str(os.path.join(models_dir, self.configs["weights"]["table"])), _device_=self.device)
logger.info('DocAnalysis init done!') logger.info('DocAnalysis init done!')
def __call__(self, image): def __call__(self, image):
......
from struct_eqtable.model import StructTable from struct_eqtable.model import StructTable
from pypandoc import convert_text from pypandoc import convert_text
class StructTableModel: class StructTableModel:
def __init__(self, model_path, max_new_tokens=2048, max_time=400): def __init__(self, model_path, max_new_tokens=2048, max_time=400, device = 'cpu'):
# init # init
self.model_path = model_path self.model_path = model_path
self.max_new_tokens = max_new_tokens # maximum output tokens length self.max_new_tokens = max_new_tokens # maximum output tokens length
self.max_time = max_time # timeout for processing in seconds self.max_time = max_time # timeout for processing in seconds
self.model = StructTable(self.model_path, self.max_new_tokens, self.max_time) if device == 'cpu':
self.model = StructTable(self.model_path, self.max_new_tokens, self.max_time)
else:
self.model = StructTable(self.model_path, self.max_new_tokens, self.max_time).cuda()
def image2latex(self, image) -> str: def image2latex(self, image) -> str:
# #
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment