Merge branch 'opendatalab:dev' into dev

076a2a14 · linfeng · GitHub · fad2bbc1 · 24c143fe · 076a2a14
Unverified Commit 076a2a14 authored Sep 20, 2024 by linfeng Committed by GitHub Sep 20, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 25 additions and 22 deletions

pdf_extract_kit.py magic_pdf/model/pdf_extract_kit.py +24 -21

model_configs.yaml magic_pdf/resources/model_config/model_configs.yaml +1 -1

No files found.
--- a/magic_pdf/model/pdf_extract_kit.py
+++ b/magic_pdf/model/pdf_extract_kit.py
@@ -32,7 +32,7 @@ except ImportError as e:
    exit(1)
 from magic_pdf.model.pek_sub_modules.layoutlmv3.model_init import Layoutlmv3_Predictor
-from magic_pdf.model.pek_sub_modules.post_process import get_croped_image, latex_rm_whitespace
+from magic_pdf.model.pek_sub_modules.post_process import latex_rm_whitespace
 from magic_pdf.model.pek_sub_modules.self_modify import ModifiedPaddleOCR
 from magic_pdf.model.pek_sub_modules.structeqtable.StructTableModel import StructTableModel
 from magic_pdf.model.ppTableModel import ppTableModel
@@ -63,7 +63,8 @@ def mfr_model_init(weight_dir, cfg_path, _device_='cpu'):
    cfg.config.model.tokenizer_config.path = weight_dir
    task = tasks.setup_task(cfg)
    model = task.build_model(cfg)
-    model = model.to(_device_)
+    model.to(_device_)
+    model.eval()
    vis_processor = load_processor('formula_image_eval', cfg.config.datasets.formula_rec_eval.vis_processor.eval)
    mfr_transform = transforms.Compose([vis_processor, ])
    return [model, mfr_transform]
@@ -154,6 +155,23 @@ def atom_model_init(model_name: str, **kwargs):
    return atom_model
+#  Unified crop img logic
+def crop_img(input_res, input_pil_img, crop_paste_x=0, crop_paste_y=0):
+    crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1])
+    crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5])
+    # Create a white background with an additional width and height of 50
+    crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2
+    crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2
+    return_image = Image.new('RGB', (crop_new_width, crop_new_height), 'white')
+    # Crop image
+    crop_box = (crop_xmin, crop_ymin, crop_xmax, crop_ymax)
+    cropped_img = input_pil_img.crop(crop_box)
+    return_image.paste(cropped_img, (crop_paste_x, crop_paste_y))
+    return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width, crop_new_height]
+    return return_image, return_list
 class CustomPEKModel:
    def __init__(self, ocr: bool = False, show_log: bool = False, **kwargs):
@@ -264,6 +282,8 @@ class CustomPEKModel:
        layout_cost = round(time.time() - layout_start, 2)
        logger.info(f"layout detection cost: {layout_cost}")
+        pil_img = Image.fromarray(image)
        if self.apply_formula:
            # 公式检测
            mfd_res = self.mfd_model.predict(image, imgsz=1888, conf=0.25, iou=0.45, verbose=True)[0]
@@ -277,7 +297,8 @@ class CustomPEKModel:
                }
                layout_res.append(new_item)
                latex_filling_list.append(new_item)
-                bbox_img = get_croped_image(Image.fromarray(image), [xmin, ymin, xmax, ymax])
+                # bbox_img = get_croped_image(pil_img, [xmin, ymin, xmax, ymax])
+                bbox_img = pil_img.crop((xmin, ymin, xmax, ymax))
                mf_image_list.append(bbox_img)
            # 公式识别
@@ -309,24 +330,6 @@ class CustomPEKModel:
            elif int(res['category_id']) in [5]:
                table_res_list.append(res)
-        #  Unified crop img logic
-        def crop_img(input_res, input_pil_img, crop_paste_x=0, crop_paste_y=0):
-            crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1])
-            crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5])
-            # Create a white background with an additional width and height of 50
-            crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2
-            crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2
-            return_image = Image.new('RGB', (crop_new_width, crop_new_height), 'white')
-            # Crop image
-            crop_box = (crop_xmin, crop_ymin, crop_xmax, crop_ymax)
-            cropped_img = input_pil_img.crop(crop_box)
-            return_image.paste(cropped_img, (crop_paste_x, crop_paste_y))
-            return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width, crop_new_height]
-            return return_image, return_list
-        pil_img = Image.fromarray(image)
        # ocr识别
        if self.apply_ocr:
            ocr_start = time.time()

--- a/magic_pdf/resources/model_config/model_configs.yaml
+++ b/magic_pdf/resources/model_config/model_configs.yaml
@@ -10,6 +10,6 @@ config:
 weights:
  layout: Layout/model_final.pth
  mfd: MFD/weights.pt
-  mfr: MFR/unimernet_base
+  mfr: MFR/unimernet_small
  struct_eqtable: TabRec/StructEqTable
  TableMaster: TabRec/TableMaster
\ No newline at end of file