Commit c90ee891 authored by myhloli

feat(draw_bbox): add model bbox drawing functionality

Implement the feature to draw bounding boxes for model elements in the PDF. This includes
adding new drawing functions and modifying existing ones to accommodate the new feature.
Also, updates are made to CLI tools and common utilities to support the model bbox drawing.
parent 445a397f
from magic_pdf.libs.Constants import CROSS_PAGE from magic_pdf.libs.Constants import CROSS_PAGE
from magic_pdf.libs.commons import fitz # PyMuPDF from magic_pdf.libs.commons import fitz # PyMuPDF
from magic_pdf.libs.ocr_content_type import ContentType, BlockType from magic_pdf.libs.ocr_content_type import ContentType, BlockType, CategoryId
from magic_pdf.model.magic_model import MagicModel
def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config): def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config):
...@@ -225,3 +226,67 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path): ...@@ -225,3 +226,67 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path):
# Save the PDF # Save the PDF
pdf_docs.save(f"{out_path}/spans.pdf") pdf_docs.save(f"{out_path}/spans.pdf")
def drow_model_bbox(model_list: list, pdf_bytes, out_path):
    """Draw the raw model layout detections onto the PDF and save it as ``model.pdf``.

    For every page index in ``model_list`` the page's ``"layout_dets"`` entries are
    grouped by their ``"category_id"``; each group is then drawn on the matching
    PDF page in its own colour and the annotated document is written to
    ``{out_path}/model.pdf``.

    Args:
        model_list: Per-page model output. It is handed to ``MagicModel`` and its
            length determines how many pages are collected.
        pdf_bytes: Raw bytes of the PDF, opened with ``fitz.open("pdf", ...)``.
        out_path: Directory prefix for the generated ``model.pdf``.
    """
    # (category id, RGB colour) pairs, listed in the exact order they are drawn.
    # Detections whose category id is not listed here are ignored.
    palette = (
        (CategoryId.Abandon, (158, 158, 158)),
        (CategoryId.TableBody, (204, 204, 0)),
        (CategoryId.TableCaption, (255, 255, 102)),
        (CategoryId.TableFootnote, (229, 255, 204)),
        (CategoryId.ImageBody, (153, 255, 51)),
        (CategoryId.ImageCaption, (102, 178, 255)),
        (CategoryId.Title, (102, 102, 255)),
        (CategoryId.Text, (153, 0, 76)),
        (CategoryId.InterlineEquation_YOLO, (0, 255, 0)),
    )
    # category id -> list with one entry (a list of bboxes) per page.
    boxes_by_category = {category: [] for category, _ in palette}

    pdf_docs = fitz.open("pdf", pdf_bytes)
    magic_model = MagicModel(model_list, pdf_docs)

    for page_no in range(len(model_list)):
        page_boxes = {category: [] for category in boxes_by_category}
        detections = magic_model.get_model_list(page_no)["layout_dets"]
        for detection in detections:
            bbox = detection["bbox"]
            bucket = page_boxes.get(detection["category_id"])
            if bucket is not None:
                bucket.append(bbox)
        # Every category receives an entry for every page, even an empty one,
        # so the per-category lists stay index-aligned with the page number.
        for category, found in page_boxes.items():
            boxes_by_category[category].append(found)

    for page_no, page in enumerate(pdf_docs):
        for category, rgb in palette:
            # A fresh colour list is passed on every call, as a list literal would be.
            # NOTE(review): the trailing True is presumably the fill flag — confirm
            # against draw_bbox_with_number's signature.
            draw_bbox_with_number(
                page_no, boxes_by_category[category], page, list(rgb), True
            )

    # Save the PDF
    pdf_docs.save(f"{out_path}/model.pdf")
\ No newline at end of file
...@@ -19,3 +19,17 @@ class BlockType: ...@@ -19,3 +19,17 @@ class BlockType:
Footnote = "footnote" Footnote = "footnote"
Discarded = "discarded" Discarded = "discarded"
class CategoryId:
    """Integer ids matched against the ``"category_id"`` of a layout detection.

    NOTE(review): the meaning of each id is inferred from its name only —
    confirm against the label map of the model that produces the detections.
    """

    # Ids 0-8 form a contiguous range.
    Title = 0
    Text = 1
    Abandon = 2
    ImageBody = 3
    ImageCaption = 4
    TableBody = 5
    TableCaption = 6
    TableFootnote = 7
    InterlineEquation_Layout = 8
    # Ids 9-12 are not defined in this class.
    InlineEquation = 13
    # NOTE(review): the _Layout / _YOLO suffixes presumably name the detector
    # that emitted the interline-equation box — TODO confirm.
    InterlineEquation_YOLO = 14
    OcrText = 15
...@@ -94,6 +94,7 @@ def jsonl(jsonl, method, output_dir): ...@@ -94,6 +94,7 @@ def jsonl(jsonl, method, output_dir):
jso["doc_layout_result"], jso["doc_layout_result"],
method, method,
f_dump_content_list=True, f_dump_content_list=True,
f_draw_model_bbox=True,
) )
...@@ -146,6 +147,7 @@ def pdf(pdf, json_data, output_dir, method): ...@@ -146,6 +147,7 @@ def pdf(pdf, json_data, output_dir, method):
model_json_list, model_json_list,
method, method,
f_dump_content_list=True, f_dump_content_list=True,
f_draw_model_bbox=True,
) )
......
...@@ -4,7 +4,7 @@ import copy ...@@ -4,7 +4,7 @@ import copy
import click import click
from loguru import logger from loguru import logger
from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode
from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox, drow_model_bbox
from magic_pdf.pipe.UNIPipe import UNIPipe from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.pipe.OCRPipe import OCRPipe from magic_pdf.pipe.OCRPipe import OCRPipe
from magic_pdf.pipe.TXTPipe import TXTPipe from magic_pdf.pipe.TXTPipe import TXTPipe
...@@ -37,6 +37,7 @@ def do_parse( ...@@ -37,6 +37,7 @@ def do_parse(
f_dump_orig_pdf=True, f_dump_orig_pdf=True,
f_dump_content_list=False, f_dump_content_list=False,
f_make_md_mode=MakeMode.MM_MD, f_make_md_mode=MakeMode.MM_MD,
f_draw_model_bbox=False,
): ):
orig_model_list = copy.deepcopy(model_list) orig_model_list = copy.deepcopy(model_list)
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method) local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
...@@ -73,6 +74,8 @@ def do_parse( ...@@ -73,6 +74,8 @@ def do_parse(
draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir) draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir)
if f_draw_span_bbox: if f_draw_span_bbox:
draw_span_bbox(pdf_info, pdf_bytes, local_md_dir) draw_span_bbox(pdf_info, pdf_bytes, local_md_dir)
if f_draw_model_bbox:
drow_model_bbox(orig_model_list, pdf_bytes, local_md_dir)
md_content = pipe.pipe_mk_markdown( md_content = pipe.pipe_mk_markdown(
image_dir, drop_mode=DropMode.NONE, md_make_mode=f_make_md_mode image_dir, drop_mode=DropMode.NONE, md_make_mode=f_make_md_mode
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment