Commit be1b1ae7 authored by myhloli's avatar myhloli

refactor(model): improve timing information and performance

- Enhance timing output precision to two decimal places for better readability- Calculate and log document analysis speed in pages per second
- Optimize logging for YOLO and table recognition processes
- Remove unnecessary comments and improve code efficiency
parent 14bb5865
......@@ -134,7 +134,10 @@ def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
page_info = {"page_no": index, "height": page_height, "width": page_width}
page_dict = {"layout_dets": result, "page_info": page_info}
model_json.append(page_dict)
doc_analyze_cost = time.time() - doc_analyze_start
logger.info(f"doc analyze cost: {doc_analyze_cost}")
doc_analyze_time = round(time.time() - doc_analyze_start, 2)
doc_analyze_speed = round( (end_page_id + 1 - start_page_id) / doc_analyze_time, 2)
logger.info(f"doc analyze time: {round(time.time() - doc_analyze_start, 2)},"
f" speed: {doc_analyze_speed} pages/second")
return model_json
......@@ -7,6 +7,7 @@ from magic_pdf.libs.clean_memory import clean_memory
from magic_pdf.model.model_list import AtomicModel
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新
os.environ['YOLO_VERBOSE'] = 'False' # disable yolo logger
try:
import cv2
import yaml
......@@ -274,6 +275,8 @@ class CustomPEKModel:
def __call__(self, image):
page_start = time.time()
latex_filling_list = []
mf_image_list = []
......@@ -281,13 +284,15 @@ class CustomPEKModel:
layout_start = time.time()
layout_res = self.layout_model(image, ignore_catids=[])
layout_cost = round(time.time() - layout_start, 2)
logger.info(f"layout detection cost: {layout_cost}")
logger.info(f"layout detection time: {layout_cost}")
pil_img = Image.fromarray(image)
if self.apply_formula:
# 公式检测
mfd_start = time.time()
mfd_res = self.mfd_model.predict(image, imgsz=1888, conf=0.25, iou=0.45, verbose=True)[0]
logger.info(f"mfd time: {round(time.time() - mfd_start, 2)}")
for xyxy, conf, cla in zip(mfd_res.boxes.xyxy.cpu(), mfd_res.boxes.conf.cpu(), mfd_res.boxes.cls.cpu()):
xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
new_item = {
......@@ -381,7 +386,7 @@ class CustomPEKModel:
})
ocr_cost = round(time.time() - ocr_start, 2)
logger.info(f"ocr cost: {ocr_cost}")
logger.info(f"ocr time: {ocr_cost}")
# 表格识别 table recognition
if self.apply_table:
......@@ -389,7 +394,7 @@ class CustomPEKModel:
for res in table_res_list:
new_image, _ = crop_img(res, pil_img)
single_table_start_time = time.time()
logger.info("------------------table recognition processing begins-----------------")
# logger.info("------------------table recognition processing begins-----------------")
latex_code = None
html_code = None
if self.table_model_type == STRUCT_EQTABLE:
......@@ -399,7 +404,7 @@ class CustomPEKModel:
html_code = self.table_model.img2html(new_image)
run_time = time.time() - single_table_start_time
logger.info(f"------------table recognition processing ends within {run_time}s-----")
# logger.info(f"------------table recognition processing ends within {run_time}s-----")
if run_time > self.table_max_time:
logger.warning(f"------------table recognition processing exceeds max time {self.table_max_time}s----------")
# 判断是否返回正常
......@@ -410,12 +415,13 @@ class CustomPEKModel:
if expected_ending:
res["latex"] = latex_code
else:
logger.warning(f"------------table recognition processing fails----------")
logger.warning(f"table recognition processing fails, not found expected LaTeX table end")
elif html_code:
res["html"] = html_code
else:
logger.warning(f"------------table recognition processing fails----------")
table_cost = round(time.time() - table_start, 2)
logger.info(f"table cost: {table_cost}")
logger.warning(f"table recognition processing fails, not get latex or html return")
logger.info(f"table time: {round(time.time() - table_start, 2)}")
logger.info(f"-----page total time: {round(time.time() - page_start, 2)}-----")
return layout_res
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment