"""Run the layout-detection pipeline on one page image and pickle the result.

The module also provides :func:`page_detection_visual` for drawing the
detected boxes onto the page image.
"""
from typing import List
import pickle

import cv2

from pdf_detection import Pipeline


class LayoutBox:
    """One detected layout region.

    Attributes:
        clsid: Integer class id reported by the detector.
        pos: Box coordinates; the first four values are used as
            ``(x1, y1, x2, y2)`` corner points when drawing.
        confidence: Detection score reported by the detector.
    """

    def __init__(self, clsid: int, pos: List[float], confidence: float):
        self.clsid = clsid
        self.pos = pos
        self.confidence = confidence


class PageDetectionResult:
    """All layout boxes detected on a single page image.

    Attributes:
        boxes: The detected :class:`LayoutBox` objects.
        image_path: Path of the image the boxes were detected on.
    """

    def __init__(self, boxes: List[LayoutBox], image_path: str):
        self.boxes = boxes
        self.image_path = image_path


pipeline = Pipeline('/mnt/pdf2markdown/models/PaddleDetection/inference_model/picodet_lcnet_x1_0_fgd_layout_cdla_infer')

# Class id -> (colour tuple handed to OpenCV, label text).
# NOTE(review): OpenCV interprets colour tuples as BGR for images loaded with
# cv2.imread, so (255, 0, 0) renders blue -- confirm this is the intent.
_CLASS_STYLES = {
    0: ((0, 0, 0), 'text'),
    1: ((255, 0, 0), 'title'),
    2: ((0, 255, 0), 'figure'),
    4: ((0, 0, 255), 'table'),
    5: ((255, 0, 255), 'table caption'),
}

# Colour used for class ids that have no entry in _CLASS_STYLES.
_UNKNOWN_CLASS_COLOR = (128, 128, 128)


def page_detection_visual(page_detection_result: PageDetectionResult):
    """Draw every detected box and its label onto the page image.

    Args:
        page_detection_result: Detection result whose ``image_path`` is
            loaded and whose ``boxes`` are drawn.

    Returns:
        The image loaded by ``cv2.imread`` with one rectangle and one text
        label (``"<class name> <confidence>"``) per box.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_path``.
    """
    img = cv2.imread(page_detection_result.image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            f'cannot read image: {page_detection_result.image_path}'
        )

    for box in page_detection_result.boxes:
        pos = box.pos
        clsid = box.clsid
        confidence = box.confidence

        # Fall back to a neutral style for class ids without a dedicated
        # entry, so they neither crash on unbound names nor inherit the
        # previous box's colour and label.
        color, text = _CLASS_STYLES.get(
            clsid, (_UNKNOWN_CLASS_COLOR, f'class {clsid}')
        )
        text = f'{text} {confidence}'

        top_left = (int(pos[0]), int(pos[1]))
        bottom_right = (int(pos[2]), int(pos[3]))
        img = cv2.rectangle(img, top_left, bottom_right, color, 2)
        cv2.putText(img, text, top_left, cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2)
    return img


img_path = '/mnt/research/PaddleOCR/PaddleDetection/datasets/train_output/JPEGImages/0090.jpg'
page_detecion_outputs = pipeline(img_path)

# Each pipeline output is indexed as (class id, box position, confidence).
boxes = [
    LayoutBox(output[0], output[1], output[2])
    for output in page_detecion_outputs
]
res = PageDetectionResult(boxes, img_path)

with open('/mnt/pdf2markdown/a.pkl', 'wb') as f:
    pickle.dump(res, f)

# img = page_detection_visual(res)
# cv2.imwrite('/mnt/pdf2markdown/0122.jpg', img)