# pdf2markdown/helper/page_detection/test.py

from typing import List
import cv2
from pdf_detection import Pipeline
import pickle


class LayoutBox:
    """One detected layout region: class id, coordinates and score.

    Attributes:
        clsid: Integer class id of the region.
        pos: Box coordinates. The visualiser in this file reads
            ``(pos[0], pos[1])`` and ``(pos[2], pos[3])`` as the two
            opposite corners of the rectangle.
        confidence: Score attached to the detection.
    """

    def __init__(self, clsid: int, pos: List[float], confidence: float):
        # Plain value holder: the arguments are stored as given, no copying
        # or validation.
        self.clsid, self.pos, self.confidence = clsid, pos, confidence


class PageDetectionResult:
    """All layout boxes found on one page image, plus that image's path.

    Attributes:
        boxes: The ``LayoutBox`` instances detected on the page.
        image_path: Path of the image the boxes belong to.
    """

    def __init__(self, boxes: List[LayoutBox], image_path: str):
        # Both values are stored by reference; nothing is copied or checked.
        self.boxes, self.image_path = boxes, image_path

# Detection pipeline shared by the script at the bottom of this file.
# It is constructed at import time from the model path below.
_LAYOUT_MODEL_PATH = '/mnt/pdf2markdown/models/PaddleDetection/inference_model/picodet_lcnet_x1_0_fgd_layout_cdla_infer'
pipeline = Pipeline(_LAYOUT_MODEL_PATH)


# Drawing style per layout class id: (colour, label text).
# The colour tuples are handed straight to OpenCV, which interprets them in
# B, G, R channel order for images loaded with cv2.imread.
_CLASS_STYLES = {
    0: ((0, 0, 0), 'text'),
    1: ((255, 0, 0), 'title'),
    2: ((0, 255, 0), 'figure'),
    4: ((0, 0, 255), 'table'),
    5: ((255, 0, 255), 'table caption'),
}
# Colour used for any class id that has no entry in _CLASS_STYLES.
_UNKNOWN_CLASS_COLOR = (128, 128, 128)


def page_detection_visual(page_detection_result: PageDetectionResult):
    """Draw every detected box and its label onto the page image.

    Each box is outlined with a class-specific colour and annotated with
    ``"<label> <confidence>"`` at its ``(pos[0], pos[1])`` corner. Class ids
    without a dedicated style are still drawn, using a grey outline and the
    label ``"class <id>"``, so an unexpected id neither crashes the function
    nor inherits the style of the previous box.

    Args:
        page_detection_result: Detection result whose ``image_path`` is read
            from disk and whose ``boxes`` are drawn.

    Returns:
        The image returned by ``cv2.imread`` with all boxes drawn on it.

    Raises:
        OSError: If ``cv2.imread`` cannot load ``image_path`` (it returns
            ``None`` in that case instead of raising).
    """
    img = cv2.imread(page_detection_result.image_path)
    if img is None:
        raise OSError(
            f'could not read image: {page_detection_result.image_path}')

    for box in page_detection_result.boxes:
        pos = box.pos
        color, label = _CLASS_STYLES.get(
            box.clsid, (_UNKNOWN_CLASS_COLOR, f'class {box.clsid}'))
        text = f'{label} {box.confidence}'

        # OpenCV drawing calls need integer pixel coordinates.
        first_corner = (int(pos[0]), int(pos[1]))
        opposite_corner = (int(pos[2]), int(pos[3]))

        img = cv2.rectangle(img, first_corner, opposite_corner, color, 2)
        cv2.putText(img, text, first_corner, cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2)
    return img

# Smoke test: run the detection pipeline on one sample page and store the
# result as a pickle.
img_path = '/mnt/research/PaddleOCR/PaddleDetection/datasets/train_output/JPEGImages/0090.jpg'
page_detecion_outputs = pipeline(img_path)

# Each pipeline output is indexed as [0] class id, [1] box position,
# [2] confidence, matching the LayoutBox constructor's argument order.
boxes = [
    LayoutBox(detection[0], detection[1], detection[2])
    for detection in page_detecion_outputs
]
res = PageDetectionResult(boxes, img_path)

# NOTE: pickle stores classes by module path, so whoever loads a.pkl needs
# LayoutBox and PageDetectionResult importable under the same module name.
with open('/mnt/pdf2markdown/a.pkl', 'wb') as f:
    f.write(pickle.dumps(res))

# Uncomment to render the detections onto the page and save the picture:
# img = page_detection_visual(res)
# cv2.imwrite('/mnt/pdf2markdown/0122.jpg', img)