"""Helpers for PDF rasterisation, orientation classification, layout
visualisation and OCR text recognition."""
import os
from typing import List, Tuple

import cv2
import paddleclas
from paddleocr import PaddleOCR
from pdf2image import convert_from_path

from .page_detection.utils import PageDetectionResult

# Orientation classifier, built once at import time and shared by all calls.
paddle_clas_model = paddleclas.PaddleClas(model_name="text_image_orientation")

# Layout class id -> (colour tuple passed to OpenCV, label text).
# OpenCV interprets colour tuples as BGR for images loaded with cv2.imread.
_CLASS_STYLES = {
    0: ((0, 0, 0), 'text'),
    1: ((255, 0, 0), 'title'),
    2: ((0, 255, 0), 'figure'),
    4: ((0, 0, 255), 'table'),
    5: ((255, 0, 255), 'table caption'),
}
# Colour used for class ids that have no entry in _CLASS_STYLES.
_UNKNOWN_CLASS_COLOR = (128, 128, 128)


def pdf2image(pdf_path, output_dir):
    """Render every page of a PDF to a JPEG file inside ``output_dir``.

    Pages are written as ``1.jpg``, ``2.jpg``, ... in page order. The output
    directory is created if it does not exist yet.

    Args:
        pdf_path: Path of the PDF handed to ``convert_from_path``.
        output_dir: Directory that receives the page images.
    """
    # exist_ok avoids the race between checking for and creating the directory.
    os.makedirs(output_dir, exist_ok=True)
    pages = convert_from_path(pdf_path)
    for page_number, page in enumerate(pages, start=1):
        page.save(os.path.join(output_dir, f'{page_number}.jpg'))


def image_orient_cls(input_data):
    """Run the ``text_image_orientation`` PaddleClas model on ``input_data``.

    Returns:
        Whatever ``paddle_clas_model.predict`` returns for the input.
    """
    return paddle_clas_model.predict(input_data)


def page_detection_visual(page_detection_result: PageDetectionResult):
    """Draw the detected layout boxes and their labels onto the page image.

    The image is loaded from ``page_detection_result.image_path``. For every
    box a rectangle is drawn from ``(pos[0], pos[1])`` to ``(pos[2], pos[3])``
    and a ``"<label> <confidence>"`` caption is placed at the first corner.

    Class ids without a known style are drawn in a neutral colour and labelled
    ``"class <id>"``. Previously such ids either raised ``UnboundLocalError``
    (first box) or silently reused the previous box's label and colour.

    Args:
        page_detection_result: Detection result providing ``image_path`` and
            ``boxes``; each box exposes ``pos``, ``clsid`` and ``confidence``.

    Returns:
        The image with all boxes drawn on it.
    """
    # NOTE(review): cv2.imread returns None when the file cannot be read; the
    # drawing calls below will then fail. Confirm whether callers need a
    # clearer error here.
    img = cv2.imread(page_detection_result.image_path)
    for box in page_detection_result.boxes:
        pos = box.pos
        clsid = box.clsid
        color, label = _CLASS_STYLES.get(
            clsid, (_UNKNOWN_CLASS_COLOR, f'class {clsid}')
        )
        caption = f'{label} {box.confidence}'
        top_left = (int(pos[0]), int(pos[1]))
        bottom_right = (int(pos[2]), int(pos[3]))
        img = cv2.rectangle(img, top_left, bottom_right, color, 2)
        cv2.putText(img, caption, top_left, cv2.FONT_HERSHEY_TRIPLEX, 1,
                    color, 2)
    return img


# OCR engine, built once at import time and shared by all calls.
ocr = PaddleOCR(use_angle_cls=False, lang='ch', use_gpu=True, show_log=False)


def text_rec(image) -> Tuple[List, List, List]:
    """Run OCR on ``image`` and flatten the result into parallel lists.

    Args:
        image: Input forwarded unchanged to ``ocr.ocr``.

    Returns:
        A ``(boxes, texts, confidences)`` tuple of equally long lists, where
        entry ``i`` of each list describes the same recognised text line.
        Empty or missing result entries are skipped.
    """
    result = ocr.ocr(image, cls=False)
    boxes = []
    texts = []
    confidences = []
    # Treat a None result like an empty one instead of failing on iteration.
    for page in result or []:
        if not page:
            continue
        for line in page:
            if not line:
                continue
            # Each line is laid out as [box, (text, confidence)].
            boxes.append(line[0])
            texts.append(line[1][0])
            confidences.append(line[1][1])
    return boxes, texts, confidences