You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

48 lines
1.5 KiB
Python

1 month ago
from typing import List
from pdf2image import convert_from_path
import os
import paddleclas
import cv2
from .page_detection.utils import PageDetectionResult
# Shared PaddleClas classifier, constructed once when this module is imported
# with the "text_image_orientation" model name. image_orient_cls() delegates
# its predictions to this instance.
paddle_clas_model = paddleclas.PaddleClas(model_name="text_image_orientation")
def pdf2image(pdf_path, output_dir):
    """Render every page of a PDF to a JPEG file inside ``output_dir``.

    Pages are saved in order as ``1.jpg``, ``2.jpg``, ... (1-based).

    Args:
        pdf_path: Path of the PDF file, handed to ``convert_from_path``.
        output_dir: Directory that receives the images. It is created,
            including missing parents, when it does not exist yet.
    """
    # exist_ok=True replaces the separate isdir() test, which could race with
    # another process creating the directory between the check and the mkdir.
    os.makedirs(output_dir, exist_ok=True)
    pages = convert_from_path(pdf_path)
    for page_number, page in enumerate(pages, start=1):
        page.save(os.path.join(output_dir, f'{page_number}.jpg'))
def image_orient_cls(input_data):
    """Classify the orientation of ``input_data`` with the shared model.

    ``input_data`` is forwarded untouched to ``paddle_clas_model.predict``
    and whatever that call returns is handed back to the caller as-is.
    """
    prediction = paddle_clas_model.predict(input_data)
    return prediction
# Drawing style per detected class id: (colour, label). The colour tuples are
# passed straight to OpenCV drawing calls on an image loaded by cv2.imread,
# which yields BGR channel order for colour images.
# NOTE(review): class id 3 has no dedicated style here (the original chain
# skipped it as well) - confirm against the detection model's label list.
_PAGE_BOX_STYLES = {
    0: ((0, 0, 0), 'text'),
    1: ((255, 0, 0), 'title'),
    2: ((0, 255, 0), 'figure'),
    4: ((0, 0, 255), 'table'),
    5: ((255, 0, 255), 'table caption'),
}

# Neutral colour used for class ids that are missing from _PAGE_BOX_STYLES.
_UNKNOWN_BOX_COLOR = (128, 128, 128)


def page_detection_visual(page_detection_result: PageDetectionResult):
    """Draw the detected layout boxes and their labels onto the page image.

    The image at ``page_detection_result.image_path`` is loaded and, for each
    entry of ``page_detection_result.boxes``, a rectangle spanning
    ``(pos[0], pos[1])`` to ``(pos[2], pos[3])`` is drawn together with a
    caption of the form ``"<label> <confidence>"``.

    Class ids without a known style are drawn in grey and captioned
    ``"class <clsid>"``. Previously such ids either raised
    ``UnboundLocalError`` (first box) or silently reused the colour and label
    of the preceding box.

    Args:
        page_detection_result: Object exposing ``image_path`` and ``boxes``;
            every box provides ``pos``, ``clsid`` and ``confidence``.

    Returns:
        The image returned by ``cv2.imread`` with all annotations drawn on
        it. ``cv2.imread`` returns ``None`` when the file cannot be read.
    """
    img = cv2.imread(page_detection_result.image_path)
    for box in page_detection_result.boxes:
        pos = box.pos
        clsid = box.clsid

        # Explicit fallback so an unexpected class id can never inherit the
        # style computed for an earlier box.
        color, label = _PAGE_BOX_STYLES.get(
            clsid, (_UNKNOWN_BOX_COLOR, f'class {clsid}')
        )
        caption = f'{label} {box.confidence}'

        # OpenCV drawing functions need integer pixel coordinates.
        corner_a = (int(pos[0]), int(pos[1]))
        corner_b = (int(pos[2]), int(pos[3]))

        img = cv2.rectangle(img, corner_a, corner_b, color, 2)
        cv2.putText(img, caption, corner_a, cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2)
    return img