|
|
|
from typing import List
|
|
|
|
from pdf2image import convert_from_path
|
|
|
|
import os
|
|
|
|
import paddleclas
|
|
|
|
import cv2
|
|
|
|
from .page_detection.utils import PageDetectionResult
|
|
|
|
from paddleocr import PaddleOCR
|
|
|
|
|
|
|
|
|
|
|
|
# Shared PaddleClas classifier, built once when this module is imported and
# reused by image_orient_cls(); it is configured with the
# "text_image_orientation" model.
paddle_clas_model = paddleclas.PaddleClas(model_name="text_image_orientation")
|
|
|
|
|
|
|
|
def pdf2image(pdf_path, output_dir):
    """Render the pages of a PDF to JPEG files inside ``output_dir``.

    The images are named ``1.jpg``, ``2.jpg``, ... following the order in
    which ``convert_from_path`` returns the pages (numbering is 1-based).

    Args:
        pdf_path: Path of the PDF; passed unchanged to ``convert_from_path``.
        output_dir: Directory that receives the images. It is created,
            including missing parents, when it does not exist yet.
    """
    # exist_ok=True replaces the former isdir()-then-makedirs() sequence,
    # which could fail if the directory appeared between the two calls.
    os.makedirs(output_dir, exist_ok=True)
    for page_number, page in enumerate(convert_from_path(pdf_path), start=1):
        page.save(os.path.join(output_dir, f'{page_number}.jpg'))
|
|
|
|
|
|
|
|
|
|
|
|
def image_orient_cls(input_data):
    """Run the shared orientation classifier on ``input_data``.

    ``input_data`` is forwarded untouched to ``paddle_clas_model.predict`` and
    whatever that call produces is handed back to the caller as-is.
    """
    prediction = paddle_clas_model.predict(input_data)
    return prediction
|
|
|
|
|
|
|
|
|
|
|
|
# Drawing style for each known detection class id: (colour, label).
# The colour tuples are handed to OpenCV unchanged; OpenCV reads 3-channel
# colours in BGR order.
_PAGE_CLASS_STYLES = {
    0: ((0, 0, 0), 'text'),
    1: ((255, 0, 0), 'title'),
    2: ((0, 255, 0), 'figure'),
    4: ((0, 0, 255), 'table'),
    5: ((255, 0, 255), 'table caption'),
}

# Colour used for any class id that has no entry in _PAGE_CLASS_STYLES.
_UNKNOWN_CLASS_COLOR = (128, 128, 128)


def page_detection_visual(page_detection_result: PageDetectionResult):
    """Draw the detected layout boxes of a page onto its image.

    The image at ``page_detection_result.image_path`` is loaded with OpenCV,
    and for every entry in ``page_detection_result.boxes`` a rectangle plus a
    ``"<label> <confidence>"`` caption is drawn at the box position.

    Args:
        page_detection_result: Detection result providing ``image_path`` and
            ``boxes``; each box exposes ``pos`` (indexed 0..3 as the two
            rectangle corners), ``clsid`` and ``confidence``.

    Returns:
        The annotated image as returned by OpenCV.

    Raises:
        FileNotFoundError: If OpenCV cannot load the image (``cv2.imread``
            returned ``None``).
    """
    img = cv2.imread(page_detection_result.image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.rectangle.
        raise FileNotFoundError(
            f'could not read image: {page_detection_result.image_path}'
        )

    for box in page_detection_result.boxes:
        pos = box.pos
        clsid = box.clsid
        # Unknown class ids get their own neutral style. Previously they
        # either crashed on unbound locals or silently reused the colour and
        # label left over from the previous box.
        color, label = _PAGE_CLASS_STYLES.get(
            clsid, (_UNKNOWN_CLASS_COLOR, f'class {clsid}')
        )
        caption = f'{label} {box.confidence}'

        top_left = (int(pos[0]), int(pos[1]))
        bottom_right = (int(pos[2]), int(pos[3]))
        img = cv2.rectangle(img, top_left, bottom_right, color, 2)
        cv2.putText(img, caption, top_left, cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2)

    return img
|
|
|
|
|
|
|
|
|
|
|
|
# Shared PaddleOCR engine, built once when this module is imported and reused
# by text_rec(). Configured here with lang='ch', the angle classifier turned
# off, GPU use requested and PaddleOCR's own logging turned off.
ocr = PaddleOCR(use_angle_cls=False, lang='ch', use_gpu=True, show_log=False)
|
|
|
|
|
|
|
|
def text_rec(image):
    """Run the shared OCR engine on ``image`` and flatten its output.

    ``ocr.ocr`` is called with ``cls=False``. Its result is walked two levels
    deep: falsy top-level groups and falsy line entries are skipped, and each
    remaining entry is read as ``[box, (text, confidence)]``.

    Args:
        image: Input forwarded unchanged to ``ocr.ocr``.

    Returns:
        A ``(boxes, texts, confidences)`` tuple of three parallel lists, one
        element per recognised line, in the order the engine reported them.
    """
    ocr_output = ocr.ocr(image, cls=False)

    # Flatten to a single list of line entries, dropping empty groups/lines.
    entries = [
        entry
        for group in ocr_output
        if group
        for entry in group
        if entry
    ]

    boxes = [entry[0] for entry in entries]
    texts = [entry[1][0] for entry in entries]
    confidences = [entry[1][1] for entry in entries]
    return boxes, texts, confidences
|