pdf2markdown/helper/image_helper.py

from typing import List
from pdf2image import convert_from_path
import os
import paddleclas
import cv2
from .page_detection.utils import PageDetectionResult
from paddleocr import PaddleOCR


# Orientation classifier ("text_image_orientation" model). It is constructed
# once, when this module is imported, and reused by image_orient_cls().
paddle_clas_model = paddleclas.PaddleClas(model_name="text_image_orientation")

def pdf2image(pdf_path, output_dir):
    """Render every page of a PDF to a numbered JPEG file.

    Pages are saved as ``<output_dir>/1.jpg``, ``<output_dir>/2.jpg``, ... in
    the order in which ``convert_from_path`` returns them.

    Args:
        pdf_path: Path of the PDF; forwarded unchanged to
            ``convert_from_path``.
        output_dir: Directory that receives the images. It is created
            (including missing parents) when it does not exist yet.
    """
    # exist_ok=True replaces the separate isdir() test, which could race with
    # another process creating the directory between the check and the call.
    os.makedirs(output_dir, exist_ok=True)
    for page_number, image in enumerate(convert_from_path(pdf_path), start=1):
        image.save(f'{output_dir}/{page_number}.jpg')


def image_orient_cls(input_data):
    """Run the module-level orientation classifier on ``input_data``.

    The argument is forwarded unchanged to ``paddle_clas_model.predict`` and
    the value produced by that call is returned as-is.
    """
    prediction = paddle_clas_model.predict(input_data)
    return prediction


# Label and drawing colour for each detection class id this module knows.
# The colour tuples are handed to OpenCV unchanged.
_PAGE_CLASS_STYLES = {
    0: ('text', (0, 0, 0)),
    1: ('title', (255, 0, 0)),
    2: ('figure', (0, 255, 0)),
    4: ('table', (0, 0, 255)),
    5: ('table caption', (255, 0, 255)),
}

# Colour used for class ids that have no entry in _PAGE_CLASS_STYLES.
_UNKNOWN_CLASS_COLOR = (128, 128, 128)


def page_detection_visual(page_detection_result: PageDetectionResult):
    """Draw the detected layout boxes and their labels onto the page image.

    The image at ``page_detection_result.image_path`` is loaded with
    ``cv2.imread``. For every entry in ``page_detection_result.boxes`` a
    rectangle is drawn from ``(pos[0], pos[1])`` to ``(pos[2], pos[3])`` and a
    caption of the form ``"<label> <confidence>"`` is written at the first
    corner.

    Class ids without an entry in ``_PAGE_CLASS_STYLES`` are drawn in a
    neutral colour and labelled ``"class <id>"``. Previously such a box either
    raised ``UnboundLocalError`` (first box) or silently reused the colour and
    the already-suffixed label of the preceding box.

    Args:
        page_detection_result: Object exposing ``image_path`` and ``boxes``;
            each box exposes ``pos``, ``clsid`` and ``confidence``.

    Returns:
        The image with all boxes and captions drawn on it.
    """
    # NOTE(review): cv2.imread is documented to return None when the file
    # cannot be read; that case is not handled here -- confirm callers
    # always pass a readable path.
    img = cv2.imread(page_detection_result.image_path)
    for box in page_detection_result.boxes:
        pos = box.pos
        clsid = box.clsid
        label, color = _PAGE_CLASS_STYLES.get(
            clsid, (f'class {clsid}', _UNKNOWN_CLASS_COLOR)
        )
        caption = f'{label} {box.confidence}'
        top_left = (int(pos[0]), int(pos[1]))
        bottom_right = (int(pos[2]), int(pos[3]))
        img = cv2.rectangle(img, top_left, bottom_right, color, 2)
        cv2.putText(img, caption, top_left, cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2)
    return img


# Shared PaddleOCR engine used by text_rec(). It is constructed once, when this
# module is imported, with lang='ch', angle classification disabled,
# use_gpu=True and PaddleOCR's own logging turned off.
ocr = PaddleOCR(use_angle_cls=False, lang='ch', use_gpu=True, show_log=False)

def text_rec(image):
    """Run the module-level OCR engine on ``image`` and flatten its output.

    ``ocr.ocr(image, cls=False)`` is expected to yield a sequence of groups,
    each group being a sequence of entries shaped like
    ``[box, (text, confidence), ...]``. Falsy groups and falsy entries are
    skipped.

    Args:
        image: Input forwarded unchanged to ``ocr.ocr``.

    Returns:
        A tuple ``(boxes, texts, confidences)`` of three parallel lists, one
        element per recognised entry, in the order the engine returned them.
    """
    recognized = [
        entry
        for group in ocr.ocr(image, cls=False)
        if group
        for entry in group
        if entry
    ]
    boxes = [entry[0] for entry in recognized]
    texts = [entry[1][0] for entry in recognized]
    confidences = [entry[1][1] for entry in recognized]
    return boxes, texts, confidences
first commit 1 month ago			`from typing import List`
			`from pdf2image import convert_from_path`
			`import os`
			`import paddleclas`
			`import cv2`
			`from .page_detection.utils import PageDetectionResult`
表格识别和扫描件识别内部的ocr改为paddleocr 1 month ago			`from paddleocr import PaddleOCR`
first commit 1 month ago

			`paddle_clas_model = paddleclas.PaddleClas(model_name="text_image_orientation")`

			`def pdf2image(pdf_path, output_dir):`
			`if not os.path.isdir(output_dir):`
			`os.makedirs(output_dir)`
			`images = convert_from_path(pdf_path)`
			`for i, image in enumerate(images):`
			`image.save(f'{output_dir}/{i + 1}.jpg')`


			`def image_orient_cls(input_data):`
			`return paddle_clas_model.predict(input_data)`


			`def page_detection_visual(page_detection_result: PageDetectionResult):`
			`img = cv2.imread(page_detection_result.image_path)`
			`for box in page_detection_result.boxes:`
			`pos = box.pos`
			`clsid = box.clsid`
			`confidence = box.confidence`
			`if clsid == 0:`
			`color = (0, 0, 0)`
			`text = 'text'`
			`elif clsid == 1:`
			`color = (255, 0, 0)`
			`text = 'title'`
			`elif clsid == 2:`
			`color = (0, 255, 0)`
			`text = 'figure'`
			`elif clsid == 4:`
			`color = (0, 0, 255)`
			`text = 'table'`
			`if clsid == 5:`
			`color = (255, 0, 255)`
			`text = 'table caption'`
			`text = f'{text} {confidence}'`
			`img = cv2.rectangle(img, (int(pos[0]), int(pos[1])), (int(pos[2]), int(pos[3])), color, 2)`
			`cv2.putText(img, text, (int(pos[0]), int(pos[1])), cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2)`
			`return img`
表格识别和扫描件识别内部的ocr改为paddleocr 1 month ago

			`ocr = PaddleOCR(use_angle_cls=False, lang='ch', use_gpu=True, show_log=False)`

			`def text_rec(image):`
			`result = ocr.ocr(image, cls=False)`
			`boxes = []`
			`texts = []`
			`conficences = []`
			`for idx in range(len(result)):`
			`res = result[idx]`
			`if not res:`
			`continue`
			`for line in res:`
			`if not line:`
			`continue`
			`box = line[0]`
			`text = line[1][0]`
			`confidence = line[1][1]`
			`boxes.append(box)`
			`texts.append(text)`
			`conficences.append(confidence)`
			`return boxes, texts, conficences`