diff --git a/helper/page_detection/test.py b/helper/page_detection/test.py deleted file mode 100644 index 7d5c267..0000000 --- a/helper/page_detection/test.py +++ /dev/null @@ -1,57 +0,0 @@ -from typing import List -import cv2 -from pdf_detection import Pipeline -import pickle - - -class LayoutBox(object): - def __init__(self, clsid: int, pos: List[float], confidence: float): - self.clsid = clsid - self.pos = pos - self.confidence = confidence - - -class PageDetectionResult(object): - def __init__(self, boxes: List[LayoutBox], image_path: str): - self.boxes = boxes - self.image_path = image_path - -pipeline = Pipeline('/mnt/pdf2markdown/models/PaddleDetection/inference_model/picodet_lcnet_x1_0_fgd_layout_cdla_infer') - - -def page_detection_visual(page_detection_result: PageDetectionResult): - img = cv2.imread(page_detection_result.image_path) - for box in page_detection_result.boxes: - pos = box.pos - clsid = box.clsid - confidence = box.confidence - if clsid == 0: - color = (0, 0, 0) - text = 'text' - elif clsid == 1: - color = (255, 0, 0) - text = 'title' - elif clsid == 2: - color = (0, 255, 0) - text = 'figure' - elif clsid == 4: - color = (0, 0, 255) - text = 'table' - if clsid == 5: - color = (255, 0, 255) - text = 'table caption' - text = f'{text} {confidence}' - img = cv2.rectangle(img, (int(pos[0]), int(pos[1])), (int(pos[2]), int(pos[3])), color, 2) - cv2.putText(img, text, (int(pos[0]), int(pos[1])), cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2) - return img - -img_path = '/mnt/research/PaddleOCR/PaddleDetection/datasets/train_output/JPEGImages/0090.jpg' -page_detecion_outputs = pipeline(img_path) -boxes = [] -for output in page_detecion_outputs: - boxes.append(LayoutBox(output[0], output[1], output[2])) -res = PageDetectionResult(boxes, img_path) -with open('/mnt/pdf2markdown/a.pkl', 'wb') as f: - pickle.dump(res, f) -# img = page_detection_visual(res) -# cv2.imwrite('/mnt/pdf2markdown/0122.jpg', img) diff --git a/requirements.txt b/requirements.txt index 2bc6aba..4e2a43d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -72,11 +72,11 @@ lazy_loader==0.4 lmdb==1.6.2 loguru==0.7.3 lxml==5.4.0 --e git+http://192.168.10.28:3000/Yaxin/pdf2markdown.git@f030719b330c56e9909196a8d4e00d3e9ec003dc#egg=magic_pdf&subdirectory=third_party/MinerU +-e third_party/MinerU mammoth==1.9.0 markdown2==2.5.3 markdownify==0.13.1 --e git+http://192.168.10.28:3000/Yaxin/pdf2markdown.git@f030719b330c56e9909196a8d4e00d3e9ec003dc#egg=marker_pdf&subdirectory=third_party/marker +-e third_party/marker MarkupSafe==3.0.2 matplotlib==3.10.1 modelscope==1.25.0 @@ -173,7 +173,7 @@ six==1.17.0 sniffio==1.3.1 soupsieve==2.7 stringzilla==3.12.5 --e git+http://192.168.10.28:3000/Yaxin/pdf2markdown.git@f030719b330c56e9909196a8d4e00d3e9ec003dc#egg=surya_ocr&subdirectory=third_party/surya +-e third_party/surya sympy==1.13.1 termcolor==3.1.0 thop==0.1.1.post2209072238