Merge pull request 'zzc' (#7 ) from zzc into main

Reviewed-on: #7
requirements.txt
3 changed files with 31 additions and 60 deletions
--- a/fix_requirements.py
+++ b/fix_requirements.py
@@ -0,0 +1,28 @@
+import re
+
+
+def simplify_editable_git_path(line: str) -> str:  # Rewrite an editable ("-e") requirement that points into third_party/ as a plain local path.
+    if not line.strip().startswith("-e"):  # Anything that is not an editable requirement is returned unchanged.
+        return line
+
+    # Match the subdirectory parameter
+    match = re.search(r"subdirectory=([^\s&#]+)", line)  # Captured value stops at whitespace, "&" or "#".
+    if match:
+        sub_path = match.group(1)
+        if sub_path.startswith("third_party/"):  # Only subdirectories under third_party/ are simplified.
+            return f"-e {sub_path}\n"  # Result is always newline-terminated, whatever the input line ended with.
+    return line
+
+def fix_requirements():  # Rewrite requirements.txt in place, simplifying every editable third_party git entry.
+    with open('requirements.txt', "r") as infile:  # Path is relative to the current working directory.
+        lines = infile.readlines()
+
+    fixed_lines = [simplify_editable_git_path(line) for line in lines]  # One output line per input line, order preserved.
+
+    with open('requirements.txt', "w") as outfile:  # Overwrites the same file that was just read.
+        outfile.writelines(fixed_lines)
+
+    print(f"✅ 已更新 requirements.txt：简化了包含 third_party 的 git 路径。")  # Printed unconditionally, whether or not any line was changed.
+
+if __name__ == "__main__":  # Run the rewrite only when this file is executed as a script.
+    fix_requirements()
--- a/helper/page_detection/test.py
+++ b/helper/page_detection/test.py
@@ -1,57 +0,0 @@
-from typing import List
-import cv2
-from pdf_detection import Pipeline
-import pickle
-
-
-class LayoutBox(object):  # One detected layout region: class id, position and confidence.
-    def __init__(self, clsid: int, pos: List[float], confidence: float):
-        self.clsid = clsid  # Class id; page_detection_visual maps 0/1/2/4/5 to labels.
-        self.pos = pos  # page_detection_visual reads pos[0..3] as two rectangle corners.
-        self.confidence = confidence
-
-
-class PageDetectionResult(object):  # All layout boxes found on one page image, plus the path of that image.
-    def __init__(self, boxes: List[LayoutBox], image_path: str):
-        self.boxes = boxes
-        self.image_path = image_path  # Read back with cv2.imread by page_detection_visual.
-
-pipeline = Pipeline('/mnt/pdf2markdown/models/PaddleDetection/inference_model/picodet_lcnet_x1_0_fgd_layout_cdla_infer')  # Built at import time from a hard-coded absolute model directory.
-
-
-def page_detection_visual(page_detection_result: PageDetectionResult):  # Draw every box and its label on the page image and return the annotated image.
-    img = cv2.imread(page_detection_result.image_path)
-    for box in page_detection_result.boxes:
-        pos = box.pos
-        clsid = box.clsid
-        confidence = box.confidence
-        if clsid == 0:  # Each handled class id selects a drawing color and a label.
-            color = (0, 0, 0)
-            text = 'text'
-        elif clsid == 1:
-            color = (255, 0, 0)
-            text = 'title'
-        elif clsid == 2:
-            color = (0, 255, 0)
-            text = 'figure'
-        elif clsid == 4:  # NOTE(review): no branch for clsid 3 or any id outside 0, 1, 2, 4, 5; color/text are then unbound for a first box or carried over from the previous box.
-            color = (0, 0, 255)
-            text = 'table'
-        if clsid == 5:  # A separate `if`, not part of the elif chain above.
-            color = (255, 0, 255)
-            text = 'table caption'
-        text = f'{text} {confidence}'  # Label shown on the image is "<class name> <confidence>".
-        img = cv2.rectangle(img, (int(pos[0]), int(pos[1])), (int(pos[2]), int(pos[3])), color, 2)  # pos[0:2] and pos[2:4] are the two corners, truncated to int.
-        cv2.putText(img, text, (int(pos[0]), int(pos[1])), cv2.FONT_HERSHEY_TRIPLEX, 1, color, 2)  # Label is anchored at the first corner.
-    return img
-
-img_path = '/mnt/research/PaddleOCR/PaddleDetection/datasets/train_output/JPEGImages/0090.jpg'  # Hard-coded sample image.
-page_detecion_outputs = pipeline(img_path)  # Runs at import time, like everything below.
-boxes = []
-for output in page_detecion_outputs:
-    boxes.append(LayoutBox(output[0], output[1], output[2]))  # output[0..2] are passed as clsid, pos, confidence.
-res = PageDetectionResult(boxes, img_path)
-with open('/mnt/pdf2markdown/a.pkl', 'wb') as f:  # Result is pickled to a hard-coded absolute path.
-    pickle.dump(res, f)
-# img = page_detection_visual(res)
-# cv2.imwrite('/mnt/pdf2markdown/0122.jpg', img)
--- a/requirements.txt
+++ b/requirements.txt
@@ -72,11 +72,11 @@ lazy_loader==0.4
 lmdb==1.6.2
 loguru==0.7.3
 lxml==5.4.0
--e git+http://192.168.10.28:3000/Yaxin/pdf2markdown.git@f030719b330c56e9909196a8d4e00d3e9ec003dc#egg=magic_pdf&subdirectory=third_party/MinerU
+-e third_party/MinerU
 mammoth==1.9.0
 markdown2==2.5.3
 markdownify==0.13.1
--e git+http://192.168.10.28:3000/Yaxin/pdf2markdown.git@f030719b330c56e9909196a8d4e00d3e9ec003dc#egg=marker_pdf&subdirectory=third_party/marker
+-e third_party/marker
 MarkupSafe==3.0.2
 matplotlib==3.10.1
 modelscope==1.25.0
@@ -173,7 +173,7 @@ six==1.17.0
 sniffio==1.3.1
 soupsieve==2.7
 stringzilla==3.12.5
--e git+http://192.168.10.28:3000/Yaxin/pdf2markdown.git@f030719b330c56e9909196a8d4e00d3e9ec003dc#egg=surya_ocr&subdirectory=third_party/surya
+-e third_party/surya
 sympy==1.13.1
 termcolor==3.1.0
 thop==0.1.1.post2209072238
Author	SHA1	Message	Date
zhangzhichao	13ec4a6e93	Merge pull request 'zzc' (#7 ) from zzc into main Reviewed-on: #7	4 weeks ago
zhangzhichao	1b64f3b532	requirements.txt	4 weeks ago
zhangzhichao	d8b641b63b	requirements.txt	4 weeks ago