You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
946 B
Python
34 lines
946 B
Python
4 weeks ago
|
#!/usr/bin/env python
|
||
|
from huggingface_hub import snapshot_download
|
||
|
|
||
|
if __name__ == "__main__":
    # Download the selected model folders from the PDF-Extract-Kit repository
    # into /opt/. Only files matching an active pattern are requested; the
    # commented-out entries are currently not downloaded.
    model_dir = snapshot_download(
        "opendatalab/PDF-Extract-Kit-1.0",
        allow_patterns=[
            # "models/Layout/LayoutLMv3/*",
            "models/Layout/YOLO/*",
            "models/MFD/YOLO/*",
            "models/MFR/unimernet_hf_small_2503/*",
            "models/OCR/paddleocr_torch/*",
            # "models/TabRec/TableMaster/*",
            # "models/TabRec/StructEqTable/*",
        ],
        local_dir="/opt/",
    )
    # Every requested pattern lives under "models/", so report that
    # sub-directory rather than the download root.
    model_dir += "/models"

    # Download the layoutreader repository, restricted to its JSON and
    # safetensors files, into its own directory.
    layoutreader_model_dir = snapshot_download(
        "hantian/layoutreader",
        allow_patterns=["*.json", "*.safetensors"],
        local_dir="/opt/layoutreader/",
    )

    # Print both resulting locations.
    print(f"model_dir is: {model_dir}")
    print(f"layoutreader_model_dir is: {layoutreader_model_dir}")
|