You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
946 B
Python
34 lines
946 B
Python
#!/usr/bin/env python
|
|
from huggingface_hub import snapshot_download
|
|
|
|
if __name__ == "__main__":
    # Glob patterns selecting which parts of the PDF-Extract-Kit repository
    # are fetched. Entries that are commented out are not downloaded.
    mineru_patterns = [
        # "models/Layout/LayoutLMv3/*",
        "models/Layout/YOLO/*",
        "models/MFD/YOLO/*",
        "models/MFR/unimernet_hf_small_2503/*",
        "models/OCR/paddleocr_torch/*",
        # "models/TabRec/TableMaster/*",
        # "models/TabRec/StructEqTable/*",
    ]
    # First download: the selected PDF-Extract-Kit files, placed under /opt/.
    extract_kit_root = snapshot_download(
        repo_id="opendatalab/PDF-Extract-Kit-1.0",
        allow_patterns=mineru_patterns,
        local_dir="/opt/",
    )

    # Second download: only the JSON and safetensors files of the
    # layoutreader repository, placed in their own directory.
    layoutreader_pattern = ["*.json", "*.safetensors"]
    layoutreader_model_dir = snapshot_download(
        repo_id="hantian/layoutreader",
        allow_patterns=layoutreader_pattern,
        local_dir="/opt/layoutreader/",
    )

    # Every selected pattern starts with "models/", so report that
    # sub-folder of the first download rather than the download root itself.
    model_dir = f"{extract_kit_root}/models"

    print(f"model_dir is: {model_dir}")
    print(f"layoutreader_model_dir is: {layoutreader_model_dir}")
|