|
|
"""Return the S3 AK, SK and endpoint triple that corresponds to a bucket name."""
|
|
|
|
|
|
import json
|
|
|
import os
|
|
|
|
|
|
from loguru import logger
|
|
|
|
|
|
from magic_pdf.config.constants import MODEL_NAME
|
|
|
from magic_pdf.libs.commons import parse_bucket_key
|
|
|
|
|
|
# Name of the configuration file; overridable via the MINERU_TOOLS_CONFIG_JSON environment variable.
|
|
|
CONFIG_FILE_NAME = os.getenv('MINERU_TOOLS_CONFIG_JSON', 'magic-pdf.json')
|
|
|
|
|
|
|
|
|
def read_config():
    """Load and return the parsed JSON configuration.

    An absolute ``CONFIG_FILE_NAME`` is used as-is; otherwise the name is
    resolved relative to the current user's home directory.

    Returns:
        The object produced by ``json.load`` for the configuration file.

    Raises:
        FileNotFoundError: If the resolved configuration path does not exist.
    """
    config_path = (
        CONFIG_FILE_NAME
        if os.path.isabs(CONFIG_FILE_NAME)
        else os.path.join(os.path.expanduser('~'), CONFIG_FILE_NAME)
    )

    if not os.path.exists(config_path):
        raise FileNotFoundError(f'{config_path} not found')

    with open(config_path, 'r', encoding='utf-8') as fp:
        return json.load(fp)
|
|
|
|
|
|
|
|
|
def get_s3_config(bucket_name: str):
    """Return the S3 access key, secret key and endpoint for a bucket.

    The values are read from the ``bucket_info`` section of the configuration
    returned by ``read_config()``. A bucket without an entry of its own uses
    the ``[default]`` entry instead.

    Args:
        bucket_name: Name of the bucket whose credentials are wanted.

    Returns:
        An ``(access_key, secret_key, storage_endpoint)`` tuple.

    Raises:
        Exception: If the configuration has no ``bucket_info`` section, or if
            any of the three values of the selected entry is ``None``.
        KeyError: If neither ``bucket_name`` nor ``[default]`` is present
            (assuming ``bucket_info`` is a mapping).
    """
    config = read_config()

    bucket_info = config.get('bucket_info')
    if bucket_info is None:
        # Without this guard the membership test below fails with an opaque
        # "argument of type 'NoneType' is not iterable" TypeError.
        raise Exception(f"'bucket_info' not found in {CONFIG_FILE_NAME}")

    # Buckets without a dedicated entry share the '[default]' credentials.
    entry_key = bucket_name if bucket_name in bucket_info else '[default]'
    access_key, secret_key, storage_endpoint = bucket_info[entry_key]

    if access_key is None or secret_key is None or storage_endpoint is None:
        raise Exception(f'ak, sk or endpoint not found in {CONFIG_FILE_NAME}')

    return access_key, secret_key, storage_endpoint
|
|
|
|
|
|
|
|
|
def get_s3_config_dict(path: str):
    """Return the S3 configuration for the bucket of ``path`` as a dict.

    Args:
        path: Path from which the bucket name is extracted via
            ``get_bucket_name``.

    Returns:
        A dict with the keys ``'ak'``, ``'sk'`` and ``'endpoint'``.
    """
    bucket = get_bucket_name(path)
    ak, sk, endpoint = get_s3_config(bucket)
    return dict(ak=ak, sk=sk, endpoint=endpoint)
|
|
|
|
|
|
|
|
|
def get_bucket_name(path):
    """Return the bucket part of ``path`` as split by ``parse_bucket_key``."""
    # parse_bucket_key yields a (bucket, key) pair; only the bucket is needed.
    bucket_name, _key = parse_bucket_key(path)
    return bucket_name
|
|
|
|
|
|
|
|
|
def get_local_models_dir():
    """Return the configured ``'models-dir'`` value.

    Returns:
        The ``'models-dir'`` entry of the configuration, or ``'/tmp/models'``
        (after logging a warning) when the entry is absent or ``None``.
    """
    configured_dir = read_config().get('models-dir')
    if configured_dir is not None:
        return configured_dir

    logger.warning(f"'models-dir' not found in {CONFIG_FILE_NAME}, use '/tmp/models' as default")
    return '/tmp/models'
|
|
|
|
|
|
|
|
|
def get_local_layoutreader_model_dir():
    """Return the directory to load the layoutreader model from.

    Returns:
        The ``'layoutreader-model-dir'`` entry of the configuration when it is
        set and exists on disk; otherwise a path under the user's home
        directory (``.cache/modelscope/hub/ppaanngggg/layoutreader``), after
        logging a warning.
    """
    configured_dir = read_config().get('layoutreader-model-dir')
    if configured_dir is not None and os.path.exists(configured_dir):
        return configured_dir

    # Fall back to the cache location under the home directory.
    fallback_dir = os.path.join(os.path.expanduser('~'), '.cache/modelscope/hub/ppaanngggg/layoutreader')
    logger.warning(f"'layoutreader-model-dir' not exists, use {fallback_dir} as default")
    return fallback_dir
|
|
|
|
|
|
|
|
|
def get_device():
    """Return the configured ``'device-mode'`` value.

    Returns:
        The ``'device-mode'`` entry of the configuration, or ``'cpu'`` (after
        logging a warning) when the entry is absent or ``None``.
    """
    device_mode = read_config().get('device-mode')
    if device_mode is not None:
        return device_mode

    logger.warning(f"'device-mode' not found in {CONFIG_FILE_NAME}, use 'cpu' as default")
    return 'cpu'
|
|
|
|
|
|
|
|
|
def get_table_recog_config():
    """Return the ``'table-config'`` section of the configuration.

    Returns:
        The configured value, or, when it is absent or ``None``, a default
        dict parsed from JSON with ``model`` set to
        ``MODEL_NAME.RAPID_TABLE``, ``enable`` set to ``False`` and
        ``max_time`` set to ``400`` (a warning is logged in that case).
    """
    configured = read_config().get('table-config')
    if configured is not None:
        return configured

    logger.warning(f"'table-config' not found in {CONFIG_FILE_NAME}, use 'False' as default")
    return json.loads(f'{{"model": "{MODEL_NAME.RAPID_TABLE}","enable": false, "max_time": 400}}')
|
|
|
|
|
|
|
|
|
def get_layout_config():
    """Return the ``'layout-config'`` section of the configuration.

    Returns:
        The configured value, or, when it is absent or ``None``, a default
        dict parsed from JSON whose ``model`` is ``MODEL_NAME.LAYOUTLMv3``
        (a warning is logged in that case).
    """
    configured = read_config().get('layout-config')
    if configured is not None:
        return configured

    logger.warning(f"'layout-config' not found in {CONFIG_FILE_NAME}, use '{MODEL_NAME.LAYOUTLMv3}' as default")
    return json.loads(f'{{"model": "{MODEL_NAME.LAYOUTLMv3}"}}')
|
|
|
|
|
|
|
|
|
def get_formula_config():
    """Return the ``'formula-config'`` section of the configuration.

    Returns:
        The configured value, or, when it is absent or ``None``, a default
        dict parsed from JSON with ``mfd_model`` set to
        ``MODEL_NAME.YOLO_V8_MFD``, ``mfr_model`` set to
        ``MODEL_NAME.UniMerNet_v2_Small`` and ``enable`` set to ``True``
        (a warning is logged in that case).
    """
    configured = read_config().get('formula-config')
    if configured is not None:
        return configured

    logger.warning(f"'formula-config' not found in {CONFIG_FILE_NAME}, use 'True' as default")
    return json.loads(f'{{"mfd_model": "{MODEL_NAME.YOLO_V8_MFD}","mfr_model": "{MODEL_NAME.UniMerNet_v2_Small}","enable": true}}')
|
|
|
|
|
|
def get_llm_aided_config():
    """Return the ``'llm-aided-config'`` section of the configuration.

    Returns:
        The configured value, or ``None`` (after logging a warning) when the
        entry is absent or ``None``.
    """
    section = read_config().get('llm-aided-config')
    if section is None:
        logger.warning(f"'llm-aided-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
    # ``section`` is already None in the fallback case, so one return suffices.
    return section
|
|
|
|
|
|
def get_latex_delimiter_config():
    """Return the ``'latex-delimiter-config'`` section of the configuration.

    Returns:
        The configured value, or ``None`` (after logging a warning) when the
        entry is absent or ``None``.
    """
    section = read_config().get('latex-delimiter-config')
    if section is None:
        logger.warning(f"'latex-delimiter-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
    # ``section`` is already None in the fallback case, so one return suffices.
    return section
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual check when run as a script: resolve the credentials configured
    # for a sample bucket name.
    ak, sk, endpoint = get_s3_config('llm-raw')
|