You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
208 lines
12 KiB
Plaintext
208 lines
12 KiB
Plaintext
LICENSE.md
|
|
README.md
|
|
setup.py
|
|
magic_pdf/__init__.py
|
|
magic_pdf/pdf_parse_union_core_v2.py
|
|
magic_pdf.egg-info/PKG-INFO
|
|
magic_pdf.egg-info/SOURCES.txt
|
|
magic_pdf.egg-info/dependency_links.txt
|
|
magic_pdf.egg-info/entry_points.txt
|
|
magic_pdf.egg-info/not-zip-safe
|
|
magic_pdf.egg-info/requires.txt
|
|
magic_pdf.egg-info/top_level.txt
|
|
magic_pdf/config/__init__.py
|
|
magic_pdf/config/constants.py
|
|
magic_pdf/config/drop_reason.py
|
|
magic_pdf/config/drop_tag.py
|
|
magic_pdf/config/enums.py
|
|
magic_pdf/config/exceptions.py
|
|
magic_pdf/config/make_content_config.py
|
|
magic_pdf/config/model_block_type.py
|
|
magic_pdf/config/ocr_content_type.py
|
|
magic_pdf/data/__init__.py
|
|
magic_pdf/data/batch_build_dataset.py
|
|
magic_pdf/data/dataset.py
|
|
magic_pdf/data/read_api.py
|
|
magic_pdf/data/schemas.py
|
|
magic_pdf/data/utils.py
|
|
magic_pdf/data/data_reader_writer/__init__.py
|
|
magic_pdf/data/data_reader_writer/base.py
|
|
magic_pdf/data/data_reader_writer/filebase.py
|
|
magic_pdf/data/data_reader_writer/multi_bucket_s3.py
|
|
magic_pdf/data/data_reader_writer/s3.py
|
|
magic_pdf/data/io/__init__.py
|
|
magic_pdf/data/io/base.py
|
|
magic_pdf/data/io/http.py
|
|
magic_pdf/data/io/s3.py
|
|
magic_pdf/dict2md/__init__.py
|
|
magic_pdf/dict2md/ocr_mkcontent.py
|
|
magic_pdf/filter/__init__.py
|
|
magic_pdf/filter/pdf_classify_by_type.py
|
|
magic_pdf/filter/pdf_meta_scan.py
|
|
magic_pdf/integrations/__init__.py
|
|
magic_pdf/integrations/rag/__init__.py
|
|
magic_pdf/integrations/rag/api.py
|
|
magic_pdf/integrations/rag/type.py
|
|
magic_pdf/integrations/rag/utils.py
|
|
magic_pdf/libs/__init__.py
|
|
magic_pdf/libs/boxbase.py
|
|
magic_pdf/libs/clean_memory.py
|
|
magic_pdf/libs/commons.py
|
|
magic_pdf/libs/config_reader.py
|
|
magic_pdf/libs/convert_utils.py
|
|
magic_pdf/libs/coordinate_transform.py
|
|
magic_pdf/libs/draw_bbox.py
|
|
magic_pdf/libs/hash_utils.py
|
|
magic_pdf/libs/json_compressor.py
|
|
magic_pdf/libs/language.py
|
|
magic_pdf/libs/local_math.py
|
|
magic_pdf/libs/markdown_utils.py
|
|
magic_pdf/libs/path_utils.py
|
|
magic_pdf/libs/pdf_check.py
|
|
magic_pdf/libs/pdf_image_tools.py
|
|
magic_pdf/libs/performance_stats.py
|
|
magic_pdf/libs/safe_filename.py
|
|
magic_pdf/libs/version.py
|
|
magic_pdf/model/__init__.py
|
|
magic_pdf/model/batch_analyze.py
|
|
magic_pdf/model/doc_analyze_by_custom_model.py
|
|
magic_pdf/model/magic_model.py
|
|
magic_pdf/model/model_list.py
|
|
magic_pdf/model/pdf_extract_kit.py
|
|
magic_pdf/model/pp_structure_v2.py
|
|
magic_pdf/model/sub_modules/__init__.py
|
|
magic_pdf/model/sub_modules/model_init.py
|
|
magic_pdf/model/sub_modules/model_utils.py
|
|
magic_pdf/model/sub_modules/language_detection/__init__.py
|
|
magic_pdf/model/sub_modules/language_detection/utils.py
|
|
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py
|
|
magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py
|
|
magic_pdf/model/sub_modules/layout/__init__.py
|
|
magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py
|
|
magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py
|
|
magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py
|
|
magic_pdf/model/sub_modules/mfd/__init__.py
|
|
magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py
|
|
magic_pdf/model/sub_modules/mfd/yolov8/__init__.py
|
|
magic_pdf/model/sub_modules/mfr/__init__.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/__init__.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py
|
|
magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py
|
|
magic_pdf/model/sub_modules/ocr/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py
|
|
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py
|
|
magic_pdf/model/sub_modules/reading_oreder/__init__.py
|
|
magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py
|
|
magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py
|
|
magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py
|
|
magic_pdf/model/sub_modules/table/__init__.py
|
|
magic_pdf/model/sub_modules/table/table_utils.py
|
|
magic_pdf/model/sub_modules/table/rapidtable/__init__.py
|
|
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py
|
|
magic_pdf/operators/__init__.py
|
|
magic_pdf/operators/models.py
|
|
magic_pdf/operators/pipes.py
|
|
magic_pdf/post_proc/__init__.py
|
|
magic_pdf/post_proc/llm_aided.py
|
|
magic_pdf/post_proc/para_split_v3.py
|
|
magic_pdf/pre_proc/__init__.py
|
|
magic_pdf/pre_proc/construct_page_dict.py
|
|
magic_pdf/pre_proc/cut_image.py
|
|
magic_pdf/pre_proc/ocr_detect_all_bboxes.py
|
|
magic_pdf/pre_proc/ocr_dict_merge.py
|
|
magic_pdf/pre_proc/ocr_span_list_modify.py
|
|
magic_pdf/pre_proc/remove_bbox_overlap.py
|
|
magic_pdf/resources/fasttext-langdetect/lid.176.ftz
|
|
magic_pdf/resources/model_config/model_configs.yaml
|
|
magic_pdf/resources/slanet_plus/slanet-plus.onnx
|
|
magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt
|
|
magic_pdf/spark/__init__.py
|
|
magic_pdf/spark/spark_api.py
|
|
magic_pdf/tools/__init__.py
|
|
magic_pdf/tools/cli.py
|
|
magic_pdf/tools/cli_dev.py
|
|
magic_pdf/tools/common.py
|
|
magic_pdf/utils/__init__.py
|
|
magic_pdf/utils/annotations.py
|
|
magic_pdf/utils/office_to_pdf.py |