# Package metadata read by Poetry when building and publishing marker-pdf.
[tool.poetry]
name = "marker-pdf"
version = "1.6.2"
description = "Convert documents to markdown with high speed and accuracy."
# NOTE(review): the trailing space inside the author string suggests a
# "<email>" suffix was stripped at some point -- confirm and restore it.
authors = ["Vik Paruchuri "]
readme = "README.md"
license = "GPL-3.0-or-later"
repository = "https://github.com/VikParuchuri/marker"
keywords = ["pdf", "markdown", "ocr", "nlp"]
packages = [
    { include = "marker" },
]
# Non-Python files under marker/scripts that are listed explicitly for packaging.
include = [
    "marker/scripts/*.sh",
    "marker/scripts/*.html",
]

# Runtime dependencies. Caret (^) constraints accept compatible updates; the
# tilde (~) constraints on surya-ocr and pdftext only accept patch releases
# within the stated minor version.
[tool.poetry.dependencies]
python = "^3.10"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
transformers = "^4.45.2"
python-dotenv = "^1.0.0"
torch = "^2.5.1"
tqdm = "^4.66.1"
ftfy = "^6.1.1"
rapidfuzz = "^3.8.1"
surya-ocr = "~0.13.1"
regex = "^2024.4.28"
pdftext = "~0.6.2"
markdownify = "^0.13.1"
click = "^8.1.7"
markdown2 = "^2.5.2"
filetype = "^1.2.0"
scikit-learn = "^1.6.1"
google-genai = "^1.0.0"
anthropic = "^0.46.0"
openai = "^1.65.2"

# Optional dependencies for documents. These are only installed through the
# "full" extra declared in [tool.poetry.extras].
mammoth = { version = "^1.9.0", optional = true }
openpyxl = { version = "^3.1.5", optional = true }
python-pptx = { version = "^1.0.2", optional = true }
ebooklib = { version = "^0.18", optional = true }
weasyprint = { version = "^63.1", optional = true }

# Development-only dependencies; not part of the published package's
# requirements.
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
datasets = "^2.21.0"
streamlit = "^1.37.1"
fastapi = "^0.115.4"
uvicorn = "^0.32.0"
python-multipart = "^0.0.16"
pytest = "^8.3.3"
pytest-mock = "^3.14.0"
# The next three are exact pins (no caret), so they never float.
apted = "1.0.3"
distance = "0.1.3"
lxml = "5.3.0"
tabulate = "^0.9.0"
latex2mathml = "^3.77.0"
playwright = "^1.49.1"
# Git-hook tooling is a contributor concern, so it lives here rather than in
# the runtime dependencies.
pre-commit = "^4.2.0"

# `pip install marker-pdf[full]` pulls in every optional document converter.
[tool.poetry.extras]
full = ["mammoth", "openpyxl", "python-pptx", "ebooklib", "weasyprint"]

# Console commands, each mapped to a "module:function" entry point.
[tool.poetry.scripts]
marker = "marker.scripts.convert:convert_cli"
marker_single = "marker.scripts.convert_single:convert_single_cli"
marker_chunk_convert = "marker.scripts.chunk_convert:chunk_convert_cli"
marker_gui = "marker.scripts.run_streamlit_app:streamlit_app_cli"
marker_server = "marker.scripts.server:server_cli"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"