You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
77 lines
2.0 KiB
TOML
77 lines
2.0 KiB
TOML
1 month ago
|
# Package metadata read by Poetry (the build backend declared in [build-system]).
[tool.poetry]
name = "marker-pdf"
version = "1.6.2"
description = "Convert documents to markdown with high speed and accuracy."
authors = ["Vik Paruchuri <github@vikas.sh>"]
readme = "README.md"
license = "GPL-3.0-or-later"
repository = "https://github.com/VikParuchuri/marker"
keywords = ["pdf", "markdown", "ocr", "nlp"]
# The distribution is named "marker-pdf", but the package directory is "marker".
packages = [
    { include = "marker" },
]
# Non-Python files under marker/scripts (shell scripts and HTML) to include
# in the built package in addition to the Python sources.
include = [
    "marker/scripts/*.sh",
    "marker/scripts/*.html",
]

# Runtime dependencies. Most use caret ("^") constraints; surya-ocr and pdftext
# use tilde ("~") constraints, which are narrower (patch-level updates only).
[tool.poetry.dependencies]
python = "^3.10"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
transformers = "^4.45.2"
python-dotenv = "^1.0.0"
torch = "^2.5.1"
tqdm = "^4.66.1"
ftfy = "^6.1.1"
rapidfuzz = "^3.8.1"
surya-ocr = "~0.13.1"
regex = "^2024.4.28"
pdftext = "~0.6.2"
markdownify = "^0.13.1"
click = "^8.1.7"
markdown2 = "^2.5.2"
filetype = "^1.2.0"
scikit-learn = "^1.6.1"
google-genai = "^1.0.0"
anthropic = "^0.46.0"
# NOTE(review): pre-commit is normally a developer tool; it is listed here as a
# runtime dependency. Confirm whether it belongs in the dev group instead.
pre-commit = "^4.2.0"
openai = "^1.65.2"

# Optional dependencies for documents
# (installed only when requested through an extra; see [tool.poetry.extras]).
mammoth = { version = "^1.9.0", optional = true }
openpyxl = { version = "^3.1.5", optional = true }
python-pptx = { version = "^1.0.2", optional = true }
ebooklib = { version = "^0.18", optional = true }
weasyprint = { version = "^63.1", optional = true }

# Development-only dependencies (Poetry "dev" dependency group); these are not
# part of the published package's requirements.
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
datasets = "^2.21.0"
streamlit = "^1.37.1"
fastapi = "^0.115.4"
uvicorn = "^0.32.0"
python-multipart = "^0.0.16"
pytest = "^8.3.3"
pytest-mock = "^3.14.0"
# The next three are pinned to exact versions rather than ranges.
# NOTE(review): the reason for the exact pins is not recorded here — confirm
# before loosening them.
apted = "1.0.3"
distance = "0.1.3"
lxml = "5.3.0"
tabulate = "^0.9.0"
latex2mathml = "^3.77.0"
playwright = "^1.49.1"

# The "full" extra groups the dependencies marked `optional = true` in
# [tool.poetry.dependencies] so they can be requested together.
[tool.poetry.extras]
full = [
    "mammoth",
    "openpyxl",
    "python-pptx",
    "ebooklib",
    "weasyprint",
]

# Command-line entry points, in the form: command name = "module.path:function".
[tool.poetry.scripts]
marker = "marker.scripts.convert:convert_cli"
marker_single = "marker.scripts.convert_single:convert_single_cli"
marker_chunk_convert = "marker.scripts.chunk_convert:chunk_convert_cli"
marker_gui = "marker.scripts.run_streamlit_app:streamlit_app_cli"
marker_server = "marker.scripts.server:server_cli"

# PEP 517 build configuration: the package is built with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"