You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

71 lines
2.5 KiB
Python

import pytest
from marker.converters.pdf import PdfConverter
from marker.renderers.markdown import MarkdownOutput
@pytest.mark.output_format("markdown")
@pytest.mark.config({"page_range": [0, 1, 2, 3, 7]})
def test_pdf_converter(pdf_converter: PdfConverter, temp_doc):
    """Run the converter on ``temp_doc`` and spot-check the returned markdown.

    NOTE(review): the ``output_format`` and ``config`` markers are presumably
    consumed by fixtures defined elsewhere (e.g. conftest) -- confirm there.
    """
    result: MarkdownOutput = pdf_converter(temp_doc.name)
    text = result.markdown

    # Sanity checks: output is non-empty and contains the expected heading.
    assert len(text) > 0
    assert "# Subspace Adversarial Training" in text

    # Lines that must be joined across a page boundary.
    assert "AT solutions. However, these methods highly rely on specifically" in text  # pgs: 1-2
    assert "(with adversarial perturbations), which harms natural accuracy, " in text  # pgs: 3-4

    # Lines that must be joined across a column boundary.
    assert "remain similar across a wide range of choices." in text  # pg: 2
    assert "a new scheme for designing more robust and efficient" in text  # pg: 8
@pytest.mark.filename("manual.epub")
@pytest.mark.config({"page_range": [0]})
def test_epub_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert ``temp_doc`` and check the markdown contains the expected EPUB text.

    NOTE(review): the ``filename`` marker presumably makes the fixtures supply
    ``manual.epub`` as ``temp_doc`` -- confirm in the fixture definitions.
    """
    result: MarkdownOutput = pdf_converter(temp_doc.name)
    text = result.markdown

    # Smoke check: a known phrase must appear in the output.
    assert "Simple Sabotage Field Manual" in text
@pytest.mark.filename("single_sheet.xlsx")
@pytest.mark.config({"page_range": [0]})
def test_xlsx_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert ``temp_doc`` and check the markdown contains the expected XLSX text.

    NOTE(review): the ``filename`` marker presumably makes the fixtures supply
    ``single_sheet.xlsx`` as ``temp_doc`` -- confirm in the fixture definitions.
    """
    result: MarkdownOutput = pdf_converter(temp_doc.name)
    text = result.markdown

    # Smoke check: a known value must appear in the output.
    assert "four" in text
@pytest.mark.filename("china.html")
@pytest.mark.config({"page_range": [10]})
def test_html_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert ``temp_doc`` and check the markdown contains the expected HTML text.

    NOTE(review): the ``filename`` marker presumably makes the fixtures supply
    ``china.html`` as ``temp_doc`` -- confirm in the fixture definitions.
    """
    result: MarkdownOutput = pdf_converter(temp_doc.name)
    text = result.markdown

    # Smoke check: a known phrase must appear in the output.
    assert "Republic of China" in text
@pytest.mark.filename("gatsby.docx")
@pytest.mark.config({"page_range": [0]})
def test_docx_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert ``temp_doc`` and check the markdown contains the expected DOCX text.

    NOTE(review): the ``filename`` marker presumably makes the fixtures supply
    ``gatsby.docx`` as ``temp_doc`` -- confirm in the fixture definitions.
    """
    result: MarkdownOutput = pdf_converter(temp_doc.name)
    text = result.markdown

    # Smoke check: a known phrase must appear in the output.
    assert "The Decline of the American Dream in the 1920s" in text
@pytest.mark.filename("lambda.pptx")
@pytest.mark.config({"page_range": [0]})
def test_pptx_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert ``temp_doc`` and check the markdown contains the expected PPTX text.

    NOTE(review): the ``filename`` marker presumably makes the fixtures supply
    ``lambda.pptx`` as ``temp_doc`` -- confirm in the fixture definitions.
    """
    result: MarkdownOutput = pdf_converter(temp_doc.name)
    text = result.markdown

    # Smoke check: a known (non-ASCII) name must appear in the output.
    assert "Adam Doupé" in text