"""Smoke tests that run the ``pdf_converter`` fixture over several input formats.

Each test converts one document and checks that known text appears in the
rendered markdown. The ``filename``, ``config`` and ``output_format`` markers
are presumably consumed by fixtures defined outside this file (TODO confirm
against the project's conftest).
"""
import pytest

from marker.converters.pdf import PdfConverter
from marker.renderers.markdown import MarkdownOutput


@pytest.mark.output_format("markdown")
@pytest.mark.config({"page_range": [0, 1, 2, 3, 7]})
def test_pdf_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert the selected pages and check the title plus joined lines."""
    rendered: MarkdownOutput = pdf_converter(temp_doc.name)
    text = rendered.markdown

    # Basic assertions
    assert len(text) > 0
    assert "# Subspace Adversarial Training" in text

    # Some assertions for line joining across pages
    joined_across_pages = (
        "AT solutions. However, these methods highly rely on specifically",  # pgs: 1-2
        "(with adversarial perturbations), which harms natural accuracy, ",  # pgs: 3-4
    )
    for snippet in joined_across_pages:
        assert snippet in text

    # Some assertions for line joining across columns
    joined_across_columns = (
        "remain similar across a wide range of choices.",  # pg: 2
        "a new scheme for designing more robust and efficient",  # pg: 8
    )
    for snippet in joined_across_columns:
        assert snippet in text


@pytest.mark.filename("manual.epub")
@pytest.mark.config({"page_range": [0]})
def test_epub_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert the EPUB sample and check that its title text is present."""
    rendered: MarkdownOutput = pdf_converter(temp_doc.name)
    text = rendered.markdown

    # Basic assertions
    assert "Simple Sabotage Field Manual" in text


@pytest.mark.filename("single_sheet.xlsx")
@pytest.mark.config({"page_range": [0]})
def test_xlsx_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert the XLSX sample and check that a known cell value is present."""
    rendered: MarkdownOutput = pdf_converter(temp_doc.name)
    text = rendered.markdown

    # Basic assertions
    assert "four" in text


@pytest.mark.filename("china.html")
@pytest.mark.config({"page_range": [10]})
def test_html_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert the HTML sample and check that a known phrase is present."""
    rendered: MarkdownOutput = pdf_converter(temp_doc.name)
    text = rendered.markdown

    # Basic assertions
    assert "Republic of China" in text


@pytest.mark.filename("gatsby.docx")
@pytest.mark.config({"page_range": [0]})
def test_docx_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert the DOCX sample and check that a known heading is present."""
    rendered: MarkdownOutput = pdf_converter(temp_doc.name)
    text = rendered.markdown

    # Basic assertions
    assert "The Decline of the American Dream in the 1920s" in text


@pytest.mark.filename("lambda.pptx")
@pytest.mark.config({"page_range": [0]})
def test_pptx_converter(pdf_converter: PdfConverter, temp_doc):
    """Convert the PPTX sample and check that the author name is present."""
    rendered: MarkdownOutput = pdf_converter(temp_doc.name)
    text = rendered.markdown

    # Basic assertions
    assert "Adam Doupé" in text