You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

17 lines
522 B
Python

import pytest
from marker.processors.ignoretext import IgnoreTextProcessor
from marker.schema import BlockTypes
@pytest.mark.filename("bio_pdf.pdf")
@pytest.mark.config({"page_range": list(range(10))})
def test_ignoretext_processor(pdf_document):
    """Check that IgnoreTextProcessor flags the first Text block on page index 1.

    The processor is run over ``pdf_document``; the test then takes the first
    ``BlockTypes.Text`` block found on ``pdf_document.pages[1]`` and verifies
    that its raw text contains "bioRxiv" and that ``ignore_for_output`` is True.

    NOTE(review): ``pdf_document`` and the ``filename`` / ``config`` markers are
    presumably resolved by fixtures defined elsewhere (e.g. a conftest) --
    confirm there how "bio_pdf.pdf" and ``page_range`` are consumed.
    """
    processor = IgnoreTextProcessor()
    processor(pdf_document)

    text_blocks = pdf_document.pages[1].contained_blocks(pdf_document, [BlockTypes.Text])
    # Fail with a readable message instead of a bare IndexError when the page
    # yields no Text blocks at all.
    assert text_blocks, "expected at least one Text block on page index 1"

    # Variable name suggests this block is the running page header -- the test
    # itself only checks its text content and the ignore flag.
    page1_header = text_blocks[0]
    assert "bioRxiv" in page1_header.raw_text(pdf_document)
    assert page1_header.ignore_for_output is True