You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

16 lines
551 B
Python

1 month ago
import pytest
from marker.builders.document import DocumentBuilder
from marker.builders.layout import LayoutBuilder
from marker.schema import BlockTypes
@pytest.mark.config({"page_range": [0], "strip_existing_ocr": True})
@pytest.mark.filename("handwritten.pdf")
def test_strip_ocr(doc_provider):
# Ensure that the OCR text isn't extracted
assert len(doc_provider.page_lines) == 0
@pytest.mark.config({"page_range": [0]})
@pytest.mark.filename("handwritten.pdf")
def test_keep_ocr(doc_provider):
assert len(doc_provider.page_lines) == 1