# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 43 lines
# 1.6 KiB
# Python

import pytest
from marker.builders.document import DocumentBuilder
from marker.builders.layout import LayoutBuilder
from marker.builders.line import LineBuilder
from marker.renderers.markdown import MarkdownRenderer
from marker.schema import BlockTypes
from marker.schema.registry import get_block_class
@pytest.mark.config({"page_range": [0]})
def test_layout_replace(
    request,
    config,
    doc_provider,
    layout_model,
    ocr_error_model,
    detection_model,
    inline_detection_model,
):
    """Check that text survives when layout blocks are swapped out.

    The llm layout builder replaces blocks - this makes sure text is still
    merged properly. The test mimics that replacement by swapping every
    ``Text`` block on the first page for a ``TextInlineMath`` block that
    reuses the original polygon, page id and structure. It then runs the
    line builder and asserts that each replacement block yields non-empty
    raw text and that the rendered markdown contains the expected phrases.

    All parameters are pytest fixtures; ``request`` is unused here but kept
    so the fixture signature stays unchanged.
    """
    layout_builder = LayoutBuilder(layout_model, config)
    line_builder = LineBuilder(
        detection_model, inline_detection_model, ocr_error_model, config
    )

    document = DocumentBuilder(config).build_document(doc_provider)
    layout_builder(document, doc_provider)
    page = document.pages[0]

    # The replacement class does not depend on the block being replaced, so
    # look it up once instead of once per iteration.
    replacement_class = get_block_class(BlockTypes.TextInlineMath)

    new_blocks = []
    for block in page.contained_blocks(document, (BlockTypes.Text,)):
        generated_block = replacement_class(
            polygon=block.polygon,
            page_id=block.page_id,
            structure=block.structure,
        )
        page.replace_block(block, generated_block)
        new_blocks.append(generated_block)

    # Without at least one replaced block the per-block check below would
    # pass vacuously and the test would not exercise the replacement path.
    assert new_blocks, "expected at least one Text block to replace on page 0"

    # Run the line builder only after the replacement, so the text has to be
    # picked up by the new blocks rather than the ones that were removed.
    line_builder(document, doc_provider)

    for block in new_blocks:
        assert block.raw_text(document).strip(), (
            "replacement block has no text after line building"
        )

    rendered = MarkdownRenderer(config)(document)

    assert "worst-case perturbations" in rendered.markdown, (
        "expected phrase missing from rendered markdown"
    )
    assert "projected gradient descent" in rendered.markdown, (
        "expected phrase missing from rendered markdown"
    )