You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
43 lines
1.6 KiB
Python
43 lines
1.6 KiB
Python
import pytest
|
|
|
|
from marker.builders.document import DocumentBuilder
|
|
from marker.builders.layout import LayoutBuilder
|
|
from marker.builders.line import LineBuilder
|
|
from marker.renderers.markdown import MarkdownRenderer
|
|
from marker.schema import BlockTypes
|
|
from marker.schema.registry import get_block_class
|
|
|
|
|
|
@pytest.mark.config({"page_range": [0]})
def test_layout_replace(request, config, doc_provider, layout_model, ocr_error_model, detection_model, inline_detection_model):
    """Check that text is still merged properly after layout blocks are replaced.

    The llm layout builder replaces blocks. This test imitates that step by
    swapping every Text block on the first page for a TextInlineMath block
    that reuses the original polygon, page id and structure, and only then
    runs the line builder. Each replacement must end up with non-empty raw
    text, and the rendered markdown must contain two known phrases.
    """
    run_layout = LayoutBuilder(layout_model, config)
    run_lines = LineBuilder(detection_model, inline_detection_model, ocr_error_model, config)
    doc_builder = DocumentBuilder(config)

    document = doc_builder.build_document(doc_provider)
    run_layout(document, doc_provider)

    first_page = document.pages[0]
    replacements = []
    for original in first_page.contained_blocks(document, (BlockTypes.Text,)):
        replacement_cls = get_block_class(BlockTypes.TextInlineMath)
        # Carry over geometry and children so only the block type changes.
        carried_fields = {
            "polygon": original.polygon,
            "page_id": original.page_id,
            "structure": original.structure,
        }
        replacement = replacement_cls(**carried_fields)
        first_page.replace_block(original, replacement)
        replacements.append(replacement)

    # Lines are built only after the swap, so text has to attach to the new blocks.
    run_lines(document, doc_provider)

    for replacement in replacements:
        assert replacement.raw_text(document).strip()

    render = MarkdownRenderer(config)
    markdown = render(document).markdown

    expected_phrases = ("worst-case perturbations", "projected gradient descent")
    for phrase in expected_phrases:
        assert phrase in markdown
|
|
|
|
|
|
|