You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
1.1 KiB
Python
31 lines
1.1 KiB
Python
1 month ago
|
import pytest
|
||
|
|
||
|
from marker.processors.line_merge import LineMergeProcessor
|
||
|
from marker.schema import BlockTypes
|
||
|
|
||
|
@pytest.mark.config({"page_range": [1]})
def test_inline_box_nomerging(pdf_document, config):
    """Check that the line count is 46 both before and after line merging.

    Runs with only ``page_range`` set in the marker config. The block under
    test is the one referenced by the second structure entry of the first
    page, and its ``Line`` children are counted before and after
    ``LineMergeProcessor`` is applied to the document.
    """

    def count_lines(target):
        # Number of Line blocks contained in ``target``.
        return len(target.contained_blocks(pdf_document, (BlockTypes.Line,)))

    page = pdf_document.pages[0]
    # First inline math block (per the original author's note).
    math_block = pdf_document.get_block(page.structure[1])
    assert count_lines(math_block) == 46

    # Build the processor from the test config and apply it to the document.
    LineMergeProcessor(config)(pdf_document)

    # Re-query the same block object; the count is expected to be unchanged.
    assert count_lines(math_block) == 46
|
||
|
|
||
|
|
||
|
@pytest.mark.config({"page_range": [1], "use_llm": True})
def test_inline_box_merging(pdf_document, config):
    """Check that the line count is 21 both before and after line merging.

    Runs with ``use_llm`` enabled in the marker config. The block under test
    is the one referenced by the second structure entry of the first page,
    and its ``Line`` children are counted before and after
    ``LineMergeProcessor`` is applied to the document.

    NOTE(review): the count is already 21 before the processor runs here,
    presumably because the ``pdf_document`` fixture merges lines when
    ``use_llm`` is set -- confirm against the fixture definition.
    """
    line_types = (BlockTypes.Line,)
    opening_page = pdf_document.pages[0]
    # First inline math block (per the original author's note).
    inline_block = pdf_document.get_block(opening_page.structure[1])

    lines_before = inline_block.contained_blocks(pdf_document, line_types)
    assert len(lines_before) == 21

    # Build the processor from the test config and apply it to the document.
    processor = LineMergeProcessor(config)
    processor(pdf_document)

    # Re-query the same block object after processing.
    lines_after = inline_block.contained_blocks(pdf_document, line_types)
    assert len(lines_after) == 21
|