You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
25 lines
1.1 KiB
Python
25 lines
1.1 KiB
Python
1 month ago
|
from surya.layout.schema import LayoutResult
|
||
|
|
||
|
from marker.builders.document import DocumentBuilder
|
||
|
from marker.builders.layout import LayoutBuilder
|
||
|
from marker.builders.line import LineBuilder
|
||
|
|
||
|
|
||
|
def test_blank_page(config, doc_provider, layout_model, ocr_error_model, recognition_model, detection_model, inline_detection_model):
    """Check that pages given no layout boxes and no lines keep list-typed fields.

    Every page of the built document receives a ``LayoutResult`` whose
    ``bboxes`` list is empty, and both line mappings hold an empty list for
    each page id. Once the layout and line builders have run, ``children``
    and ``structure`` must be lists on every page.

    ``recognition_model`` is accepted but not referenced in this body.
    """
    layout_builder = LayoutBuilder(layout_model, config)
    line_builder = LineBuilder(detection_model, inline_detection_model, ocr_error_model)
    document = DocumentBuilder(config).build_document(doc_provider)

    # One layout result per page: no boxes, image_bbox taken from the page polygon.
    layout_results = []
    for page in document.pages:
        empty_layout = LayoutResult(bboxes=[], image_bbox=page.polygon.bbox)
        layout_results.append(empty_layout)

    # Each page id maps to its own fresh empty list in both mappings.
    provider_lines = {}
    for page in document.pages:
        provider_lines[page.page_id] = []
    ocr_lines = {}
    for page in document.pages:
        ocr_lines[page.page_id] = []

    layout_builder.add_blocks_to_pages(document.pages, layout_results)
    line_builder.merge_blocks(document, provider_lines, ocr_lines)

    children_are_lists = [isinstance(page.children, list) for page in document.pages]
    assert all(children_are_lists)
    structure_are_lists = [isinstance(page.structure, list) for page in document.pages]
    assert all(structure_are_lists)
|