import atexit
import tempfile

import datasets

from marker.providers.pdf import PdfProvider


def setup_pdf_provider(
    filename='adversarial.pdf',
    config=None,
) -> PdfProvider:
    """Build a ``PdfProvider`` backed by a PDF from the ``datalab-to/pdfs`` dataset.

    The row whose ``filename`` column equals *filename* is looked up in the
    ``train`` split, its ``pdf`` payload is written to a temporary ``.pdf``
    file, and a provider is constructed from that file's path.

    Args:
        filename: Value to match against the dataset's ``filename`` column.
        config: Passed through unchanged as the second argument of
            ``PdfProvider``.

    Returns:
        A ``PdfProvider`` constructed from the temporary file's path.

    Raises:
        ValueError: If no row in the dataset has the requested filename.
    """
    dataset = datasets.load_dataset("datalab-to/pdfs", split="train")
    try:
        idx = dataset['filename'].index(filename)
    except ValueError as err:
        raise ValueError(
            f"{filename!r} not found in dataset 'datalab-to/pdfs'"
        ) from err

    temp_pdf = tempfile.NamedTemporaryFile(suffix=".pdf")
    # A NamedTemporaryFile is deleted as soon as it is closed, which would
    # otherwise happen when this function returns and the local goes out of
    # scope. Registering its bound ``close`` with atexit keeps the object
    # (and thus the file on disk) alive until interpreter exit, and also
    # guarantees the file is removed then.
    atexit.register(temp_pdf.close)

    # Index by row first so only this row's payload is read, instead of
    # loading the entire ``pdf`` column to pick a single element.
    temp_pdf.write(dataset[idx]['pdf'])
    temp_pdf.flush()  # make the bytes visible to readers opening by path

    return PdfProvider(temp_pdf.name, config)