You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
40 lines
1.1 KiB
Python
40 lines
1.1 KiB
Python
1 month ago
|
import time
|
||
|
import torch
|
||
|
|
||
|
import click
|
||
|
import pypdfium2 as pdfium
|
||
|
from tqdm import tqdm
|
||
|
|
||
|
from marker.converters.pdf import PdfConverter
|
||
|
from marker.models import create_model_dict
|
||
|
|
||
|
|
||
|
@click.command(help="Benchmark PDF to MD conversion throughput.")
@click.argument("pdf_path", type=str)
@click.option(
    "--iterations",
    type=click.IntRange(min=1),
    default=10,
    show_default=True,
    help="Number of timed conversion runs to average over.",
)
def main(pdf_path, iterations=10):
    """Benchmark repeated PDF-to-markdown conversion of a single file.

    The PDF at ``pdf_path`` is opened once to read its page count, the model
    dictionary is created once, and the file is then converted ``iterations``
    times with a fresh ``PdfConverter`` per run. Only the conversion call
    itself is timed; converter construction stays outside the timed region.
    The mean time per run and the peak allocated GPU memory are printed.

    Args:
        pdf_path: Path to the PDF file to convert.
        iterations: Number of timed conversion runs (at least 1 when invoked
            through the command line).
    """
    print(f"Converting {pdf_path} to markdown...")

    # Only the page count is needed; make sure the document is closed even if
    # reading the length fails.
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        page_count = len(pdf)
    finally:
        pdf.close()

    model_dict = create_model_dict()

    # The CUDA memory-stat calls require an initialised CUDA runtime; skip
    # them on CPU-only hosts so the timing benchmark still runs there.
    has_cuda = torch.cuda.is_available()
    if has_cuda:
        torch.cuda.reset_peak_memory_stats()

    times = []
    for _ in tqdm(range(iterations), desc="Benchmarking"):
        block_converter = PdfConverter(
            artifact_dict=model_dict,
            config={"disable_tqdm": True},
        )
        # perf_counter is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments.
        start = time.perf_counter()
        block_converter(pdf_path)
        times.append(time.perf_counter() - start)

    # Bytes -> GiB; reported as 0.00 when no CUDA device is available.
    max_gpu_vram = torch.cuda.max_memory_allocated() / 1024 ** 3 if has_cuda else 0.0

    print(f"Converted {page_count} pages in {sum(times)/len(times):.2f} seconds.")
    print(f"Max GPU VRAM: {max_gpu_vram:.2f} GB")
|
||
|
|
||
|
|
||
|
# Run the benchmark command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|