You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
127 lines
3.9 KiB
Python
127 lines
3.9 KiB
Python
4 weeks ago
|
import os
|
||
|
import shutil
|
||
|
import tempfile
|
||
|
|
||
|
from click.testing import CliRunner
|
||
|
|
||
|
from magic_pdf.tools.cli import cli
|
||
|
|
||
|
|
||
|
def test_cli_pdf():
    """Run the CLI on a single PDF and check the files it writes.

    Invokes ``cli`` with ``-p <pdf file>`` and ``-o <temp dir>``, then
    asserts that the expected output files exist under
    ``<temp dir>/cli_test_01/auto`` and exceed their minimum sizes.
    The temporary output directory is removed even when an assertion
    fails, so failed runs do not leave artifacts behind.
    """
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/tools'
    filename = 'cli_test_01'
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli,
            [
                '-p',
                'tests/unittest/test_tools/assets/cli/pdf/cli_test_01.pdf',
                '-o',
                temp_output_dir,
            ],
        )

        # check
        # Surface the captured CLI output so a failure is diagnosable.
        assert result.exit_code == 0, result.output

        base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

        # (file name suffix, size in bytes the file must exceed)
        min_sizes = (
            ('.md', 7000),
            ('_middle.json', 200000),
            ('_model.json', 15000),
            ('_origin.pdf', 400000),
            ('_layout.pdf', 400000),
            ('_spans.pdf', 400000),
        )
        for suffix, min_size in min_sizes:
            path = os.path.join(base_output_dir, f'{filename}{suffix}')
            # getsize raises OSError when the file is missing, which fails
            # the test just like the size assertion does.
            size = os.path.getsize(path)
            assert size > min_size, f'{path}: {size} bytes, expected > {min_size}'

        # isdir is False for a missing path, so it also covers existence.
        assert os.path.isdir(os.path.join(base_output_dir, 'images'))
        assert os.path.exists(
            os.path.join(base_output_dir, f'{filename}_content_list.json')
        )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)
|
||
|
|
||
|
|
||
|
def test_cli_path():
    """Run the CLI on a directory of PDFs and check each document's output.

    Invokes ``cli`` with ``-p <directory>`` and ``-o <temp dir>``, then
    asserts that for both ``cli_test_01`` and ``cli_test_02`` the expected
    output files exist under ``<temp dir>/<name>/auto`` and exceed their
    minimum sizes. The temporary output directory is removed even when an
    assertion fails, so failed runs do not leave artifacts behind.
    """
    # setup
    unitest_dir = '/tmp/magic_pdf/unittest/tools'
    os.makedirs(unitest_dir, exist_ok=True)
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        runner = CliRunner()
        result = runner.invoke(
            cli, ['-p', 'tests/unittest/test_tools/assets/cli/path', '-o', temp_output_dir]
        )

        # check
        # Surface the captured CLI output so a failure is diagnosable.
        assert result.exit_code == 0, result.output

        # (document name, size in bytes its markdown output must exceed)
        documents = (
            ('cli_test_01', 7000),
            ('cli_test_02', 5000),
        )
        for filename, md_min_size in documents:
            base_output_dir = os.path.join(temp_output_dir, filename, 'auto')

            # (file name suffix, size in bytes the file must exceed)
            min_sizes = (
                ('.md', md_min_size),
                ('_middle.json', 200000),
                ('_model.json', 15000),
                ('_origin.pdf', 400000),
                ('_layout.pdf', 400000),
                ('_spans.pdf', 400000),
            )
            for suffix, min_size in min_sizes:
                path = os.path.join(base_output_dir, f'{filename}{suffix}')
                # getsize raises OSError when the file is missing, which
                # fails the test just like the size assertion does.
                size = os.path.getsize(path)
                assert size > min_size, f'{path}: {size} bytes, expected > {min_size}'

            # isdir is False for a missing path, so it also covers existence.
            assert os.path.isdir(os.path.join(base_output_dir, 'images'))
            assert os.path.exists(
                os.path.join(base_output_dir, f'{filename}_content_list.json')
            )
    finally:
        # teardown
        shutil.rmtree(temp_output_dir)
|