From 80b906e6d3f05ce2301219809abf9e7098abe185 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Tue, 21 Dec 2021 14:11:38 -0800 Subject: [PATCH 1/3] tests(nox): Adding a developer test suite After installing nox, the whole test stack can be run by simply running `nox`. This will create a venv, install the correct version of pytorch and tests deps, build and install torch-tensorrt, download models and run the developer test suite. The env is persistent so the setup steps are cached. Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- noxfile.py | 100 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 29 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1414a3dd8c..08189f3fdc 100644 --- a/noxfile.py +++ b/noxfile.py @@ -4,8 +4,54 @@ # Use system installed Python packages PYT_PATH='/opt/conda/lib/python3.8/site-packages' if not 'PYT_PATH' in os.environ else os.environ["PYT_PATH"] -# Root directory for torch_tensorrt. Set according to docker container by default -TOP_DIR='/torchtrt' if not 'TOP_DIR' in os.environ else os.environ["TOP_DIR"] +# Set the root directory to the directory of the noxfile unless the user wants to +# TOP_DIR +TOP_DIR=os.path.dirname(os.path.realpath(__file__)) if not 'TOP_DIR' in os.environ else os.environ["TOP_DIR"] + +nox.options.sessions = ["developer_tests-3"] + +def install_deps(session): + print("Installing deps") + session.install("-r", os.path.join(TOP_DIR, "py", "requirements.txt")) + session.install("-r", os.path.join(TOP_DIR, "tests", "py", "requirements.txt")) + +def download_models(session, use_host_env=False): + print("Downloading test models") + session.install('timm') + print(TOP_DIR) + session.chdir(os.path.join(TOP_DIR, "tests", "modules")) + if use_host_env: + session.run_always('python', 'hub.py', env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always('python', 'hub.py') + +def install_torch_trt(session): + print("Installing latest torch-tensorrt build") + 
session.chdir(os.path.join(TOP_DIR, "py")) + session.run("python", "setup.py", "develop") + +def run_base_tests(session, use_host_env=False): + print("Running basic tests") + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [ + "test_api.py", + "test_to_backend_api.py" + ] + for test in tests: + if use_host_env: + session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always("python", test) + + +# Install the latest build of torch-tensorrt +@nox.session(python=["3"], reuse_venv=True) +def developer_tests(session): + """Basic set of tests that need to pass for code to get merged""" + install_deps(session) + install_torch_trt(session) + download_models(session) + run_base_tests(session) # Download the dataset @nox.session(python=["3"], reuse_venv=True) @@ -14,33 +60,29 @@ def download_datasets(session): session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) session.run_always('wget', 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz', external=True) session.run_always('tar', '-xvzf', 'cifar-10-binary.tar.gz', external=True) - session.run_always('mkdir', '-p', + session.run_always('mkdir', '-p', os.path.join(TOP_DIR, 'tests/accuracy/datasets/data'), external=True) - session.run_always('cp', '-rpf', + session.run_always('cp', '-rpf', os.path.join(TOP_DIR, 'examples/int8/training/vgg16/cifar-10-batches-bin'), os.path.join(TOP_DIR, 'tests/accuracy/datasets/data/cidar-10-batches-bin'), external=True) # Download the model @nox.session(python=["3"], reuse_venv=True) -def download_models(session): - session.install('timm') - session.chdir('tests/modules') - session.run_always('python', - 'hub.py', - env={'PYTHONPATH': PYT_PATH}) +def download_test_models(session): + download_models(session, use_host_env=True) # Train the model @nox.session(python=["3"], reuse_venv=True) def train_model(session): session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) - session.run_always('python', - 'main.py', - 
'--lr', '0.01', - '--batch-size', '128', - '--drop-ratio', '0.15', - '--ckpt-dir', 'vgg16_ckpts', + session.run_always('python', + 'main.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', '--epochs', '25', env={'PYTHONPATH': PYT_PATH}) @@ -57,17 +99,17 @@ def finetune_model(session): session.install('pytorch-quantization', '--extra-index-url', 'https://pypi.ngc.nvidia.com') session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) - session.run_always('python', - 'finetune_qat.py', - '--lr', '0.01', - '--batch-size', '128', - '--drop-ratio', '0.15', - '--ckpt-dir', 'vgg16_ckpts', - '--start-from', '25', + session.run_always('python', + 'finetune_qat.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', + '--start-from', '25', '--epochs', '26', env={'PYTHONPATH': PYT_PATH}) - - # Export model + + # Export model session.run_always('python', 'export_qat.py', 'vgg16_ckpts/ckpt_epoch26.pth', @@ -77,8 +119,8 @@ def finetune_model(session): @nox.session(python=["3"], reuse_venv=True) def ptq_test(session): session.chdir(os.path.join(TOP_DIR, 'tests/py')) - session.run_always('cp', '-rf', - os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16.jit.pt'), + session.run_always('cp', '-rf', + os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16.jit.pt'), '.', external=True) tests = [ @@ -94,8 +136,8 @@ def ptq_test(session): @nox.session(python=["3"], reuse_venv=True) def qat_test(session): session.chdir(os.path.join(TOP_DIR, 'tests/py')) - session.run_always('cp', '-rf', - os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16_qat.jit.pt'), + session.run_always('cp', '-rf', + os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16_qat.jit.pt'), '.', external=True) From 858042319cd8c0cd322b1ef02f65db97cc997e09 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Mon, 24 Jan 2022 13:48:32 -0800 Subject: [PATCH 
2/3] tests(//py): Restructuring the nox file Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- noxfile.py | 346 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 238 insertions(+), 108 deletions(-) diff --git a/noxfile.py b/noxfile.py index 08189f3fdc..5cc1b06b17 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,3 +1,4 @@ +from distutils.command.clean import clean import nox import os @@ -8,7 +9,7 @@ # TOP_DIR TOP_DIR=os.path.dirname(os.path.realpath(__file__)) if not 'TOP_DIR' in os.environ else os.environ["TOP_DIR"] -nox.options.sessions = ["developer_tests-3"] +nox.options.sessions = ["l0_api_tests-3"] def install_deps(session): print("Installing deps") @@ -30,31 +31,6 @@ def install_torch_trt(session): session.chdir(os.path.join(TOP_DIR, "py")) session.run("python", "setup.py", "develop") -def run_base_tests(session, use_host_env=False): - print("Running basic tests") - session.chdir(os.path.join(TOP_DIR, 'tests/py')) - tests = [ - "test_api.py", - "test_to_backend_api.py" - ] - for test in tests: - if use_host_env: - session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) - else: - session.run_always("python", test) - - -# Install the latest build of torch-tensorrt -@nox.session(python=["3"], reuse_venv=True) -def developer_tests(session): - """Basic set of tests that need to pass for code to get merged""" - install_deps(session) - install_torch_trt(session) - download_models(session) - run_base_tests(session) - -# Download the dataset -@nox.session(python=["3"], reuse_venv=True) def download_datasets(session): print("Downloading dataset to path", os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) @@ -68,98 +44,70 @@ def download_datasets(session): os.path.join(TOP_DIR, 'tests/accuracy/datasets/data/cidar-10-batches-bin'), external=True) -# Download the model -@nox.session(python=["3"], reuse_venv=True) -def download_test_models(session): - 
download_models(session, use_host_env=True) - -# Train the model -@nox.session(python=["3"], reuse_venv=True) -def train_model(session): +def train_model(session, use_host_env=False): session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) - session.run_always('python', - 'main.py', - '--lr', '0.01', - '--batch-size', '128', - '--drop-ratio', '0.15', - '--ckpt-dir', 'vgg16_ckpts', - '--epochs', '25', - env={'PYTHONPATH': PYT_PATH}) - - # Export model - session.run_always('python', + if use_host_env: + session.run_always('python', + 'main.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', + '--epochs', '25', + env={'PYTHONPATH': PYT_PATH}) + + session.run_always('python', 'export_ckpt.py', - 'vgg16_ckpts/ckpt_epoch25.pth', - env={'PYTHONPATH': PYT_PATH}) + 'vgg16_ckpts/ckpt_epoch25.pth') + else: + session.run_always('python', + 'main.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', + '--epochs', '25') -# Finetune the model -@nox.session(python=["3"], reuse_venv=True) -def finetune_model(session): + session.run_always('python', + 'export_ckpt.py', + 'vgg16_ckpts/ckpt_epoch25.pth') + +def finetune_model(session, use_host_env=False): # Install pytorch-quantization dependency session.install('pytorch-quantization', '--extra-index-url', 'https://pypi.ngc.nvidia.com') - session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) - session.run_always('python', - 'finetune_qat.py', - '--lr', '0.01', - '--batch-size', '128', - '--drop-ratio', '0.15', - '--ckpt-dir', 'vgg16_ckpts', - '--start-from', '25', - '--epochs', '26', - env={'PYTHONPATH': PYT_PATH}) - - # Export model - session.run_always('python', - 'export_qat.py', - 'vgg16_ckpts/ckpt_epoch26.pth', - env={'PYTHONPATH': PYT_PATH}) - -# Run PTQ tests -@nox.session(python=["3"], reuse_venv=True) -def ptq_test(session): - session.chdir(os.path.join(TOP_DIR, 'tests/py')) - 
session.run_always('cp', '-rf', - os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16.jit.pt'), - '.', - external=True) - tests = [ - 'test_ptq_dataloader_calibrator.py', - 'test_ptq_to_backend.py', - 'test_ptq_trt_calibrator.py' - ] - for test in tests: - session.run_always('python', test, - env={'PYTHONPATH': PYT_PATH}) -# Run QAT tests -@nox.session(python=["3"], reuse_venv=True) -def qat_test(session): - session.chdir(os.path.join(TOP_DIR, 'tests/py')) - session.run_always('cp', '-rf', - os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16_qat.jit.pt'), - '.', - external=True) - - session.run_always('python', - 'test_qat_trt_accuracy.py', - env={'PYTHONPATH': PYT_PATH}) + if use_host_env: + session.run_always('python', + 'finetune_qat.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', + '--start-from', '25', + '--epochs', '26', + env={'PYTHONPATH': PYT_PATH}) -# Run Python API tests -@nox.session(python=["3"], reuse_venv=True) -def api_test(session): - session.chdir(os.path.join(TOP_DIR, 'tests/py')) - tests = [ - "test_api.py", - "test_to_backend_api.py" - ] - for test in tests: + # Export model session.run_always('python', - test, + 'export_qat.py', + 'vgg16_ckpts/ckpt_epoch26.pth', env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always('python', + 'finetune_qat.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', + '--start-from', '25', + '--epochs', '26') + + # Export model + session.run_always('python', + 'export_qat.py', + 'vgg16_ckpts/ckpt_epoch26.pth') -# Clean up -@nox.session(reuse_venv=True) def cleanup(session): target = [ 'examples/int8/training/vgg16/*.jit.pt', @@ -173,4 +121,186 @@ def cleanup(session): target = ' '.join(x for x in [os.path.join(TOP_DIR, i) for i in target]) session.run_always('bash', '-c', str('rm -rf ') + target, - external=True) \ No newline at end of file + external=True) + +def 
run_base_tests(session, use_host_env=False): + print("Running basic tests") + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [ + "test_api.py", + "test_to_backend_api.py" + ] + for test in tests: + if use_host_env: + session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always("python", test) + +def run_accuracy_tests(session, use_host_env=False): + print("Running accuracy tests") + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [] + for test in tests: + if use_host_env: + session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always("python", test) + +def run_int8_accuracy_tests(session, use_host_env=False): + print("Running accuracy tests") + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [ + "test_ptq_dataloader.py", + "test_ptq_to_backend.py", + "test_qat_trt_accuracy", + ] + for test in tests: + if use_host_env: + session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always("python", test) + +def run_trt_compatibility_tests(session, use_host_env=False): + print("Running TensorRT compatibility tests") + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [ + "test_trt_intercompatibilty.py", + "test_ptq_trt_calibrator.py", + ] + for test in tests: + if use_host_env: + session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always("python", test) + +def run_dla_tests(session, use_host_env=False): + print("Running DLA tests") + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [ + "test_api_dla.py", + ] + for test in tests: + if use_host_env: + session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always("python", test) + +def run_multi_gpu_tests(session, use_host_env=False): + print("Running multi GPU tests") + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [ + "test_multi_gpu.py", + ] + for test in tests: + if use_host_env: + 
session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + else: + session.run_always("python", test) + +def run_l0_api_tests(session, use_host_env=False): + if not use_host_env: + install_deps(session) + install_torch_trt(session) + download_models(session, use_host_env) + run_base_tests(session, use_host_env) + cleanup(session) + +def run_l0_dla_tests(session, use_host_env=False): + if not use_host_env: + install_deps(session) + install_torch_trt(session) + download_models(session, use_host_env) + run_base_tests(session, use_host_env) + cleanup(session) + +def run_l1_accuracy_tests(session, use_host_env=False): + if not use_host_env: + install_deps(session) + install_torch_trt(session) + download_models(session, use_host_env) + download_datasets(session, use_host_env) + train_model(session, use_host_env) + run_accuracy_tests(session, use_host_env) + cleanup(session) + +def run_l1_int8_accuracy_tests(session, use_host_env=False): + if not use_host_env: + install_deps(session) + install_torch_trt(session) + download_models(session, use_host_env) + download_datasets(session, use_host_env) + train_model(session, use_host_env) + finetune_model(session, use_host_env) + run_int8_accuracy_tests(session, use_host_env) + cleanup(session) + +def run_l2_trt_compatibility_tests(session, use_host_env=False): + if not use_host_env: + install_deps(session) + install_torch_trt(session) + download_models(session, use_host_env) + run_trt_compatibility_tests(session, use_host_env) + cleanup(session) + +def run_l2_multi_gpu_tests(session, use_host_env=False): + if not use_host_env: + install_deps(session) + install_torch_trt(session) + download_models(session, use_host_env) + run_multi_gpu_tests(session, use_host_env) + cleanup(session) + +@nox.session(python=["3"], reuse_venv=True) +def l0_api_tests(session): + """When a developer needs to check correctness for a PR or something""" + run_l0_api_tests(session, use_host_env=False) + +@nox.session(python=["3"], reuse_venv=True) 
+def l0_api_tests_host_deps(session): + """When a developer needs to check basic api functionality using host dependencies""" + run_l0_api_tests(session, use_host_env=True) + +@nox.session(python=["3"], reuse_venv=True) +def l0_dla_tests_host_deps(session): + """When a developer needs to check basic api functionality using host dependencies""" + run_l0_dla_tests(session, use_host_env=True) + +@nox.session(python=["3"], reuse_venv=True) +def l1_accuracy_tests(session): + """Checking accuracy performance on various usecases""" + run_l1_accuracy_tests(session, use_host_env=False) + +@nox.session(python=["3"], reuse_venv=True) +def l1_accuracy_tests_host_deps(session): + """Checking accuracy performance on various usecases using host dependencies""" + run_l1_accuracy_tests(session, use_host_env=True) + +@nox.session(python=["3"], reuse_venv=True) +def l1_int8_accuracy_tests(session): + """Checking accuracy performance on various usecases""" + run_l1_int8_accuracy_tests(session, use_host_env=False) + +@nox.session(python=["3"], reuse_venv=True) +def l1_int8_accuracy_tests_host_deps(session): + """Checking accuracy performance on various usecases using host dependencies""" + run_l1_int8_accuracy_tests(session, use_host_env=True) + +@nox.session(python=["3"], reuse_venv=True) +def l2_trt_compatibility_tests(session): + """Makes sure that TensorRT Python and Torch-TensorRT can work together""" + run_l2_trt_compatibility_tests(session, use_host_env=False) + +@nox.session(python=["3"], reuse_venv=True) +def l2_trt_compatibility_tests_host_deps(session): + """Makes sure that TensorRT Python and Torch-TensorRT can work together using host dependencies""" + run_l2_trt_compatibility_tests(session, use_host_env=True) + +@nox.session(python=["3"], reuse_venv=True) +def l2_multi_gpu_tests(session): + """Makes sure that Torch-TensorRT can operate on multi-gpu systems""" + run_l2_multi_gpu_tests(session, use_host_env=False) + +@nox.session(python=["3"], reuse_venv=True) +def 
l2_multi_gpu_tests_host_deps(session): + """Makes sure that Torch-TensorRT can operate on multi-gpu systems using host dependencies""" + run_l2_multi_gpu_tests(session, use_host_env=True) From ec2232f92b1be795434df440c5099e58029b2eba Mon Sep 17 00:00:00 2001 From: Anurag Dixit Date: Tue, 1 Mar 2022 19:36:21 -0800 Subject: [PATCH 3/3] fix: Fixed failures for host deps sessions Signed-off-by: Anurag Dixit --- noxfile.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index 5cc1b06b17..f7e16a7107 100644 --- a/noxfile.py +++ b/noxfile.py @@ -58,7 +58,8 @@ def train_model(session, use_host_env=False): session.run_always('python', 'export_ckpt.py', - 'vgg16_ckpts/ckpt_epoch25.pth') + 'vgg16_ckpts/ckpt_epoch25.pth', + env={'PYTHONPATH': PYT_PATH}) else: session.run_always('python', 'main.py', @@ -146,13 +147,27 @@ def run_accuracy_tests(session, use_host_env=False): else: session.run_always("python", test) +def copy_model(session): + model_files = [ 'trained_vgg16.jit.pt', + 'trained_vgg16_qat.jit.pt'] + + for file_name in model_files: + src_file = os.path.join(TOP_DIR, str('examples/int8/training/vgg16/') + file_name) + if os.path.exists(src_file): + session.run_always('cp', + '-rpf', + os.path.join(TOP_DIR, src_file), + os.path.join(TOP_DIR, str('tests/py/') + file_name), + external=True) + def run_int8_accuracy_tests(session, use_host_env=False): print("Running accuracy tests") + copy_model(session) session.chdir(os.path.join(TOP_DIR, 'tests/py')) tests = [ - "test_ptq_dataloader.py", + "test_ptq_dataloader_calibrator.py", "test_ptq_to_backend.py", - "test_qat_trt_accuracy", + "test_qat_trt_accuracy.py", ] for test in tests: if use_host_env: @@ -162,9 +177,10 @@ def run_int8_accuracy_tests(session, use_host_env=False): def run_trt_compatibility_tests(session, use_host_env=False): print("Running TensorRT compatibility tests") + copy_model(session) session.chdir(os.path.join(TOP_DIR, 'tests/py')) 
tests = [ - "test_trt_intercompatibilty.py", + "test_trt_intercompatability.py", "test_ptq_trt_calibrator.py", ] for test in tests: @@ -218,7 +234,7 @@ def run_l1_accuracy_tests(session, use_host_env=False): install_deps(session) install_torch_trt(session) download_models(session, use_host_env) - download_datasets(session, use_host_env) + download_datasets(session) train_model(session, use_host_env) run_accuracy_tests(session, use_host_env) cleanup(session) @@ -228,7 +244,7 @@ def run_l1_int8_accuracy_tests(session, use_host_env=False): install_deps(session) install_torch_trt(session) download_models(session, use_host_env) - download_datasets(session, use_host_env) + download_datasets(session) train_model(session, use_host_env) finetune_model(session, use_host_env) run_int8_accuracy_tests(session, use_host_env) @@ -239,6 +255,8 @@ def run_l2_trt_compatibility_tests(session, use_host_env=False): install_deps(session) install_torch_trt(session) download_models(session, use_host_env) + download_datasets(session) + train_model(session, use_host_env) run_trt_compatibility_tests(session, use_host_env) cleanup(session)