Skip to content

Commit ba93377

Browse files
committed
fix: Upgrade main to TRT 8.6, CUDA 11.8, Torch Dev
- Upgrade versions, make required code edits for functionality and coverage for tests and CI
1 parent 2dc510b commit ba93377

File tree

14 files changed

+62
-63
lines changed

14 files changed

+62
-63
lines changed

Diff for: .circleci/config.yml

+17-13
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ commands:
102102
sudo apt-get --purge remove "*nvidia*"
103103
104104
install-cudnn:
105-
description: "Install CUDNN 8.5.0"
105+
description: "Install CUDNN 8.8.0"
106106
parameters:
107107
os:
108108
type: string
@@ -112,10 +112,10 @@ commands:
112112
default: "x86_64"
113113
cudnn-version:
114114
type: string
115-
default: "8.5.0.96"
115+
default: "8.8.0.121"
116116
cuda-version:
117117
type: string
118-
default: "cuda11.7"
118+
default: "cuda11.8"
119119
steps:
120120
- run:
121121
name: Install CUDNN
@@ -200,7 +200,7 @@ commands:
200200
default: "cuda11.8"
201201
cudnn-version:
202202
type: string
203-
default: "8.5.0.96"
203+
default: "8.8.0.121"
204204
trt-version-short:
205205
type: string
206206
default: "8.6.0"
@@ -252,7 +252,7 @@ commands:
252252
default: "8.6.0"
253253
cudnn-version-long:
254254
type: string
255-
default: "8.5.0.96"
255+
default: "8.8.0.121"
256256
steps:
257257
- run:
258258
name: Set up python environment
@@ -269,16 +269,19 @@ commands:
269269
parameters:
270270
torch-build:
271271
type: string
272-
default: "2.1.0.dev20230421+cu118"
272+
default: "2.1.0.dev20230419+cu118"
273+
torchvision-build:
274+
type: string
275+
default: "0.16.0.dev20230419+cu118"
273276
torch-build-index:
274277
type: string
275-
default: "https://download.pytorch.org/whl/cu118"
278+
default: "https://download.pytorch.org/whl/nightly/cu118"
276279
steps:
277280
- run:
278281
name: Install Torch
279282
command: |
280283
pip3 install --upgrade pip
281-
pip3 install torch==<< parameters.torch-build >> torchvision torchaudio --extra-index-url << parameters.torch-build-index >>
284+
pip3 install torch==<< parameters.torch-build >> torchvision==<< parameters.torchvision-build >> --extra-index-url << parameters.torch-build-index >>
282285
283286
build-py:
284287
description: "Build the torch-tensorrt python release (pre-cxx11-abi)"
@@ -474,6 +477,7 @@ commands:
474477
- run: mkdir -p /tmp/artifacts
475478
- run:
476479
name: Run core / C++ tests
480+
no_output_timeout: 15m
477481
environment:
478482
LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda-11.8/lib64/:$LD_LIBRARY_PATH"
479483
command: |
@@ -1205,19 +1209,19 @@ parameters:
12051209
# Nightly platform config
12061210
torch-build:
12071211
type: string
1208-
default: "2.1.0.dev20230421+cu118"
1212+
default: "2.1.0.dev20230419+cu118"
12091213
torch-build-index:
12101214
type: string
1211-
default: "https://download.pytorch.org/whl/cu118"
1215+
default: "https://download.pytorch.org/whl/nightly/cu118"
12121216
torch-build-legacy:
12131217
type: string
1214-
default: "1.13.1+cu118"
1218+
default: "1.13.1+cu117"
12151219
torch-build-index-legacy:
12161220
type: string
1217-
default: "https://download.pytorch.org/whl/cu118"
1221+
default: "https://download.pytorch.org/whl/cu117"
12181222
cudnn-version:
12191223
type: string
1220-
default: "8.5.0.96"
1224+
default: "8.8.0.121"
12211225
trt-version-short:
12221226
type: string
12231227
default: "8.6.0"

Diff for: README.md

+6-11
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,7 @@ In the case of building on top of a custom base container, you first must determ
3131
version of the PyTorch C++ ABI. If your source of PyTorch is pytorch.org, likely this is the pre-cxx11-abi in which case you must modify `//docker/dist-build.sh` to not build the
3232
C++11 ABI version of Torch-TensorRT.
3333

34-
You can then build the container using:
35-
36-
37-
```bash
38-
docker build --build-arg BASE_IMG=<IMAGE> -f docker/Dockerfile -t torch_tensorrt:latest .
39-
```
34+
You can then build the container using the build command in the [docker README](docker/README.md#instructions)
4035

4136
If you would like to build outside a docker container, please follow the section [Compiling Torch-TensorRT](#compiling-torch-tensorrt)
4237

@@ -121,10 +116,10 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") # save the TRT embedd
121116
These are the following dependencies used to verify the testcases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.
122117

123118
- Bazel 5.2.0
124-
- Libtorch 2.1.0.dev20230314 (built with CUDA 11.7)
125-
- CUDA 11.7
126-
- cuDNN 8.5.0
127-
- TensorRT 8.5.1.7
119+
- Libtorch 2.1.0.dev20230419 (built with CUDA 11.8)
120+
- CUDA 11.8
121+
- cuDNN 8.8.0
122+
- TensorRT 8.6.0
128123

129124
## Prebuilt Binaries and Wheel files
130125

@@ -252,7 +247,7 @@ A tarball with the include files and library can then be found in bazel-bin
252247
### Running Torch-TensorRT on a JIT Graph
253248

254249
> Make sure to add LibTorch to your LD_LIBRARY_PATH <br>
255-
> `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/bazel-Torch-TensorRT/external/libtorch/lib`
250+
> `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/bazel-TensorRT/external/libtorch/lib`
256251
257252
``` shell
258253
bazel run //cpp/bin/torchtrtc -- $(realpath <PATH TO GRAPH>) out.ts <input-size>

Diff for: WORKSPACE

+7-7
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,17 @@ new_local_repository(
5151
http_archive(
5252
name = "libtorch",
5353
build_file = "@//third_party/libtorch:BUILD",
54-
sha256 = "486aeb5cf498f3df8713a96a0d178660828fc579e740c39b054bef0adef6315b",
54+
sha256 = "1a526a9cd19c1015674d26921dbb94bcd2d632a6f9c431a21c43f4e24768d834",
5555
strip_prefix = "libtorch",
56-
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230421%2Bcu118.zip"],
56+
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230419%2Bcu118.zip"],
5757
)
5858

5959
http_archive(
6060
name = "libtorch_pre_cxx11_abi",
6161
build_file = "@//third_party/libtorch:BUILD",
62-
sha256 = "03b1c29a2a2e412ab6cdb957b5fd48a64aeed1b3551ee7679908c4ac177b89ab",
62+
sha256 = "60c5912a5085a6a7073b3804b10d41d6cc054693bbeb7a45e0247050c2837bac",
6363
strip_prefix = "libtorch",
64-
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230421%2Bcu118.zip"],
64+
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230419%2Bcu118.zip"],
6565
)
6666

6767
# Download these tarballs manually from the NVIDIA website
@@ -71,10 +71,10 @@ http_archive(
7171
http_archive(
7272
name = "cudnn",
7373
build_file = "@//third_party/cudnn/archive:BUILD",
74-
sha256 = "5454a6fd94f008728caae9adad993c4e85ef36302e26bce43bea7d458a5e7b6d",
75-
strip_prefix = "cudnn-linux-x86_64-8.5.0.96_cuda11-archive",
74+
sha256 = "36fff137153ef73e6ee10bfb07f4381240a86fb9fb78ce372414b528cbab2293",
75+
strip_prefix = "cudnn-linux-x86_64-8.8.0.121_cuda11-archive",
7676
urls = [
77-
"https://developer.nvidia.com/compute/cudnn/secure/8.5.0/local_installers/11.7/cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz",
77+
"https://developer.download.nvidia.com/compute/cudnn/secure/8.8.0/local_installers/11.8/cudnn-linux-x86_64-8.8.0.121_cuda11-archive.tar.xz",
7878
],
7979
)
8080

Diff for: docker/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Base image starts with CUDA
2-
ARG BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu22.04
2+
ARG BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04
33
FROM ${BASE_IMG} as base
44

55
ARG TENSORRT_VERSION

Diff for: docker/README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
* The `Dockerfile` currently uses <a href="https://github.com/bazelbuild/bazelisk">Bazelisk</a> to select the Bazel version, and uses the exact library versions of Torch and CUDA listed in <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a>.
66
* The desired versions of CUDNN and TensorRT must be specified as build-args, with major, minor, and patch versions as in: `--build-arg TENSORRT_VERSION=a.b.c --build-arg CUDNN_VERSION=x.y.z`
7-
* [**Optional**] The desired base image be changed by explicitly setting a base image, as in `--build-arg BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu22.04`, though this is optional
7+
* [**Optional**] The desired base image be changed by explicitly setting a base image, as in `--build-arg BASE_IMG=nvidia/cuda:11.8.0-devel-ubuntu22.04`, though this is optional
88
* [**Optional**] Additionally, the desired Python version can be changed by explicitly setting a version, as in `--build-arg PYTHON_VERSION=3.10`, though this is optional as well.
99

1010
* This `Dockerfile` installs `pre-cxx11-abi` versions of Pytorch and builds Torch-TRT using `pre-cxx11-abi` libtorch as well.
@@ -17,14 +17,14 @@ Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch
1717

1818
### Instructions
1919

20-
- The example below uses CUDNN 8.5.0 and TensorRT 8.5.1
20+
- The example below uses CUDNN 8.8.0 and TensorRT 8.6.0
2121
- See <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> for a list of current default dependencies.
2222

2323
> From root of Torch-TensorRT repo
2424
2525
Build:
2626
```
27-
DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.5.1 --build-arg CUDNN_VERSION=8.5.0 -f docker/Dockerfile -t torch_tensorrt:latest .
27+
DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6.0 --build-arg CUDNN_VERSION=8.8.0 -f docker/Dockerfile -t torch_tensorrt:latest .
2828
```
2929

3030
Run:

Diff for: py/requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ numpy
22
packaging
33
pybind11==2.6.2
44
--extra-index-url https://download.pytorch.org/whl/nightly/cu118
5-
torch==2.1.0.dev20230421+cu118
6-
torchvision==0.16.0.dev20230421+cu118
5+
torch==2.1.0.dev20230419+cu118
6+
torchvision==0.16.0.dev20230419+cu118
77
--extra-index-url https://pypi.ngc.nvidia.com
88
tensorrt==8.6.0

Diff for: py/setup.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131

3232
FX_ONLY = False
3333

34+
LEGACY = False
35+
3436
RELEASE = False
3537

3638
CI_RELEASE = False
@@ -48,6 +50,10 @@ def get_git_revision_short_hash() -> str:
4850
FX_ONLY = True
4951
sys.argv.remove("--fx-only")
5052

53+
if "--legacy" in sys.argv:
54+
LEGACY = True
55+
sys.argv.remove("--legacy")
56+
5157
if "--release" not in sys.argv:
5258
__version__ = __version__ + "+" + get_git_revision_short_hash()
5359
else:
@@ -380,7 +386,7 @@ def run(self):
380386
long_description=long_description,
381387
ext_modules=ext_modules,
382388
install_requires=[
383-
"torch==2.0.0",
389+
"torch >=2.0.1" if not LEGACY else "torch >=1.13.0,<2.0",
384390
],
385391
setup_requires=[],
386392
cmdclass={
@@ -409,7 +415,7 @@ def run(self):
409415
"Topic :: Software Development",
410416
"Topic :: Software Development :: Libraries",
411417
],
412-
python_requires=">=3.7",
418+
python_requires=">=3.8",
413419
include_package_data=True,
414420
package_data={
415421
"torch_tensorrt": [

Diff for: py/torch_tensorrt/fx/README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@ FX2TRT is merged as FX module in Torch-TensorRT
88
```
99
$ conda create --name python_env python=3.8
1010
$ conda activate python_env
11-
# Recommend to install PyTorch 1.12 and later
12-
$ conda install pytorch torchvision torchtext cudatoolkit=11.3 -c pytorch-nightly
11+
# Recommend to install PyTorch 2.0 and later
12+
$ conda install pytorch torchvision torchtext cudatoolkit=11.8 -c pytorch-nightly
1313
# Install TensorRT python package
1414
$ pip3 install nvidia-pyindex
15-
$ pip3 install tensorrt==8.5.1.7
15+
$ pip3 install tensorrt==8.6.0.12
1616
$ git clone https://github.com/pytorch/TensorRT.git
1717
$ cd TensorRT/py && python setup.py install --fx-only && cd ..
18-
$ pyton -c "import torch_tensorrt.fx"
18+
$ python -c "import torch_tensorrt.fx"
1919
# Test an example by
2020
$ python py/torch_tensorrt/fx/example/lower_example.py
2121
```

Diff for: py/torch_tensorrt/fx/test/passes/test_fuse_permute_linear_trt.py

-5
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,6 @@ def forward(self, x):
5454
apply_passes=[fuse_permute_linear],
5555
)
5656

57-
# TODO: The following test has been disabled due to a bug in TRT 8.5.1.7
58-
# with self.linear2. Issue : https://github.com/pytorch/TensorRT/issues/1444
59-
@unittest.skip(
60-
reason="test_multi_fuse_permute_linear has been disabled due to a bug in TRT 8.5.1.7 https://github.com/pytorch/TensorRT/issues/1444"
61-
)
6257
def test_multi_fuse_permute_linear(self):
6358
"""
6459
Fusion when permute output is shared by multiple linears

Diff for: py/versions.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
__version__ = "1.4.0.dev0"
2-
__cuda_version__ = "11.7"
3-
__cudnn_version__ = "8.5"
4-
__tensorrt_version__ = "8.5"
2+
__cuda_version__ = "11.8"
3+
__cudnn_version__ = "8.8"
4+
__tensorrt_version__ = "8.6"

Diff for: pyproject.toml

+2-3
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ requires = [
99
"cffi",
1010
"typing_extensions",
1111
"future",
12-
"nvidia-pyindex",
13-
"nvidia-tensorrt==8.4.3.1"
12+
"tensorrt >=8.6,<8.7"
1413
]
1514

1615
# Use legacy backend to import local packages in setup.py
@@ -20,7 +19,7 @@ requires = [
2019
[tool.black]
2120
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
2221
# line-length = 120
23-
target-versions = ["py37", "py38", "py39", "py310"]
22+
target-versions = ["py38", "py39", "py310"]
2423
force-exclude = """
2524
elu_converter/setup.py
2625
"""

Diff for: toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel

+4-4
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,17 @@ new_local_repository(
5656
http_archive(
5757
name = "libtorch",
5858
build_file = "@//third_party/libtorch:BUILD",
59-
sha256 = "486aeb5cf498f3df8713a96a0d178660828fc579e740c39b054bef0adef6315b",
59+
sha256 = "1a526a9cd19c1015674d26921dbb94bcd2d632a6f9c431a21c43f4e24768d834",
6060
strip_prefix = "libtorch",
61-
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230421%2Bcu118.zip"],
61+
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230419%2Bcu118.zip"],
6262
)
6363

6464
http_archive(
6565
name = "libtorch_pre_cxx11_abi",
6666
build_file = "@//third_party/libtorch:BUILD",
67-
sha256 = "03b1c29a2a2e412ab6cdb957b5fd48a64aeed1b3551ee7679908c4ac177b89ab",
67+
sha256 = "60c5912a5085a6a7073b3804b10d41d6cc054693bbeb7a45e0247050c2837bac",
6868
strip_prefix = "libtorch",
69-
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230421%2Bcu118.zip"],
69+
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230419%2Bcu118.zip"],
7070
)
7171

7272
####################################################################################

Diff for: toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu

+4-4
Original file line numberDiff line numberDiff line change
@@ -56,17 +56,17 @@ new_local_repository(
5656
http_archive(
5757
name = "libtorch",
5858
build_file = "@//third_party/libtorch:BUILD",
59-
sha256 = "486aeb5cf498f3df8713a96a0d178660828fc579e740c39b054bef0adef6315b",
59+
sha256 = "1a526a9cd19c1015674d26921dbb94bcd2d632a6f9c431a21c43f4e24768d834",
6060
strip_prefix = "libtorch",
61-
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230421%2Bcu118.zip"],
61+
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-cxx11-abi-shared-with-deps-2.1.0.dev20230419%2Bcu118.zip"],
6262
)
6363

6464
http_archive(
6565
name = "libtorch_pre_cxx11_abi",
6666
build_file = "@//third_party/libtorch:BUILD",
67-
sha256 = "03b1c29a2a2e412ab6cdb957b5fd48a64aeed1b3551ee7679908c4ac177b89ab",
67+
sha256 = "60c5912a5085a6a7073b3804b10d41d6cc054693bbeb7a45e0247050c2837bac",
6868
strip_prefix = "libtorch",
69-
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230421%2Bcu118.zip"],
69+
urls = ["https://download.pytorch.org/libtorch/nightly/cu118/libtorch-shared-with-deps-2.1.0.dev20230419%2Bcu118.zip"],
7070
)
7171

7272
####################################################################################

Diff for: tools/cpp_benchmark/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ This is a quick benchmarking application for Torch-TensorRT. It lets you run sup
66

77
Run with bazel:
88

9-
> Note: Make sure libtorch and TensorRT are in your LD_LIBRARY_PATH before running, if you need a location you can `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:[WORKSPACE ROOT]/bazel-Torch-TensorRT/external/libtorch/lib:[WORKSPACE ROOT]/bazel-Torch-TensorRT/external/tensorrt/lib`
9+
> Note: Make sure libtorch and TensorRT are in your LD_LIBRARY_PATH before running, if you need a location you can `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:[WORKSPACE ROOT]/bazel-TensorRT/external/libtorch/lib:[WORKSPACE ROOT]/bazel-TensorRT/external/tensorrt/lib`
1010
1111
``` sh
1212
bazel run //tools/cpp_benchmark --cxxopt="-DNDEBUG" --cxxopt="-DJIT" --cxxopt="-DTRT" -- [PATH TO JIT MODULE FILE] [INPUT SIZE (explicit batch)]

0 commit comments

Comments (0)