Skip to content

Commit

Permalink
[CI] Use MAX_JOBS=1 with nvcc 12.3, don't need OLD_GENERATOR_PATH
Browse files Browse the repository at this point in the history
  • Loading branch information
tridao committed Dec 7, 2024
1 parent b7d29fb commit f86e3dd
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 11 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ jobs:
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
# Limit MAX_JOBS otherwise the github runner goes OOM
MAX_JOBS=2 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
# nvcc 11.8 can compile with 2 jobs, but nvcc 12.3 goes OOM
MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "123" ] && echo 1 || echo 2) FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
tmpname=cu${WHEEL_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
Expand Down
2 changes: 1 addition & 1 deletion flash_attn/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.7.2"
__version__ = "2.7.2.post1"

from flash_attn.flash_attn_interface import (
flash_attn_func,
Expand Down
10 changes: 1 addition & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,6 @@ def validate_and_update_archs(archs):
TORCH_MAJOR = int(torch.__version__.split(".")[0])
TORCH_MINOR = int(torch.__version__.split(".")[1])

# Check, if ATen/CUDAGeneratorImpl.h is found, otherwise use ATen/cuda/CUDAGeneratorImpl.h
# See https://github.com/pytorch/pytorch/pull/70650
generator_flag = []
torch_dir = torch.__path__[0]
if os.path.exists(os.path.join(torch_dir, "include", "ATen", "CUDAGeneratorImpl.h")):
generator_flag = ["-DOLD_GENERATOR_PATH"]

check_if_cuda_home_none("flash_attn")
# Check, if CUDA11 is installed for compute capability 8.0
cc_flag = []
Expand Down Expand Up @@ -271,7 +264,7 @@ def validate_and_update_archs(archs):
"csrc/flash_attn/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu",
],
extra_compile_args={
"cxx": ["-O3", "-std=c++17"] + generator_flag,
"cxx": ["-O3", "-std=c++17"],
"nvcc": append_nvcc_threads(
[
"-O3",
Expand All @@ -293,7 +286,6 @@ def validate_and_update_archs(archs):
# "-DFLASHATTENTION_DISABLE_UNEVEN_K",
# "-DFLASHATTENTION_DISABLE_LOCAL",
]
+ generator_flag
+ cc_flag
),
},
Expand Down

0 comments on commit f86e3dd

Please # to comment.