diff --git a/.github/workflows/release_wheel.yml b/.github/workflows/release_wheel.yml
new file mode 100644
index 00000000..60fab503
--- /dev/null
+++ b/.github/workflows/release_wheel.yml
@@ -0,0 +1,106 @@
+# Adapted from https://github.com/punica-ai/punica/blob/591b59899f0a20760821785d06b331c8a2e5cb86/.github/workflows/release_wheel.yml
+name: Release
+on:
+  workflow_call:
+    inputs:
+      tag_name:
+        required: true
+        type: string
+    secrets:
+      WHL_TOKEN:
+        required: true
+      # PYPI_TEST_TOKEN:
+      #   required: true
+
+env:
+  TORCH_CUDA_ARCH_LIST: "8.0 8.6 8.9 9.0+PTX"
+  FLASHINFER_CI_TORCH_VERSION: "2.1.0"
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python: ["3.10", "3.11", "3.12"]
+        cuda: ["11.8", "12.1"]
+    runs-on: [self-hosted]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: Build wheel
+        run: |
+          chown -R $CI_UID:$CI_GID "$GITHUB_WORKSPACE"
+          version="$(cat version.txt)"
+          docker run --rm -t \
+            -v "$CI_RUNNER_CACHE_DIR":/ci-cache \
+            -v "$GITHUB_WORKSPACE":/app \
+            -e FLASHINFER_CI_PYTHON_VERSION=${{ matrix.python }} \
+            -e FLASHINFER_CI_CUDA_VERSION=${{ matrix.cuda }} \
+            -e FLASHINFER_CI_TORCH_VERSION=$FLASHINFER_CI_TORCH_VERSION \
+            -e FLASHINFER_BUILD_VERSION=$version \
+            -e TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" \
+            --user $CI_UID:$CI_GID \
+            pytorch/manylinux-builder:cuda${{ matrix.cuda }} \
+            bash /app/scripts/run-ci-build-wheel.sh
+        timeout-minutes: 120
+      - run: du -h python/dist/*
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: wheel-cuda${{ matrix.cuda }}-python${{ matrix.python }}
+          path: python/dist/*
+
+  release:
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          path: python/dist/
+          merge-multiple: true
+          pattern: wheel-*
+
+      - run: ls -lah python/dist/
+
+      - uses: softprops/action-gh-release@v1
+        with:
+          tag_name: ${{ inputs.tag_name }}
+          files: |
+            python/dist/flashinfer-*.whl
+            python/dist/flashinfer-*.tar.gz
+
+      - name: Clone wheel index
+        run: git clone https://oauth2:${WHL_TOKEN}@github.com/flashinfer-ai/whl.git flashinfer-whl
+        env:
+          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
+
+      - name: Update wheel index
+        shell: python
+        run: |
+          import pathlib
+          import hashlib
+          import re
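+          # For each wheel, append an <a href="...#sha256=..."> entry to the matching
+          # per-CUDA-version index page in the flashinfer-whl checkout.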
+          for path in sorted(pathlib.Path("python/dist").glob("*.whl")):
+              with open(path, "rb") as f:
+                  sha256 = hashlib.sha256(f.read()).hexdigest()
+              ver, cu = re.findall(r"flashinfer-([0-9.]+)\+cu(\d+)-", path.name)[0]
+              with open(f"flashinfer-whl/cu{cu}/flashinfer/index.html", "a") as f:
+                  f.write(f'<a href="{path.name}#sha256={sha256}">{path.name}</a><br>\n')
+
+      - name: Push wheel index
+        run: |
+          cd flashinfer-whl
+          git config --local user.name "github-actions[bot]"
+          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add -A
+          git commit -m "update whl"
+          git push
+
+      # - name: Upload sdist to pypi
+      #   run: |
+      #     pip install twine
+      #     python -m twine upload --repository testpypi --username=__token__ dist/*.tar.gz
+      #   env:
+      #     TWINE_PASSWORD: ${{ secrets.PYPI_TEST_TOKEN }}
\ No newline at end of file
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
new file mode 100644
index 00000000..5b44b48e
--- /dev/null
+++ b/python/MANIFEST.in
@@ -0,0 +1,12 @@
+# sdist & wheel
+include version.txt
+recursive-include include *
+recursive-include csrc *
+
+# wheel-only
+exclude flashinfer/_build_meta.py
+exclude tests/
+
+# Unneeded files
+prune */__pycache__
+global-exclude *.so
diff --git a/python/flashinfer/__init__.py b/python/flashinfer/__init__.py
index 385c7865..39242819 100644
--- a/python/flashinfer/__init__.py
+++ b/python/flashinfer/__init__.py
@@ -28,5 +28,3 @@
     BatchDecodeWithPagedKVCacheWrapper,
     BatchPrefillWithPagedKVCacheWrapper,
 )
-
-__version__ = "0.0.1"
diff --git a/python/include b/python/include
new file mode 120000
index 00000000..3a1af68f
--- /dev/null
+++ b/python/include
@@ -0,0 +1 @@
+../include/
\ No newline at end of file
diff --git a/python/setup.py b/python/setup.py
index b65c3682..de01bc61 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -14,19 +14,45 @@
 limitations under the License.
 """
 import pathlib
+import os
+import re
+import datetime
+import subprocess
+import platform
 import setuptools
+import torch
 import torch.utils.cpp_extension as torch_cpp_ext

 root = pathlib.Path(__name__).parent


-def get_version(path):
-    with open(path) as f:
-        for line in f:
-            if line.startswith("__version__"):
-                return line.split("=", maxsplit=1)[1].replace('"', "").strip()
-    raise ValueError("Version not found")
+def get_version():
+    # Honor the version injected by CI (e.g. "0.0.1+cu121"); fall back to version.txt.
+    version = os.environ.get("FLASHINFER_BUILD_VERSION")
+    if version is None:
+        with open(root / "version.txt") as f:
+            version = f.read().strip()
+    return version
+
+
+def get_cuda_version() -> tuple[int, int]:
+    if torch_cpp_ext.CUDA_HOME is None:
+        nvcc = "nvcc"
+    else:
+        nvcc = os.path.join(torch_cpp_ext.CUDA_HOME, "bin/nvcc")
+    txt = subprocess.check_output([nvcc, "--version"], text=True)
+    major, minor = map(int, re.findall(r"release (\d+)\.(\d+),", txt)[0])
+    return major, minor
+
+
+def generate_build_meta() -> None:
+    d = {}
+    version = get_version()
+    d["cuda_major"], d["cuda_minor"] = get_cuda_version()
+    d["torch"] = torch.__version__
+    d["python"] = platform.python_version()
+    d["TORCH_CUDA_ARCH_LIST"] = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
+    with open(root / "flashinfer/_build_meta.py", "w") as f:
+        f.write(f"__version__ = {version!r}\n")
+        f.write(f"build_meta = {d!r}")


 def remove_unwanted_pytorch_nvcc_flags():
@@ -43,38 +69,40 @@
         pass


-remove_unwanted_pytorch_nvcc_flags()
-ext_modules = []
-ext_modules.append(
-    torch_cpp_ext.CUDAExtension(
-        name="flashinfer.ops._kernels",
-        sources=[
-            "csrc/single_decode.cu",
-            "csrc/single_prefill.cu",
-            "csrc/cascade.cu",
-            "csrc/batch_decode.cu",
-            "csrc/flashinfer_ops.cu",
-            "csrc/batch_prefill.cu",
-        ],
-        include_dirs=[
-            str(root.resolve().parent / "include"),
-        ],
-        extra_compile_args={
-            "cxx": ["-O3"],
-            "nvcc": ["-O3", "--threads", "8"],
-        },
+if __name__ == "__main__":
+    remove_unwanted_pytorch_nvcc_flags()
+    generate_build_meta()
+    ext_modules = []
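+    # nvcc builds the extension for each arch in TORCH_CUDA_ARCH_LIST (exported by
+    # the release workflow); if unset, torch targets the locally visible GPUs.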
name="flashinfer.ops._kernels", + sources=[ + "csrc/single_decode.cu", + "csrc/single_prefill.cu", + "csrc/cascade.cu", + "csrc/batch_decode.cu", + "csrc/flashinfer_ops.cu", + "csrc/batch_prefill.cu", + ], + include_dirs=[ + str(root.resolve() / "include"), + ], + extra_compile_args={ + "cxx": ["-O3"], + "nvcc": ["-O3", "--threads", "8"], + }, + ) + ) + + setuptools.setup( + name="flashinfer", + version=get_version(), + packages=setuptools.find_packages(), + author="FlashInfer team", + license="Apache License 2.0", + description="FlashInfer: Kernel Library for LLM Serving", + url="https://github.com/flashinfer-ai/flashinfer", + python_requires=">=3.9", + ext_modules=ext_modules, + cmdclass={"build_ext": torch_cpp_ext.BuildExtension}, ) -) - -setuptools.setup( - name="flashinfer", - version=get_version(root / "flashinfer/__init__.py"), - packages=setuptools.find_packages(), - author="FlashInfer team", - license="Apache License 2.0", - description="FlashInfer: Kernel Library for LLM Serving", - url="https://github.com/flashinfer-ai/flashinfer", - python_requires=">=3.9", - ext_modules=ext_modules, - cmdclass={"build_ext": torch_cpp_ext.BuildExtension}, -) diff --git a/python/version.txt b/python/version.txt new file mode 120000 index 00000000..aa4e5bec --- /dev/null +++ b/python/version.txt @@ -0,0 +1 @@ +../version.txt \ No newline at end of file diff --git a/scripts/ci-flashinfer.env.example b/scripts/ci-flashinfer.env.example new file mode 100644 index 00000000..d7c879a6 --- /dev/null +++ b/scripts/ci-flashinfer.env.example @@ -0,0 +1,8 @@ +RUNNER_SCOPE=repo +REPO_URL=https://github.com/flashinfer-ai/flashinfer +#LABELS=gpu,sm80 +ACCESS_TOKEN=foo-access-token +RUNNER_WORKDIR=/tmp/ci-flashinfer +CI_RUNNER_CACHE_DIR=/data/ci-flashinfer-cache +DISABLE_AUTO_UPDATE=1 +EPHEMERAL=1 diff --git a/scripts/ci-flashinfer.service b/scripts/ci-flashinfer.service new file mode 100644 index 00000000..81e04080 --- /dev/null +++ b/scripts/ci-flashinfer.service @@ -0,0 +1,27 @@ +# https://github.com/myoung34/docker-github-actions-runner/wiki/Usage +# Install with: +# install -m 644 ci-flashinfer.service $HOME/.config/systemd/user/ +# systemctl --user daemon-reload +# Run with: +# systemctl --user start ci-flashinfer +# Stop with: +# systemctl --user stop ci-flashinfer +# See live logs with: +# journalctl -f -u ci-flashinfer.service --no-hostname --no-tail +[Unit] +Description=Ephemeral GitHub Actions Runner Container for flashinfer-ai/flashinfer +[Service] +TimeoutStartSec=0 +Restart=always +ExecStartPre=-/usr/bin/docker stop %N +ExecStartPre=-/usr/bin/docker rm %N +ExecStartPre=-/usr/bin/docker pull myoung34/github-runner:latest +ExecStart=/usr/bin/docker run --rm \ + --env-file %h/.config/ci-flashinfer.env \ + -e RUNNER_NAME=%H \ + -e CI_UID=%U \ + -e CI_GID=%G \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp/ci-flashinfer:/tmp/ci-flashinfer \ + --name %N \ + myoung34/github-runner:latest diff --git a/scripts/run-ci-build-wheel.sh b/scripts/run-ci-build-wheel.sh new file mode 100644 index 00000000..16750526 --- /dev/null +++ b/scripts/run-ci-build-wheel.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# adapted from https://github.com/punica-ai/punica/blob/591b59899f0a20760821785d06b331c8a2e5cb86/ci/run-ci-build-wheel.bash +set -e + +assert_env() { + local var_name="$1" + if [ -z "${!var_name}" ]; then + echo "Error: Environment variable '$var_name' is not set." 
+assert_env FLASHINFER_CI_PYTHON_VERSION
+assert_env FLASHINFER_CI_TORCH_VERSION
+assert_env FLASHINFER_CI_CUDA_VERSION
+assert_env FLASHINFER_BUILD_VERSION
+assert_env TORCH_CUDA_ARCH_LIST
+PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+export CONDA_pkgs_dirs=/ci-cache/conda-pkgs
+export XDG_CACHE_HOME=/ci-cache/xdg-cache
+mkdir -p "$CONDA_pkgs_dirs" "$XDG_CACHE_HOME"
+export HOME=/tmp/home
+mkdir -p $HOME
+export PATH="$HOME/.local/bin:$PATH"
+CUDA_MAJOR="${FLASHINFER_CI_CUDA_VERSION%.*}"
+CUDA_MINOR="${FLASHINFER_CI_CUDA_VERSION#*.}"
+PYVER="${FLASHINFER_CI_PYTHON_VERSION//./}"
+export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"
+
+
+echo "::group::Install PyTorch"
+pip install torch==$FLASHINFER_CI_TORCH_VERSION --index-url "https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}"
+echo "::endgroup::"
+
+echo "::group::Install build system"
+pip install ninja numpy
+pip install --upgrade setuptools wheel build
+echo "::endgroup::"
+
+
+echo "::group::Build wheel for FlashInfer"
+cd "$PROJECT_ROOT/python"
+FLASHINFER_BUILD_VERSION="${FLASHINFER_BUILD_VERSION}+cu${CUDA_MAJOR}${CUDA_MINOR}" python -m build --no-isolation
+rm -f dist/*.tar.gz
+python -m build --no-isolation --sdist
+echo "::endgroup::"
diff --git a/version.txt b/version.txt
new file mode 100644
index 00000000..8a9ecc2e
--- /dev/null
+++ b/version.txt
@@ -0,0 +1 @@
+0.0.1
\ No newline at end of file