Skip to content

Commit

Permalink
[CI] Setup github actions for release wheel (#91)
Browse files Browse the repository at this point in the history
Adapted from Punica.

---------

Co-authored-by: Lequn Chen <lqchen@cs.washington.edu>
  • Loading branch information
yzh119 and abcdabcd987 authored Jan 29, 2024
1 parent 51b88d2 commit 064efdd
Show file tree
Hide file tree
Showing 10 changed files with 270 additions and 42 deletions.
106 changes: 106 additions & 0 deletions .github/workflows/release_wheel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Adapted from https://github.com/punica-ai/punica/blob/591b59899f0a20760821785d06b331c8a2e5cb86/.github/workflows/release_wheel.yml
#
# Reusable workflow (workflow_call). The `build` job produces one wheel per
# Python/CUDA combination inside the pytorch/manylinux-builder image on a
# self-hosted runner; the `release` job attaches the artifacts to the GitHub
# release `tag_name` and appends them to the flashinfer-ai/whl index repo.
name: Release
on:
  workflow_call:
    inputs:
      tag_name:
        required: true
        type: string
    secrets:
      WHL_TOKEN:
        required: true
      # PYPI_TEST_TOKEN:
      #   required: true

env:
  TORCH_CUDA_ARCH_LIST: "8.0 8.6 8.9 9.0+PTX"
  FLASHINFER_CI_TORCH_VERSION: "2.1.0"

jobs:
  build:
    strategy:
      # Keep building the remaining combinations when one of them fails.
      fail-fast: false
      matrix:
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        # NOTE(review): confirm that torch 2.1.0 publishes wheels for
        # Python 3.12; otherwise those matrix entries fail at `pip install`.
        python: ["3.10", "3.11", "3.12"]
        cuda: ["11.8", "12.1"]
    runs-on: [self-hosted]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true

      - name: Build wheel
        # CI_UID, CI_GID and CI_RUNNER_CACHE_DIR are expected in the runner's
        # environment; the workspace is chown'ed so the container user given
        # by --user can write the build output into /app.
        run: |
          chown -R $CI_UID:$CI_GID "$GITHUB_WORKSPACE"
          version="$(cat version.txt)"
          docker run --rm -t \
            -v "$CI_RUNNER_CACHE_DIR":/ci-cache \
            -v "$GITHUB_WORKSPACE":/app \
            -e FLASHINFER_CI_PYTHON_VERSION=${{ matrix.python }} \
            -e FLASHINFER_CI_CUDA_VERSION=${{ matrix.cuda }} \
            -e FLASHINFER_CI_TORCH_VERSION=$FLASHINFER_CI_TORCH_VERSION \
            -e FLASHINFER_BUILD_VERSION=$version \
            -e TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" \
            --user $CI_UID:$CI_GID \
            pytorch/manylinux-builder:cuda${{ matrix.cuda }} \
            bash /app/scripts/run-ci-build-wheel.sh
        timeout-minutes: 120
      - run: du -h python/dist/*

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-cuda${{ matrix.cuda }}-python${{ matrix.python }}
          path: python/dist/*

  release:
    needs: build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v4
        with:
          # Must be the directory every later step reads from
          # (was misspelled "ptyhon/dist/", leaving python/dist/ missing).
          path: python/dist/
          merge-multiple: true
          pattern: wheel-*

      - run: ls -lah python/dist/

      - uses: softprops/action-gh-release@v1
        with:
          tag_name: ${{ inputs.tag_name }}
          files: |
            python/dist/flashinfer-*.whl
            python/dist/flashinfer-*.tar.gz
      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/flashinfer-ai/whl.git flashinfer-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      - name: Update wheel index
        shell: python
        # Appends one <a> entry per wheel to the index page of its CUDA
        # flavour. The wheel file name must carry a "+cuXYZ" local version,
        # otherwise the regular expression below finds no match.
        run: |
          import pathlib
          import hashlib
          import re
          for path in sorted(pathlib.Path("python/dist").glob("*.whl")):
              with open(path, "rb") as f:
                  sha256 = hashlib.sha256(f.read()).hexdigest()
              ver, cu = re.findall(r"flashinfer-([0-9.]+)\+cu(\d+)-", path.name)[0]
              with open(f"flashinfer-whl/cu{cu}/flashinfer/index.html", "a") as f:
                  f.write(f'<a href="https://github.com/flashinfer-ai/flashinfer/releases/download/v{ver}/{path.name}#sha256={sha256}">{path.name}</a><br>\n')
      - name: Push wheel index
        run: |
          cd flashinfer-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl"
          git push
      # - name: Upload sdist to pypi
      #   run: |
      #     pip install twine
      #     python -m twine upload --repository testpypi --username=__token__ dist/*.tar.gz
      #   env:
      #     TWINE_PASSWORD: ${{ secrets.PYPI_TEST_TOKEN }}
12 changes: 12 additions & 0 deletions python/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# sdist & wheel
include version.txt
recursive-include include *
recursive-include csrc *

# wheel-only
exclude flashinfer/_build_meta.py
# `exclude` only matches file patterns; removing a directory tree needs `prune`.
prune tests

# Unneeded files
prune */__pycache__
global-exclude *.so
2 changes: 0 additions & 2 deletions python/flashinfer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,3 @@
BatchDecodeWithPagedKVCacheWrapper,
BatchPrefillWithPagedKVCacheWrapper,
)

__version__ = "0.0.1"
1 change: 1 addition & 0 deletions python/include
108 changes: 68 additions & 40 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,45 @@
limitations under the License.
"""
import pathlib
import os
import re
import datetime
import subprocess
import platform

import setuptools
import torch
import torch.utils.cpp_extension as torch_cpp_ext

# Directory that contains this setup.py. It is derived from __file__ rather
# than __name__: __name__ is only the module name ("__main__"), so the old
# expression always evaluated to "." and worked solely when the build was
# started with this directory as the current working directory.
root = pathlib.Path(__file__).resolve().parent


def get_version(path):
    """Return the value assigned to ``__version__`` in the file at ``path``.

    The first line that starts with ``__version__`` is used; everything after
    its first ``=`` is returned with double quotes and surrounding whitespace
    removed.

    Raises:
        ValueError: if no line of the file starts with ``__version__``.
    """
    with open(path) as source:
        assignment = next(
            (text for text in source if text.startswith("__version__")),
            None,
        )
    if assignment is None:
        raise ValueError("Version not found")
    value = assignment.split("=", maxsplit=1)[1]
    return value.replace('"', "").strip()
def get_version():
    """Return the version string used for the package being built.

    The ``FLASHINFER_BUILD_VERSION`` environment variable takes precedence
    when it is set and non-empty. The CI build script exports it with a
    ``+cuXYZ`` local-version suffix, which the release workflow relies on
    when it parses wheel file names. Without it, the content of
    ``version.txt`` next to this script is returned.
    """
    override = os.environ.get("FLASHINFER_BUILD_VERSION")
    if override:
        return override.strip()
    with open(root / "version.txt") as f:
        return f.read().strip()


def get_cuda_version() -> tuple[int, int]:
    """Return the ``(major, minor)`` CUDA release reported by ``nvcc --version``.

    The compiler is taken from ``<CUDA_HOME>/bin/nvcc`` when PyTorch's
    ``cpp_extension`` module knows a ``CUDA_HOME``; otherwise ``nvcc`` is
    looked up on ``PATH``.
    """
    cuda_home = torch_cpp_ext.CUDA_HOME
    nvcc = "nvcc" if cuda_home is None else os.path.join(cuda_home, "bin/nvcc")
    banner = subprocess.check_output([nvcc, "--version"], text=True)
    # First "release X.Y," occurrence in the compiler banner.
    release = re.findall(r"release (\d+)\.(\d+),", banner)[0]
    major, minor = (int(part) for part in release)
    return major, minor


def generate_build_meta() -> None:
    """Write ``flashinfer/_build_meta.py`` describing the current build.

    The generated module defines ``__version__`` and a ``build_meta`` dict
    holding the CUDA major/minor version, the PyTorch version, the Python
    version and the value of ``TORCH_CUDA_ARCH_LIST`` (``None`` when unset).
    """
    version = get_version()
    cuda_major, cuda_minor = get_cuda_version()
    build_meta = {
        "cuda_major": cuda_major,
        "cuda_minor": cuda_minor,
        "torch": torch.__version__,
        "python": platform.python_version(),
        "TORCH_CUDA_ARCH_LIST": os.environ.get("TORCH_CUDA_ARCH_LIST", None),
    }
    content = [
        f"__version__ = {version!r}\n",
        f"build_meta = {build_meta!r}",
    ]
    with open(root / "flashinfer/_build_meta.py", "w") as out:
        out.writelines(content)


def remove_unwanted_pytorch_nvcc_flags():
Expand All @@ -43,38 +69,40 @@ def remove_unwanted_pytorch_nvcc_flags():
pass


remove_unwanted_pytorch_nvcc_flags()
ext_modules = []
ext_modules.append(
torch_cpp_ext.CUDAExtension(
name="flashinfer.ops._kernels",
sources=[
"csrc/single_decode.cu",
"csrc/single_prefill.cu",
"csrc/cascade.cu",
"csrc/batch_decode.cu",
"csrc/flashinfer_ops.cu",
"csrc/batch_prefill.cu",
],
include_dirs=[
str(root.resolve().parent / "include"),
],
extra_compile_args={
"cxx": ["-O3"],
"nvcc": ["-O3", "--threads", "8"],
},
if __name__ == "__main__":
    # Adjust PyTorch's default nvcc flags and emit flashinfer/_build_meta.py
    # before setuptools collects the package contents.
    remove_unwanted_pytorch_nvcc_flags()
    generate_build_meta()

    # Single CUDA extension that bundles all kernels and their bindings.
    kernel_sources = [
        "csrc/single_decode.cu",
        "csrc/single_prefill.cu",
        "csrc/cascade.cu",
        "csrc/batch_decode.cu",
        "csrc/flashinfer_ops.cu",
        "csrc/batch_prefill.cu",
    ]
    kernels_extension = torch_cpp_ext.CUDAExtension(
        name="flashinfer.ops._kernels",
        sources=kernel_sources,
        include_dirs=[str(root.resolve() / "include")],
        extra_compile_args={
            "cxx": ["-O3"],
            "nvcc": ["-O3", "--threads", "8"],
        },
    )
    ext_modules = [kernels_extension]

    setuptools.setup(
        name="flashinfer",
        version=get_version(),
        packages=setuptools.find_packages(),
        author="FlashInfer team",
        license="Apache License 2.0",
        description="FlashInfer: Kernel Library for LLM Serving",
        url="https://github.com/flashinfer-ai/flashinfer",
        python_requires=">=3.9",
        ext_modules=ext_modules,
        cmdclass={"build_ext": torch_cpp_ext.BuildExtension},
    )
)

setuptools.setup(
name="flashinfer",
version=get_version(root / "flashinfer/__init__.py"),
packages=setuptools.find_packages(),
author="FlashInfer team",
license="Apache License 2.0",
description="FlashInfer: Kernel Library for LLM Serving",
url="https://github.com/flashinfer-ai/flashinfer",
python_requires=">=3.9",
ext_modules=ext_modules,
cmdclass={"build_ext": torch_cpp_ext.BuildExtension},
)
1 change: 1 addition & 0 deletions python/version.txt
8 changes: 8 additions & 0 deletions scripts/ci-flashinfer.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Example environment file for the self-hosted runner container.
# scripts/ci-flashinfer.service loads %h/.config/ci-flashinfer.env via
# `docker run --env-file`; presumably this example is copied there and edited.
RUNNER_SCOPE=repo
REPO_URL=https://github.com/flashinfer-ai/flashinfer
#LABELS=gpu,sm80
# Placeholder value; replace with a real token before use.
ACCESS_TOKEN=foo-access-token
# Same path that the service unit bind-mounts into the runner container.
RUNNER_WORKDIR=/tmp/ci-flashinfer
# Host directory that the release workflow mounts at /ci-cache in the build container.
CI_RUNNER_CACHE_DIR=/data/ci-flashinfer-cache
DISABLE_AUTO_UPDATE=1
EPHEMERAL=1
27 changes: 27 additions & 0 deletions scripts/ci-flashinfer.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# https://github.com/myoung34/docker-github-actions-runner/wiki/Usage
# Install with:
# install -m 644 ci-flashinfer.service $HOME/.config/systemd/user/
# systemctl --user daemon-reload
# Run with:
# systemctl --user start ci-flashinfer
# Stop with:
# systemctl --user stop ci-flashinfer
# See live logs with:
# journalctl --user -f -u ci-flashinfer.service --no-hostname --no-tail
# systemd unit that keeps a GitHub Actions runner container running.
[Unit]
Description=Ephemeral GitHub Actions Runner Container for flashinfer-ai/flashinfer
[Service]
TimeoutStartSec=0
# Start a fresh container again whenever the current one exits.
Restart=always
# The leading "-" makes systemd ignore a failure of these clean-up/pull steps.
ExecStartPre=-/usr/bin/docker stop %N
ExecStartPre=-/usr/bin/docker rm %N
ExecStartPre=-/usr/bin/docker pull myoung34/github-runner:latest
# Specifiers as used below: %N names the container, %h locates the env file
# under the home directory, %H becomes the runner name, and %U/%G are passed
# in as CI_UID/CI_GID. The host Docker socket is mounted into the container,
# as is /tmp/ci-flashinfer at the same path.
ExecStart=/usr/bin/docker run --rm \
--env-file %h/.config/ci-flashinfer.env \
-e RUNNER_NAME=%H \
-e CI_UID=%U \
-e CI_GID=%G \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /tmp/ci-flashinfer:/tmp/ci-flashinfer \
--name %N \
myoung34/github-runner:latest
46 changes: 46 additions & 0 deletions scripts/run-ci-build-wheel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# adapted from https://github.com/punica-ai/punica/blob/591b59899f0a20760821785d06b331c8a2e5cb86/ci/run-ci-build-wheel.bash
set -e

# Abort the script with status 1 unless the environment variable whose NAME
# is passed as $1 is set to a non-empty value.
assert_env() {
  local var_name="$1"
  # ${!var_name} is bash indirect expansion: the value of the named variable.
  if [ -z "${!var_name}" ]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "Error: Environment variable '$var_name' is not set." >&2
    exit 1
  fi
}

# Every input comes from the environment that `docker run -e ...` provides.
assert_env FLASHINFER_CI_PYTHON_VERSION
assert_env FLASHINFER_CI_TORCH_VERSION
assert_env FLASHINFER_CI_CUDA_VERSION
assert_env FLASHINFER_BUILD_VERSION
assert_env TORCH_CUDA_ARCH_LIST
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
# Keep package and tool caches under the mounted /ci-cache directory.
export CONDA_pkgs_dirs=/ci-cache/conda-pkgs
export XDG_CACHE_HOME=/ci-cache/xdg-cache
mkdir -p "$CONDA_pkgs_dirs" "$XDG_CACHE_HOME"
export HOME=/tmp/home
mkdir -p "$HOME"
export PATH="$HOME/.local/bin:$PATH"
# "12.1" -> CUDA_MAJOR=12, CUDA_MINOR=1, and "3.10" -> PYVER=310.
CUDA_MAJOR="${FLASHINFER_CI_CUDA_VERSION%.*}"
CUDA_MINOR="${FLASHINFER_CI_CUDA_VERSION#*.}"
PYVER="${FLASHINFER_CI_PYTHON_VERSION//./}"
export PATH="/opt/python/cp${PYVER}-cp${PYVER}/bin:$PATH"


echo "::group::Install PyTorch"
pip install "torch==$FLASHINFER_CI_TORCH_VERSION" --index-url "https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}"
echo "::endgroup::"

echo "::group::Install build system"
pip install ninja numpy
pip install --upgrade setuptools wheel build
echo "::endgroup::"


echo "::group::Build wheel for FlashInfer"
cd "$PROJECT_ROOT/python"
# First build runs with a "+cuXY" suffix on the version; the sdist it also
# produces is deleted and rebuilt below without that suffix override.
FLASHINFER_BUILD_VERSION="${FLASHINFER_BUILD_VERSION}+cu${CUDA_MAJOR}${CUDA_MINOR}" python -m build --no-isolation
rm -f dist/*.tar.gz
python -m build --no-isolation --sdist
echo "::endgroup::"
1 change: 1 addition & 0 deletions version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.0.1

0 comments on commit 064efdd

Please sign in to comment.