Skip to content

Commit

Permalink
Add file lock before op JIT. (#539)
Browse files Browse the repository at this point in the history
When we run multi-GPU inference with flashinfer in JIT mode, multiple
processes simultaneously JIT-compile the same op, which causes a "File
exists" error. This PR adds a file lock for the build directory before
op JIT to avoid this error.
  • Loading branch information
yuxianq authored Oct 18, 2024
1 parent 3f5f20f commit d0b0b7f
Showing 1 changed file with 14 additions and 11 deletions.
25 changes: 14 additions & 11 deletions python/flashinfer/jit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import logging
import subprocess
import torch.utils.cpp_extension as torch_cpp_ext
from filelock import FileLock
from typing import List, Tuple
from .env import (
FLASHINFER_WORKSPACE_DIR,
Expand Down Expand Up @@ -140,14 +141,16 @@ def load_cuda_ops(
FLASHINFER_INCLUDE_DIR,
FLASHINFER_CSRC_DIR,
] + CUTLASS_INCLUDE_DIRS
return torch_cpp_ext.load(
name,
list(map(lambda _: str(_), sources)),
extra_cflags=cflags,
extra_cuda_cflags=cuda_cflags,
extra_ldflags=extra_ldflags,
extra_include_paths=list(map(lambda _: str(_), extra_include_paths)),
build_directory=build_directory,
verbose=verbose,
with_cuda=True,
)
lock = FileLock(FLASHINFER_JIT_DIR / f"{name}.lock", thread_local=False)
with lock:
return torch_cpp_ext.load(
name,
list(map(lambda _: str(_), sources)),
extra_cflags=cflags,
extra_cuda_cflags=cuda_cflags,
extra_ldflags=extra_ldflags,
extra_include_paths=list(map(lambda _: str(_), extra_include_paths)),
build_directory=build_directory,
verbose=verbose,
with_cuda=True,
)

0 comments on commit d0b0b7f

Please sign in to comment.