From 5a72fcdcbe7bd84f3665a140eeb30f5b27be11d3 Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Tue, 14 Jan 2025 23:45:05 +0800
Subject: [PATCH] [Bugfix][Kernel] Give unique name to BlockSparseFlashAttention
 (#12040)

Signed-off-by: Chen Zhang
---
 vllm/attention/backends/blocksparse_attn.py | 3 +--
 vllm/platforms/interface.py                 | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/backends/blocksparse_attn.py b/vllm/attention/backends/blocksparse_attn.py
index 7089d59392c36..77cfa8490172b 100644
--- a/vllm/attention/backends/blocksparse_attn.py
+++ b/vllm/attention/backends/blocksparse_attn.py
@@ -89,8 +89,7 @@ class BlocksparseFlashAttentionBackend(AttentionBackend):
 
     @staticmethod
     def get_name() -> str:
-        # For attention layer compatibility
-        return "FLASH_ATTN"
+        return "BLOCK_SPARSE_FLASH_ATTN"
 
     @staticmethod
     def get_impl_cls() -> Type["BlocksparseFlashAttentionImpl"]:
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 3c2ec9636df91..85fde76796901 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -33,6 +33,7 @@ class _Backend(enum.Enum):
     HPU_ATTN = enum.auto()
     PALLAS = enum.auto()
     IPEX = enum.auto()
+    BLOCK_SPARSE_FLASH_ATTN = enum.auto()
     NO_ATTENTION = enum.auto()
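
Note (not part of the patch): a minimal Python sketch of why the backend
name must be unique. resolve_backend is a hypothetical helper, not
vLLM's actual dispatch code; it only illustrates that before this patch,
BlocksparseFlashAttentionBackend reported "FLASH_ATTN" and so was
indistinguishable from the real flash attention backend in any
name-based lookup of this shape.

    import enum

    class _Backend(enum.Enum):
        # Abridged version of the enum touched by this patch.
        FLASH_ATTN = enum.auto()
        BLOCK_SPARSE_FLASH_ATTN = enum.auto()
        NO_ATTENTION = enum.auto()

    def resolve_backend(name: str) -> _Backend:
        # Map a backend's get_name() string back to its enum member.
        # Two backends returning the same string would collide here,
        # which is the bug the patch fixes.
        try:
            return _Backend[name]
        except KeyError:
            raise ValueError(f"unknown attention backend: {name!r}")

    assert (resolve_backend("BLOCK_SPARSE_FLASH_ATTN")
            is _Backend.BLOCK_SPARSE_FLASH_ATTN)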