From 5a72fcdcbe7bd84f3665a140eeb30f5b27be11d3 Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Tue, 14 Jan 2025 23:45:05 +0800
Subject: [PATCH] [Bugfix][Kernel] Give unique name to BlockSparseFlashAttention
 (#12040)

Signed-off-by: Chen Zhang
---
 vllm/attention/backends/blocksparse_attn.py | 3 +--
 vllm/platforms/interface.py                 | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/backends/blocksparse_attn.py b/vllm/attention/backends/blocksparse_attn.py
index 7089d59392c36..77cfa8490172b 100644
--- a/vllm/attention/backends/blocksparse_attn.py
+++ b/vllm/attention/backends/blocksparse_attn.py
@@ -89,8 +89,7 @@ class BlocksparseFlashAttentionBackend(AttentionBackend):
 
     @staticmethod
     def get_name() -> str:
-        # For attention layer compatibility
-        return "FLASH_ATTN"
+        return "BLOCK_SPARSE_FLASH_ATTN"
 
     @staticmethod
     def get_impl_cls() -> Type["BlocksparseFlashAttentionImpl"]:
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 3c2ec9636df91..85fde76796901 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -33,6 +33,7 @@ class _Backend(enum.Enum):
     HPU_ATTN = enum.auto()
     PALLAS = enum.auto()
     IPEX = enum.auto()
+    BLOCK_SPARSE_FLASH_ATTN = enum.auto()
     NO_ATTENTION = enum.auto()
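
Note (not part of the patch): a minimal Python sketch of why the backend
name must be unique. resolve_backend is a hypothetical helper, not
vLLM's actual dispatch code; it only illustrates that before this patch,
BlocksparseFlashAttentionBackend reported "FLASH_ATTN" and so was
indistinguishable from the real flash attention backend in any
name-based lookup of this shape.

    import enum

    class _Backend(enum.Enum):
        # Abridged version of the enum touched by this patch.
        FLASH_ATTN = enum.auto()
        BLOCK_SPARSE_FLASH_ATTN = enum.auto()
        NO_ATTENTION = enum.auto()

    def resolve_backend(name: str) -> _Backend:
        # Map a backend's get_name() string back to its enum member.
        # Two backends returning the same string would collide here,
        # which is the bug the patch fixes.
        try:
            return _Backend[name]
        except KeyError:
            raise ValueError(f"unknown attention backend: {name!r}")

    assert (resolve_backend("BLOCK_SPARSE_FLASH_ATTN")
            is _Backend.BLOCK_SPARSE_FLASH_ATTN)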