
Commit f9ecbb1

[Misc] Allow passing logits_soft_cap for xformers backend (vllm-project#11252)
Signed-off-by: Isotr0py <2037008807@qq.com>
1 parent 02222a0 commit f9ecbb1

1 file changed: +3 −5 lines changed


vllm/attention/backends/xformers.py (+3 −5)
@@ -17,9 +17,7 @@
     is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set)
 from vllm.attention.ops.paged_attn import (PagedAttention,
                                            PagedAttentionMetadata)
-from vllm.logger import init_logger
-
-logger = init_logger(__name__)
+from vllm.utils import print_warning_once
 
 
 class XFormersBackend(AttentionBackend):
@@ -386,8 +384,8 @@ def __init__(
             raise ValueError(
                 "XFormers does not support block-sparse attention.")
         if logits_soft_cap is not None:
-            raise ValueError(
-                "XFormers does not support attention logits soft capping.")
+            print_warning_once("XFormers does not support logits soft cap. "
+                               "Outputs may be slightly off.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
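In effect, constructing the xformers attention backend with a logits_soft_cap no longer raises: the backend emits a one-time warning and proceeds without applying the cap, so outputs may differ slightly from backends that support it. The sketch below reproduces the new guard in isolation. XFormersImplSketch is a hypothetical stand-in for the real backend class, and the lru_cache-based print_warning_once is an assumed reimplementation of the helper imported from vllm.utils, shown only to illustrate the warn-once-instead-of-raise pattern.

import functools
from typing import Optional


@functools.lru_cache(maxsize=None)
def print_warning_once(msg: str) -> None:
    # Assumed behavior of vllm.utils.print_warning_once:
    # each distinct message is emitted at most once per process.
    print(f"WARNING: {msg}")


class XFormersImplSketch:
    # Hypothetical reduction of the __init__ touched in this commit.
    def __init__(self, num_heads: int, head_size: int, scale: float,
                 logits_soft_cap: Optional[float] = None) -> None:
        if logits_soft_cap is not None:
            # Previously a hard ValueError; now a warning, so models that
            # request soft capping can still run on this backend, with
            # slightly different logits.
            print_warning_once("XFormers does not support logits soft cap. "
                               "Outputs may be slightly off.")
        self.num_heads = num_heads
        self.head_size = head_size
        self.scale = float(scale)

Calling XFormersImplSketch(8, 64, 0.125, logits_soft_cap=30.0) twice prints the warning only once, which keeps logs readable when a model constructs many attention layers.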
