@@ -980,18 +980,17 @@ def use_mla(self) -> bool:
         # have fp8 for both weights and activations.
         if self.quantization == "compressed-tensors":
             quant_config = self._parse_quant_hf_config()
-            for group_name, group_cfg in quant_config.get("config_groups",
-                                                          {}).items():
-                input_act_type = group_cfg.get("input_activations", {})\
-                    .get("type", "unknown").lower()
-                weights_type = group_cfg.get("weights", {})\
-                    .get("type", "unknown").lower()
-                if input_act_type != "fp8" or weights_type != "fp8":
+            for group_name, cfg in quant_config.get("config_groups",
+                                                    {}).items():
+                act_type = cfg.get("input_activations", {}).get("type", "")
+                weight_type = cfg.get("weights", {}).get("type", "")
+                if act_type != "fp8" or weight_type != "fp8":
                     logger.warning(
                         "compressed-tensors MLA support requires fp8 "
                         "activations and weights in group '%s', but got "
-                        "activations type '%s' and weights type '%s'.",
-                        group_name, input_act_type, weights_type)
+                        "activations type '%s' and weights type '%s'.\n"
+                        "Full config: %s", group_name, act_type, weight_type,
+                        quant_config)
                     return False

         use_mla = (self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE)
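
For reference, the rewritten loop walks the compressed-tensors quantization config read from the model's config.json: every group under "config_groups" must declare type "fp8" for both "weights" and "input_activations", otherwise MLA is disabled with a warning. The sketch below replays that check on a hand-written config dict; the helper name groups_are_fp8 and the sample values are illustrative assumptions, not vLLM API.

# Minimal sketch of the fp8 check above, run on a hand-written dict whose
# shape mirrors a compressed-tensors "quantization_config"
# ("config_groups" -> group -> "weights"/"input_activations" -> "type").
# groups_are_fp8 and the sample values are hypothetical, not vLLM API.

def groups_are_fp8(quant_config: dict) -> bool:
    for group_name, cfg in quant_config.get("config_groups", {}).items():
        act_type = cfg.get("input_activations", {}).get("type", "")
        weight_type = cfg.get("weights", {}).get("type", "")
        if act_type != "fp8" or weight_type != "fp8":
            print(f"group {group_name!r}: activations={act_type!r}, "
                  f"weights={weight_type!r} -> would disable MLA")
            return False
    return True

sample = {
    "config_groups": {
        "group_0": {
            "weights": {"type": "fp8"},
            "input_activations": {"type": "fp8"},
        },
    },
}
assert groups_are_fp8(sample)        # all-fp8 group passes

sample["config_groups"]["group_0"]["weights"]["type"] = "int"
assert not groups_are_fp8(sample)    # non-fp8 weights fail the check

Note the defaults: a group that omits "input_activations" (e.g. a weight-only scheme) falls through to "" and fails the fp8 test, so such checkpoints also disable MLA; the old "unknown" default behaved the same way, and dropping .lower() assumes the serialized type strings are already lowercase.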