[GPTNeoX] Fix GPTNeoX + Flash Attention 2 issue (#28645)

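Update modeling_gpt_neox.py: in the Flash Attention 2 dtype fallback, read the target dtype from `self.query_key_value.weight` instead of `self.q_proj.weight`.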
huggingface · Jan 22, 2024 · e201864
1 parent dafd595
commit e201864
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/src/transformers/models/gpt_neox/modeling_gpt_neox.py b/src/transformers/models/gpt_neox/modeling_gpt_neox.py
@@ -390,7 +390,7 @@ def forward(
             elif hasattr(self.config, "_pre_quantization_dtype"):
                 target_dtype = self.config._pre_quantization_dtype
             else:
-                target_dtype = self.q_proj.weight.dtype
+                target_dtype = self.query_key_value.weight.dtype
 
             logger.warning_once(
                 f"The input hidden states seems to be silently casted in float32, this might be related to"