From 0633ea77559c14960a438c1f7153e8033de873e1 Mon Sep 17 00:00:00 2001 From: nickfraser Date: Thu, 15 Feb 2024 17:51:07 +0000 Subject: [PATCH] Fix (examples/llm): set group_size only for groupwise quantization (#853) --- src/brevitas_examples/common/generative/quantize.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/brevitas_examples/common/generative/quantize.py b/src/brevitas_examples/common/generative/quantize.py index f19431605..21cb46a19 100644 --- a/src/brevitas_examples/common/generative/quantize.py +++ b/src/brevitas_examples/common/generative/quantize.py @@ -187,9 +187,12 @@ def quantize_model( **{ 'bit_width': weight_bit_width, 'narrow_range': False, - 'group_size': weight_group_size, 'quantize_zero_point': quantize_weight_zero_point}, **weight_float_format) + + # Set the group_size if we're doing groupwise quantization + if weight_quant_granularity == 'per_group': + weight_quant = weight_quant.let(**{'group_size': weight_group_size}) # weight scale is converted to a standalone parameter # This is done already by default in the per_group quantizer if weight_quant_granularity != 'per_group':