From fb20e003c7a29369fc0cb35c23c663e91ffa09d8 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Thu, 5 Sep 2024 17:53:08 +0100 Subject: [PATCH] Switch SDPA quantization to FP8 FNUZ --- src/brevitas_examples/stable_diffusion/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/brevitas_examples/stable_diffusion/main.py b/src/brevitas_examples/stable_diffusion/main.py index 032b60539..82b2b9e98 100644 --- a/src/brevitas_examples/stable_diffusion/main.py +++ b/src/brevitas_examples/stable_diffusion/main.py @@ -364,7 +364,7 @@ def input_zp_stats_type(): dtype=dtype, device=args.device, weight_bit_width=weight_bit_width, - weight_quant_format='float_ocp_e4m3', + weight_quant_format='float_fnuz_e4m3', weight_quant_type='sym', weight_param_method=args.weight_param_method, weight_scale_precision=args.weight_scale_precision, @@ -373,7 +373,7 @@ def input_zp_stats_type(): quantize_weight_zero_point=args.quantize_weight_zero_point, quantize_input_zero_point=args.quantize_input_zero_point, input_bit_width=args.linear_output_bit_width, - input_quant_format='float_ocp_e4m3', + input_quant_format='float_fnuz_e4m3', input_scale_type=args.input_scale_type, input_scale_precision=args.input_scale_precision, input_param_method=args.input_param_method,