From 58c3ba91095ac79bc60c1a05ee5fd91bfc34427c Mon Sep 17 00:00:00 2001
From: hkayann <75617629+hkayann@users.noreply.github.com>
Date: Fri, 7 Feb 2025 13:20:25 +0000
Subject: [PATCH] Feat (brevitas_examples/imagenet): FP16 support to
 ptq_evaluate.py (#1174)

---------

Signed-off-by: hkayann
---
 .../imagenet_classification/ptq/README.md | 81 +++++++++++++------
 .../ptq/ptq_evaluate.py                   |  5 +-
 2 files changed, 62 insertions(+), 24 deletions(-)

diff --git a/src/brevitas_examples/imagenet_classification/ptq/README.md b/src/brevitas_examples/imagenet_classification/ptq/README.md
index 74653f96b..57c73b42e 100644
--- a/src/brevitas_examples/imagenet_classification/ptq/README.md
+++ b/src/brevitas_examples/imagenet_classification/ptq/README.md
@@ -71,7 +71,7 @@ usage: ptq_evaluate.py [-h] --calibration-dir CALIBRATION_DIR --validation-dir
                        [--batch-size-validation BATCH_SIZE_VALIDATION]
                        [--export-dir EXPORT_DIR] [--gpu GPU]
                        [--calibration-samples CALIBRATION_SAMPLES]
-                       [--model-name ARCH] [--dtype {float,bfloat16}]
+                       [--model-name ARCH] [--dtype {float,bfloat16,float16}]
                        [--target-backend {fx,layerwise,flexml}]
                        [--scale-factor-type {float_scale,po2_scale}]
                        [--act-bit-width ACT_BIT_WIDTH]
@@ -82,13 +82,19 @@ usage: ptq_evaluate.py [-h] --calibration-dir CALIBRATION_DIR --validation-dir
                        [--weight-quant-type {sym,asym}]
                        [--weight-quant-granularity {per_tensor,per_channel,per_group}]
                        [--act-quant-granularity {per_tensor,per_group}]
-                       [--weight-quant-calibration-type {stats,mse}]
+                       [--weight-quant-calibration-type {stats,mse,hqo}]
                        [--act-equalization {fx,layerwise,None}]
                        [--act-quant-calibration-type {stats,mse}]
                        [--act-scale-computation-type {static,dynamic}]
                        [--graph-eq-iterations GRAPH_EQ_ITERATIONS]
+                       [--learned-round {None,linear_round,hard_sigmoid_round,sigmoid_round}]
+                       [--learned-round-block-name LEARNED_ROUND_BLOCK_NAME]
+                       [--learned-round-loss {regularised_mse,mse}]
+                       [--learned-round-mode {layerwise,blockwise}]
                        [--learned-round-iters LEARNED_ROUND_ITERS]
+                       [--learned-round-lr-scheduler {None,linear}]
                        [--learned-round-lr LEARNED_ROUND_LR]
+                       [--learned-round-batch-size LEARNED_ROUND_BATCH_SIZE]
                        [--act-quant-percentile ACT_QUANT_PERCENTILE]
                        [--export-onnx-qcdq] [--export-torch-qcdq]
                        [--bias-corr | --no-bias-corr]
@@ -102,22 +108,24 @@ usage: ptq_evaluate.py [-h] --calibration-dir CALIBRATION_DIR --validation-dir
                        [--weight-exponent-bit-width WEIGHT_EXPONENT_BIT_WIDTH]
                        [--act-mantissa-bit-width ACT_MANTISSA_BIT_WIDTH]
                        [--act-exponent-bit-width ACT_EXPONENT_BIT_WIDTH]
-                       [--accumulator-bit-width ACCUMULATOR_BIT_WIDTH]
+                       [--gpxq-accumulator-bit-width GPXQ_ACCUMULATOR_BIT_WIDTH]
+                       [--gpxq-accumulator-tile-size GPXQ_ACCUMULATOR_TILE_SIZE]
                        [--onnx-opset-version ONNX_OPSET_VERSION]
                        [--channel-splitting-ratio CHANNEL_SPLITTING_RATIO]
-                       [--compression-rate COMPRESSION_RATE]
-                       [--gptq | --no-gptq] [--gpfq | --no-gpfq]
-                       [--gpfa2q | --no-gpfa2q]
+                       [--optimizer {adam,sign_sgd}] [--gptq | --no-gptq]
+                       [--gpfq | --no-gpfq]
                        [--gpxq-act-order | --no-gpxq-act-order]
-                       [--learned-round | --no-learned-round]
+                       [--gptq-use-quant-activations | --no-gptq-use-quant-activations]
+                       [--gpxq-create-weight-orig | --no-gpxq-create-weight-orig]
                        [--calibrate-bn | --no-calibrate-bn]
                        [--channel-splitting-split-input | --no-channel-splitting-split-input]
                        [--merge-bn | --no-merge-bn]
-                       [--uint_sym_act_for_unsigned_values | --no-uint_sym_act_for_unsigned_values]
+                       [--uint-sym-act-for-unsigned-values | --no-uint-sym-act-for-unsigned-values]
+                       [--compile | --no-compile]

 PyTorch ImageNet PTQ Validation

-optional arguments:
+options:
   -h, --help            show this help message and exit
   --calibration-dir CALIBRATION_DIR
                         Path to folder containing Imagenet calibration folder
@@ -159,8 +167,9 @@ optional arguments:
                         vgg16 | vgg16_bn | vgg19 | vgg19_bn | vit_b_16 |
                         vit_b_32 | vit_h_14 | vit_l_16 | vit_l_32 |
                         wide_resnet101_2 | wide_resnet50_2 (default: resnet18)
-  --dtype {float,bfloat16}
-                        Data type to use
+  --dtype {float,bfloat16,float16}
+                        Data type to use (float for FP32, bfloat16 for BF16,
+                        or float16 for FP16)
   --target-backend {fx,layerwise,flexml}
                         Backend to target for quantization (default: fx)
   --scale-factor-type {float_scale,po2_scale}
@@ -182,7 +191,7 @@ optional arguments:
                         Weight quantization type (default: per_tensor)
   --act-quant-granularity {per_tensor,per_group}
                         Activation quantization type (default: per_tensor)
-  --weight-quant-calibration-type {stats,mse}
+  --weight-quant-calibration-type {stats,mse,hqo}
                         Weight quantization calibration type (default: stats)
   --act-equalization {fx,layerwise,None}
                         Activation equalization type (default: None)
@@ -195,11 +204,25 @@ optional arguments:
   --graph-eq-iterations GRAPH_EQ_ITERATIONS
                         Number of iterations for graph equalization
                         (default: 20)
+  --learned-round {None,linear_round,hard_sigmoid_round,sigmoid_round}
+                        Learned round type (default: None)
+  --learned-round-block-name LEARNED_ROUND_BLOCK_NAME
+                        Block name for learned round; used only when FX is
+                        not needed (default: layer\d+)
+  --learned-round-loss {regularised_mse,mse}
+                        Learned round loss (default: none)
+  --learned-round-mode {layerwise,blockwise}
+                        Learned round mode (default: none)
   --learned-round-iters LEARNED_ROUND_ITERS
                         Number of iterations for learned round for each layer
                         (default: 1000)
+  --learned-round-lr-scheduler {None,linear}
+                        Learning rate scheduler for learned round (default:
+                        None)
   --learned-round-lr LEARNED_ROUND_LR
                         Learning rate for learned round (default: 1e-3)
+  --learned-round-batch-size LEARNED_ROUND_BATCH_SIZE
+                        Batch size for learned round (default: 1)
   --act-quant-percentile ACT_QUANT_PERCENTILE
                         Percentile to use for stats of activation quantization
                         (default: 99.999)
@@ -243,26 +266,36 @@ optional arguments:
   --act-exponent-bit-width ACT_EXPONENT_BIT_WIDTH
                         Exponent bit width used with float quantization for
                         activations (default: 3)
-  --accumulator-bit-width ACCUMULATOR_BIT_WIDTH
-                        Accumulator Bit Width for GPFA2Q (default: None)
+  --gpxq-accumulator-bit-width GPXQ_ACCUMULATOR_BIT_WIDTH
+                        Accumulator Bit Width for GPxQ (default: None)
+  --gpxq-accumulator-tile-size GPXQ_ACCUMULATOR_TILE_SIZE
+                        Accumulator tile size for GPxQ (default: None)
   --onnx-opset-version ONNX_OPSET_VERSION
                         ONNX opset version
   --channel-splitting-ratio CHANNEL_SPLITTING_RATIO
                         Split Ratio for Channel Splitting. When set to 0.0,
                         Channel Splitting will not be applied. (default: 0.0)
-  --compression-rate COMPRESSION_RATE
-                        Specify compression rate < 1.0 for random projection.
-                        Default is 0.0 and does not use RP.
+  --optimizer {adam,sign_sgd}
+                        Optimizer to use with learnable rounding (default:
+                        adam)
   --gptq                Enable GPTQ (default: disabled)
   --no-gptq             Disable GPTQ (default: disabled)
   --gpfq                Enable GPFQ (default: disabled)
   --no-gpfq             Disable GPFQ (default: disabled)
-  --gpfa2q              Enable GPFA2Q (default: disabled)
-  --no-gpfa2q           Disable GPFA2Q (default: disabled)
   --gpxq-act-order      Enable GPxQ Act order heuristic (default: disabled)
   --no-gpxq-act-order   Disable GPxQ Act order heuristic (default: disabled)
-  --learned-round       Enable Learned round (default: disabled)
-  --no-learned-round    Disable Learned round (default: disabled)
+  --gptq-use-quant-activations
+                        Enable Use quant activations for GPTQ (default:
+                        disabled)
+  --no-gptq-use-quant-activations
+                        Disable Use quant activations for GPTQ (default:
+                        disabled)
+  --gpxq-create-weight-orig
+                        Enable Maintain original weights for non-quant forward
+                        pass (default: disabled)
+  --no-gpxq-create-weight-orig
+                        Disable Maintain original weights for non-quant
+                        forward pass (default: disabled)
   --calibrate-bn        Enable Calibrate BN (default: disabled)
   --no-calibrate-bn     Disable Calibrate BN (default: disabled)
   --channel-splitting-split-input
@@ -275,12 +308,14 @@ optional arguments:
                         (default: enabled)
   --no-merge-bn         Disable Merge BN layers before quantizing the model
                         (default: enabled)
-  --uint_sym_act_for_unsigned_values
+  --uint-sym-act-for-unsigned-values
                         Enable Use unsigned act quant when possible (default:
                         enabled)
-  --no-uint_sym_act_for_unsigned_values
+  --no-uint-sym-act-for-unsigned-values
                         Disable Use unsigned act quant when possible (default:
                         enabled)
+  --compile             Enable Use torch.compile (default: disabled)
+  --no-compile          Disable Use torch.compile (default: disabled)
 ```

 The script requires specifying the calibration folder (`--calibration-dir`), from which the calibration samples will be taken (configurable with the `--calibration-samples` argument), and a validation folder (`--validation-dir`).

diff --git a/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py b/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py
index ff221e5ba..6a731b7af 100644
--- a/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py
+++ b/src/brevitas_examples/imagenet_classification/ptq/ptq_evaluate.py
@@ -87,7 +87,10 @@ def validate_args(args):
         choices=model_names,
         help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')
     parser.add_argument(
-        '--dtype', default='float', choices=['float', 'bfloat16'], help='Data type to use')
+        '--dtype',
+        default='float',
+        choices=['float', 'bfloat16', 'float16'],
+        help='Data type to use (float for FP32, bfloat16 for BF16, or float16 for FP16)')
     parser.add_argument(
         '--target-backend',
         default='fx',
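
As a quick sanity check of the new flag, here is a minimal standalone sketch; it is not part of the patch and not the actual ptq_evaluate.py plumbing, and the `DTYPE_MAP` and `prepare_model` names are hypothetical. It only shows how the three `--dtype` choices can be mapped onto torch dtypes before evaluation:

```python
# Minimal sketch (hypothetical names): mapping the '--dtype' CLI choices
# added by this patch onto torch dtypes and casting a model accordingly.
import argparse

import torch
from torchvision.models import resnet18

# Hypothetical mapping from the CLI choices to torch dtypes.
DTYPE_MAP = {
    'float': torch.float32,  # FP32 (default)
    'bfloat16': torch.bfloat16,  # BF16
    'float16': torch.float16,  # FP16, the choice added by this patch
}


def prepare_model(dtype_name: str) -> torch.nn.Module:
    """Instantiate a model and cast its weights to the requested dtype."""
    model = resnet18()  # random weights are enough for a dtype check
    return model.to(dtype=DTYPE_MAP[dtype_name])


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Same argument definition as the one added to ptq_evaluate.py above.
    parser.add_argument(
        '--dtype',
        default='float',
        choices=['float', 'bfloat16', 'float16'],
        help='Data type to use (float for FP32, bfloat16 for BF16, or float16 for FP16)')
    args = parser.parse_args()

    model = prepare_model(args.dtype)
    # Calibration/validation inputs must be cast to the same dtype as the
    # weights, or the first conv/matmul raises a dtype-mismatch error.
    print(next(model.parameters()).dtype)  # torch.float16 for --dtype float16
```

Running the sketch with `--dtype float16` prints `torch.float16`, mirroring the FP16 option this patch exposes.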