diff --git a/src/finn/transformation/lower_convs_to_matmul.py b/src/finn/transformation/lower_convs_to_matmul.py
index 9916de6..2eec628 100644
--- a/src/finn/transformation/lower_convs_to_matmul.py
+++ b/src/finn/transformation/lower_convs_to_matmul.py
@@ -34,16 +34,21 @@
 from finn.util.basic import get_by_name
 
 
-def _auto_pad_to_explicit_padding(autopad_str, idim, k, stride, n_dims):
-    pad_total = (stride - 1) * idim - stride + k
-    pad_half_small = int((pad_total / 2))
-    pad_half_large = pad_total - pad_half_small
+def _auto_pad_to_explicit_padding(
+    autopad_str, idim_H, idim_W, k_H, k_W, stride, n_dims
+):
+    pad_total_H = (stride - 1) * idim_H - stride + k_H
+    pad_total_W = (stride - 1) * idim_W - stride + k_W
+    pad_half_small_H = int((pad_total_H / 2))
+    pad_half_small_W = int((pad_total_W / 2))
+    pad_half_large_H = pad_total_H - pad_half_small_H
+    pad_half_large_W = pad_total_W - pad_half_small_W
     if autopad_str == "VALID":
         return [0 for i in range(2 * n_dims)]
     elif autopad_str == "SAME_UPPER":
-        return [pad_half_small, pad_half_large] * n_dims
+        return [pad_half_small_H, pad_half_small_W, pad_half_large_H, pad_half_large_W]
     elif autopad_str == "SAME_LOWER":
-        return [pad_half_large, pad_half_small] * n_dims
+        return [pad_half_large_H, pad_half_large_W, pad_half_small_H, pad_half_small_W]
     else:
         raise Exception("Unsupported auto_pad: " + autopad_str)
 
@@ -65,15 +70,23 @@ def apply(self, model):
                 idt = model.get_tensor_datatype(cnv_input)
                 odt = model.get_tensor_datatype(cnv_output)
                 # extract conv parameters
-                k = get_by_name(n.attribute, "kernel_shape").ints[-1]
+                k = get_by_name(n.attribute, "kernel_shape").ints
+                if len(k) == 1:  # assume square kernel
+                    k_H = k[0]
+                    k_W = k[0]
+                else:
+                    k_H = k[0]
+                    k_W = k[1]
                 stride = get_by_name(n.attribute, "strides").ints[-1]
                 group = get_by_name(n.attribute, "group").i
                 weight_name = n.input[1]
                 W_conv = model.get_initializer(weight_name)
                 ifm_ch = model.get_tensor_shape(n.input[0])[1]  # assume NCHW
                 ofm_ch = model.get_tensor_shape(n.output[0])[1]  # assume NCHW
-                ifm_dim = model.get_tensor_shape(n.input[0])[-1]  # assume NCHW
-                ofm_dim = model.get_tensor_shape(n.output[0])[-1]  # assume NCHW
+                ifm_dim_H = model.get_tensor_shape(n.input[0])[2]  # assume NCHW
+                ifm_dim_W = model.get_tensor_shape(n.input[0])[3]
+                ofm_dim_H = model.get_tensor_shape(n.output[0])[2]  # assume NCHW
+                ofm_dim_W = model.get_tensor_shape(n.output[0])[3]
                 # handle both auto_pad and explicit padding
                 auto_pad = get_by_name(n.attribute, "auto_pad")
                 if auto_pad is not None:
@@ -83,36 +96,53 @@ def apply(self, model):
                         # use specified padding
                         pad = get_by_name(n.attribute, "pads").ints
                     else:
+                        assert auto_pad != "NOTSET", "AUTOPAD NOT SUPPORTED YET"
                         pad = _auto_pad_to_explicit_padding(
                             auto_pad,
-                            ifm_dim,
-                            k,
+                            ifm_dim_H,
+                            ifm_dim_W,
+                            k_H,
+                            k_W,
                             stride,
                             len(model.get_tensor_shape(n.input[0])) - 2,
                         )
                 else:
                     # use specified padding
                     pad = get_by_name(n.attribute, "pads").ints
-                # ensure all pads are equal for now
-                assert (
-                    len(set(pad)) <= 1
-                ), "Only all-equal padding supported for now: " + str(pad)
-                pad = pad[-1]
+
+                # If len(pad) == 2, assume no padding for the other dimension
+                if len(pad) == 2:  # only one dimension should be padded
+                    assert (
+                        ifm_dim_H == 1 or ifm_dim_W == 1
+                    ), "Padding is assumed to be 1D, image is 2D"
+                    if ifm_dim_H == 1:  # Assumption: dim H is not padded
+                        pad_2D = [0, 0, 0, 0]
+                        pad_2D[1] = pad[0]
+                        pad_2D[3] = pad[1]
+                    elif ifm_dim_W == 1:  # Assumption: dim W is not padded
+                        pad_2D = [0, 0, 0, 0]
+                        pad_2D[0] = pad[0]
+                        pad_2D[2] = pad[1]
+                    pad = pad_2D
 
                 # if depthwise conv create sparse matrix and variable "dw"
                 # to store as attribute in Im2Col that indicates that the created
                 # Im2Col node belongs to a depthwise convolution
                 dw = False
                 if group == ifm_ch and ofm_ch == ifm_ch:
-                    W_sparse = np.zeros((ofm_ch, ifm_ch, k, k))
+                    W_sparse = np.zeros(
+                        (ofm_ch, ifm_ch, k_H, k_W)
+                    )  # (OFM, IFM, k_H, k_W)
                     for ch in range(ifm_ch):
-                        W_sparse[ch][ch] = W_conv[ch][0]
+                        W_sparse[ch][ch] = W_conv[ch][
+                            0
+                        ]  # W_conv = [OFM, IFM, k_H, k_W]
                     W_conv = W_sparse.astype(np.float32)
                     # we need to store information of the
                     # sparsity of the weight matrix. For this
                     # we use the sparsity annotation of the
                     # weight tensor
-                    sparsity = {"dw": {"kernel_shape": k}}
+                    sparsity = {"dw": {"kernel_shape": k_H}}
                     model.set_tensor_sparsity(weight_name, sparsity)
                     # additionally create variable "dw" to store
                     # as attribute in Im2Col that indicates that the created
@@ -123,9 +153,9 @@ def apply(self, model):
                 # conv weights are [OFM][IFM][k][k]
                 # first convert to [OFM][k][k][IFM] (to remain compatible with
                 # finn-hlslib and how it does im2col/sliding window)
-                W_matmul = W_conv.transpose(0, 2, 3, 1)
+                W_matmul = W_conv.transpose(0, 2, 3, 1)  # W_conv = [OFM, IFM, k_H, k_W]
                 # reshape into [OFM][k*k*IFM] matrix
-                W_matmul = W_matmul.reshape(ofm_ch, ifm_ch * k * k)
+                W_matmul = W_matmul.reshape(ofm_ch, ifm_ch * k_H * k_W)
                 # transpose to get ONNX-compatible [k*k*IFM][OFM] matrix
                 W_matmul = W_matmul.T
                 model.set_initializer(weight_name, W_matmul)
@@ -134,21 +164,25 @@ def apply(self, model):
                 inp_trans_out = helper.make_tensor_value_info(
                     model.make_new_valueinfo_name(),
                     TensorProto.FLOAT,
-                    (1, ifm_dim, ifm_dim, ifm_ch),  # NHWC
+                    (1, ifm_dim_H, ifm_dim_W, ifm_ch),  # NHWC
                 )
                 graph.value_info.append(inp_trans_out)
                 inp_trans_out = inp_trans_out.name
                 model.set_tensor_datatype(inp_trans_out, idt)
 
                 need_im2col = True
-                if k == 1 and pad == 0 and stride == 1:
+                # padding == 0 only if no padding is applied in any dimension
+                padding = 0 if all(p == 0 for p in pad) else 1
+
+                # k_H == k_W == 1: pointwise convolution, thus no im2col needed
+                if k_H == 1 and k_W == 1 and padding == 0 and stride == 1:
                     need_im2col = False
 
                 if need_im2col:
                     im2col_out = helper.make_tensor_value_info(
                         model.make_new_valueinfo_name(),
                         TensorProto.FLOAT,
-                        (1, ofm_dim, ofm_dim, ifm_ch * k * k),
+                        (1, ofm_dim_H, ofm_dim_W, ifm_ch * k_H * k_W),
                     )
                     graph.value_info.append(im2col_out)
                     im2col_out = im2col_out.name
@@ -157,7 +191,7 @@ def apply(self, model):
                 matmul_out = helper.make_tensor_value_info(
                     model.make_new_valueinfo_name(),
                     TensorProto.FLOAT,
-                    (1, ofm_dim, ofm_dim, ofm_ch),
+                    (1, ofm_dim_H, ofm_dim_W, ofm_ch),
                 )
                 graph.value_info.append(matmul_out)
                 matmul_out = matmul_out.name
@@ -178,9 +212,9 @@ def apply(self, model):
                         [im2col_out],
                         domain="finn.custom_op.general",
                         stride=stride,
-                        kernel_size=[k],
+                        kernel_size=[k_H, k_W],
                         pad_amount=pad,
-                        input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch),
+                        input_shape="(1,{},{},{})".format(ifm_dim_H, ifm_dim_W, ifm_ch),
                         depthwise=dw,
                     )
 
diff --git a/tests/transformation/test_conv_lowering.py b/tests/transformation/test_conv_lowering.py
index 90a9e53..75c8d1a 100644
--- a/tests/transformation/test_conv_lowering.py
+++ b/tests/transformation/test_conv_lowering.py
@@ -38,7 +38,7 @@
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.custom_op.general.im2col import compute_conv_output_dim
+from finn.custom_op.general.im2col import compute_conv_output_dim_2D_padding
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
@@ -72,37 +72,140 @@ def test_conv_lowering_convmnist():
 
 # input datatype
 @pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4])
 # kernel size
-@pytest.mark.parametrize("k", [2, 4])
+@pytest.mark.parametrize("k_H", [2])
+@pytest.mark.parametrize("k_W", [2])
 # input dimension
-@pytest.mark.parametrize("ifm_dim", [4, 6])
+@pytest.mark.parametrize("ifm_dim_H", [4])
+@pytest.mark.parametrize("ifm_dim_W", [4])
+# input channels
+@pytest.mark.parametrize("ifm_ch", [2])
+# stride
+@pytest.mark.parametrize("stride", [1, 2])
+# padding
+@pytest.mark.parametrize(
+    "padding",
+    [
+        [0, 0, 0, 0],
+        [0, 0, 0, 1],
+        [0, 0, 1, 0],
+        [0, 0, 1, 1],
+        [0, 1, 0, 0],
+        [0, 1, 0, 1],
+        [0, 1, 1, 0],
+        [0, 1, 1, 1],
+        [1, 0, 0, 0],
+        [1, 0, 0, 1],
+        [1, 0, 1, 0],
+        [1, 0, 1, 1],
+        [1, 1, 0, 0],
+        [1, 1, 0, 1],
+        [1, 1, 1, 0],
+        [1, 1, 1, 1],
+    ],
+)
+def test_non_equal_padding(
+    idt, k_H, k_W, ifm_dim_H, ifm_dim_W, ifm_ch, stride, padding
+):
+    wdt = idt
+    odt = DataType.INT32
+    ofm_ch = ifm_ch
+    pad_H = padding[0] + padding[2]
+    pad_W = padding[1] + padding[3]
+    ofm_dim_H = compute_conv_output_dim_2D_padding(ifm_dim_H, k_H, stride, pad_H)
+    ofm_dim_W = compute_conv_output_dim_2D_padding(ifm_dim_W, k_W, stride, pad_W)
+
+    # set up onnx model
+    inp = oh.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_H, ifm_dim_W]
+    )
+    outp = oh.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_H, ofm_dim_W]
+    )
+
+    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_H, k_W])
+
+    dw_cnv = oh.make_node(
+        "Conv",
+        inputs=["inp", "W"],
+        outputs=["outp"],
+        kernel_shape=[k_H, k_W],
+        pads=padding,
+        strides=[stride, stride],
+        group=1,
+    )
+    graph = oh.make_graph(
+        nodes=[dw_cnv],
+        name="dw_cnv_graph",
+        inputs=[inp],
+        outputs=[outp],
+        value_info=[W],
+    )
+
+    model = oh.make_model(graph, producer_name="dws_cnv-model")
+    model = ModelWrapper(model)
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+    model.set_tensor_datatype("W", wdt)
+    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, ifm_ch, k_H, k_W])
+    model.set_initializer("W", w_tensor)
+    model = model.transform(InferShapes())
+
+    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_H, ifm_dim_W])
+    input_dict = {"inp": input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict)
+    expected = output_dict["outp"]
+
+    model = model.transform(LowerConvsToMatMul())
+    output_dict = oxe.execute_onnx(model, input_dict)
+    produced = output_dict["outp"]
+    assert (produced == expected).all()
+
+
+# input datatype
+@pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4])
+# kernel size
+@pytest.mark.parametrize("k_H", [2, 4])
+@pytest.mark.parametrize("k_W", [2, 4])
+# input dimension
+@pytest.mark.parametrize("ifm_dim_H", [4, 6])
+@pytest.mark.parametrize("ifm_dim_W", [4, 6])
 # input channels
 @pytest.mark.parametrize("ifm_ch", [2, 3])
 # stride
 @pytest.mark.parametrize("stride", [1, 2])
 # padding
 @pytest.mark.parametrize("padding", [[0, 0, 0, 0], [1, 1, 1, 1]])
-def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
+def test_depthwise_conv_lowering(
+    idt, k_H, k_W, ifm_dim_H, ifm_dim_W, ifm_ch, stride, padding
+):
+    if k_H > ifm_dim_H:
+        pytest.skip("Kernel height must be smaller than image height")
+    if k_W > ifm_dim_W:
+        pytest.skip("Kernel width must be smaller than image width")
     wdt = idt
     odt = DataType.INT32
     ofm_ch = ifm_ch
-    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding[0])
+    pad_H = padding[0] + padding[2]
+    pad_W = padding[1] + padding[3]
+    ofm_dim_H = compute_conv_output_dim_2D_padding(ifm_dim_H, k_H, stride, pad_H)
+    ofm_dim_W = compute_conv_output_dim_2D_padding(ifm_dim_W, k_W, stride, pad_W)
 
     # set up onnx model
     inp = oh.make_tensor_value_info(
-        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
+        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_H, ifm_dim_W]
     )
     outp = oh.make_tensor_value_info(
-        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim]
+        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_H, ofm_dim_W]
     )
 
-    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k, k])
+    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k_H, k_W])
 
     dw_cnv = oh.make_node(
         "Conv",
         inputs=["inp", "W"],
         outputs=["outp"],
-        kernel_shape=[k, k],
+        kernel_shape=[k_H, k_W],
         pads=padding,
         strides=[stride, stride],
         group=ifm_ch,
@@ -120,11 +223,11 @@ def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
     model.set_tensor_datatype("inp", idt)
     model.set_tensor_datatype("outp", odt)
     model.set_tensor_datatype("W", wdt)
-    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k])
+    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k_H, k_W])
     model.set_initializer("W", w_tensor)
     model = model.transform(InferShapes())
 
-    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim, ifm_dim])
+    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_H, ifm_dim_W])
     input_dict = {"inp": input_tensor}
     output_dict = oxe.execute_onnx(model, input_dict)
     expected = output_dict["outp"]
@@ -140,17 +243,103 @@ def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
     assert im2col_node.get_nodeattr("depthwise") == 1
 
 
+# input datatype
+@pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4])
+# kernel size
+@pytest.mark.parametrize("k_H", [2, 4])
+@pytest.mark.parametrize("k_W", [2, 4, 1])
+# input dimension
+@pytest.mark.parametrize("ifm_dim_H", [4, 5])
+@pytest.mark.parametrize("ifm_dim_W", [4, 5, 1])
+# input channels
+@pytest.mark.parametrize("ifm_ch", [2, 3])
+# stride
+@pytest.mark.parametrize("stride", [1, 2])
+# padding
+@pytest.mark.parametrize("padding", [[0, 0, 0, 0], [1, 1, 1, 1]])
+def test_regular_conv_lowering(
+    idt, k_H, k_W, ifm_dim_H, ifm_dim_W, ifm_ch, stride, padding
+):
+    if k_H > ifm_dim_H:
+        pytest.skip("Kernel height must be smaller than image height")
+    if k_W > ifm_dim_W:
+        pytest.skip("Kernel width must be smaller than image width")
+    # Ensure the right padding parameters are set
+    if ifm_dim_H == 1:
+        padding[0] = 0
+        padding[2] = 0
+    if ifm_dim_W == 1:
+        padding[1] = 0
+        padding[3] = 0
+
+    wdt = idt
+    odt = DataType.INT32
+    ofm_ch = ifm_ch
+    pad_H = padding[0] + padding[2]
+    pad_W = padding[1] + padding[3]
+    ofm_dim_H = compute_conv_output_dim_2D_padding(ifm_dim_H, k_H, stride, pad_H)
+    ofm_dim_W = compute_conv_output_dim_2D_padding(ifm_dim_W, k_W, stride, pad_W)
+
+    # set up onnx model
+    inp = oh.make_tensor_value_info(
+        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_H, ifm_dim_W]
+    )
+    outp = oh.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_H, ofm_dim_W]
+    )
+
+    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_H, k_W])
+
+    dw_cnv = oh.make_node(
+        "Conv",
+        inputs=["inp", "W"],
+        outputs=["outp"],
+        kernel_shape=[k_H, k_W],
+        pads=padding,
+        strides=[stride, stride],
+        group=1,
+    )
+    graph = oh.make_graph(
+        nodes=[dw_cnv],
+        name="dw_cnv_graph",
+        inputs=[inp],
+        outputs=[outp],
+        value_info=[W],
+    )
+
+    model = oh.make_model(graph, producer_name="dws_cnv-model")
+    model = ModelWrapper(model)
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+    model.set_tensor_datatype("W", wdt)
+    w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, ifm_ch, k_H, k_W])
+    model.set_initializer("W", w_tensor)
+    model = model.transform(InferShapes())
+
+    input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_H, ifm_dim_W])
+    input_dict = {"inp": input_tensor}
+    output_dict = oxe.execute_onnx(model, input_dict)
+    expected = output_dict["outp"]
+
+    model = model.transform(LowerConvsToMatMul())
+    output_dict = oxe.execute_onnx(model, input_dict)
+    produced = output_dict["outp"]
+    assert (produced == expected).all()
+
+
 def test_conv_lowering_conv_1x1():
 
     np.random.seed(0)
-    in_feature_dim = 7
+    in_feature_dim_H = 7
+    in_feature_dim_W = 7
     in_chn = 3
     kernel_size = 1
-    out_feature_dim = in_feature_dim
+    out_feature_dim_H = in_feature_dim_H
+    out_feature_dim_W = in_feature_dim_W
 
-    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
-    output_shape = [1, in_chn, out_feature_dim, out_feature_dim]
+    input_shape = [1, in_chn, in_feature_dim_H, in_feature_dim_W]
+    output_shape = [1, in_chn, out_feature_dim_H, out_feature_dim_W]
     conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size]
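
For reference, a minimal sketch (not part of the patch) of how the tests above derive the expected output size from the ONNX pads attribute [H_begin, W_begin, H_end, W_end]. The conv_output_dim helper below is hypothetical; it only assumes the standard convolution output-size formula with the per-axis total (begin + end) padding, which is what the calls to compute_conv_output_dim_2D_padding in the tests appear to expect.

# illustrative sketch, not part of the patch
def conv_output_dim(ifm_dim, k, stride, total_pad):
    # standard convolution output-size formula with total padding per axis
    return (ifm_dim + total_pad - k) // stride + 1

padding = [1, 0, 0, 1]  # [H_begin, W_begin, H_end, W_end], hypothetical values
pad_H = padding[0] + padding[2]
pad_W = padding[1] + padding[3]
ofm_dim_H = conv_output_dim(ifm_dim=4, k=2, stride=1, total_pad=pad_H)
ofm_dim_W = conv_output_dim(ifm_dim=4, k=2, stride=1, total_pad=pad_W)
print(ofm_dim_H, ofm_dim_W)  # 4 4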