From 0b5d967850a1189ab455606b5a680a8ecf76cf42 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 4 Jun 2021 16:24:59 +0100 Subject: [PATCH] finn-base v0.0.2 (#34) * Modified set_nodeattr to allow using it on repeated fields (#18) * [base]: changed how the floats, ints, strings, tensors, graphs and sparse_tensors field of AttributeProto is set. * [Core] restrict attributes to tested types Co-authored-by: Yaman Umuroglu * Support for non-square input images and kernels for im2col node (#20) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. [test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. * [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. 
* Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [Im2Col] style fixes and comments Co-authored-by: Yaman Umuroglu * Update AUTHORS.rst * Support for non-square input images and kernels in LowerConvsToMatMul transformation (#16) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. [test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. * [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. 
* Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col] function compute_conv_output_dim can now be called in case non-equal and equal padding is assumed. [test_im2col] changed function call to compute_conv_output_dim. [test_conv_lowering] changed function call to compute_conv_output_dim. [lower_convs_to_matmul] removed old assertion. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [test_conv_lowering]: minor fix for test case depthwise and regular convolutions * Support for non-square input images and kernels for im2col node (#20) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. [test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. 
* [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. * Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [Im2Col] style fixes and comments Co-authored-by: Yaman Umuroglu * Update AUTHORS.rst Co-authored-by: Yaman Umuroglu * Added support for dilation value = 2 for 1D and 2D images/kernels (#17) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. [test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. 
* [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. * Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col] added support for dilations = 2 for 2D and 1D images. Dilation value must be equal along each axis. [test_im2col] added several test cases in case dilations = 2 (a.o. cases where image and kernel are 2D and 1D, with and without padding, and with stride = 2). [lower_convs_to_matmul] added support for dilation value. Dilation value must be equal along each axis. [test_conv_lowering] added test case for dilations = 2 for 2D and 1D images and kernels, with and without padding, and with stride = 2. * [lower_convs_to_matmul] removed old assertion [test_conv_lowering] added more dilation values to test cases. Dilation values of {1, 2, 3, 4} are tested. * [im2col] function compute_conv_output_dim can now be called in case non-equal and equal padding is assumed. [test_im2col] changed function call to compute_conv_output_dim [test_conv_lowering] changed function call to compute_conv_output_dim * [im2col] function compute_conv_output_dim can now be called in case non-equal and equal padding is assumed. [test_im2col] changed function call to compute_conv_output_dim. [test_conv_lowering] changed function call to compute_conv_output_dim. [lower_convs_to_matmul] removed old assertion. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: changed how a square kernel is instantiated. 
[lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [test_conv_lowering]: minor fix for test case depthwise and regular convolutions * [im2col]: minor style adjustment. [test_conv_lowering]: merged test functions into one test function. * Support for non-square input images and kernels for im2col node (#20) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. [test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. * [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. 
* Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [Im2Col] style fixes and comments Co-authored-by: Yaman Umuroglu * Update AUTHORS.rst * Support for non-square input images and kernels in LowerConvsToMatMul transformation (#16) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. [test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. * [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. 
* Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col] function compute_conv_output_dim can now be called in case non-equal and equal padding is assumed. [test_im2col] changed function call to compute_conv_output_dim. [test_conv_lowering] changed function call to compute_conv_output_dim. [lower_convs_to_matmul] removed old assertion. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [test_conv_lowering]: minor fix for test case depthwise and regular convolutions * Support for non-square input images and kernels for im2col node (#20) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. [test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. 
* [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. * Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [Im2Col] style fixes and comments Co-authored-by: Yaman Umuroglu * Update AUTHORS.rst Co-authored-by: Yaman Umuroglu Co-authored-by: Yaman Umuroglu * Update quantavgpool2d.py (#22) Add "Copyright (c) 2021 Xilinx, Inc" header * [batchnorm_to_affine]: epsilon value is now read out from the attributes. (#21) [test_batchnorm_to_affine]: added a test case for various epsilon values. * Added 3D to 4D (tensor) transformation (#19) * [im2col.py]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [im2col]: support for non-square input images and kernels, [test_im2col]: added/modified several test cases for (non-)square images and kernels * [test_general_transformation]: changed kernel_size attribute to list instead of integer as required by im2col node * [base]: changed how the "ints" field of AttributeProto set. 
[test_general_transformation]: changed the type of the kernel_size attribute to list of integers * removed unused import * [base]: added support for writing repeated fields in AttributeProto * minor style changes * [im2col, test_im2col]: added support for non-equal padding * [lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding. [test_conv_lowering]: added/modified test cases for non-equal padding, depthwise convolution and 'standard' convolution. * [test_conv_lowering]: included 1D depthwise and regular convolutions in tests * Revert "[test_conv_lowering]: included 1D depthwise and regular convolutions in tests" This reverts commit 3ff449c42d709e640ca904c41a241bb94fc9e335 * Revert "[lower_convs_to_matmul]: added support for non-square input images and kernels and non-equal padding." This reverts commit 15e34ed8d07d4a55996f162bd3bd1aa24b33c3ac. * Revert "[im2col, test_im2col]: added support for non-equal padding" This reverts commit c524020ee8a7b363eb0c30d70cf21332e9c73678. * [im2col] added support for dilations = 2 for 2D and 1D images. Dilation value must be equal along each axis. [test_im2col] added several test cases in case dilations = 2 (a.o. cases where image and kernel are 2D and 1D, with and without padding, and with stride = 2). [lower_convs_to_matmul] added support for dilation value. Dilation value must be equal along each axis. [test_conv_lowering] added test case for dilations = 2 for 2D and 1D images and kernels, with and without padding, and with stride = 2. * [lower_convs_to_matmul] removed old assertion [test_conv_lowering] added more dilation values to test cases. Dilation values of {1, 2, 3, 4} are tested. * [im2col] function compute_conv_output_dim can now be called in case non-equal and equal padding is assumed. 
[test_im2col] changed function call to compute_conv_output_dim [test_conv_lowering] changed function call to compute_conv_output_dim * [im2col] function compute_conv_output_dim can now be called in case non-equal and equal padding is assumed. [test_im2col] changed function call to compute_conv_output_dim. [test_conv_lowering] changed function call to compute_conv_output_dim. [lower_convs_to_matmul] removed old assertion. * [im2col]: minor fix with assumption on kernel dimension [lower_convs_to_matmul]: minor fix with assumption on kernel dimension * [change_3d_tensors_to_4d]: added new transformation that transforms 3D tensors to 4D and changes the nodes accordingly [test_4d_conversion]: test function for 3D to 4D tensor transformation * [change_3d_tensors_to_4d]: added new transformation that changes 3D tensors to 4D. [test_4d_conversion]: added a test case for the 3D to 4D transformation. * [change_3d_tensors_to_4d]: added 3D to 4D transformation (for QuartzNet). [test_4d_conversion]: added test case for 3D to 4D transform. * [change_3d_tensors_to_4d]: changed how an invalid graph is handled. [test_4d_conversion]: changed the test case for an invalid graph. * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: changed how a square kernel is instantiated. [lower_convs_to_matmul]: changed how the kernel size attribute is read (based on how a square kernel is instantiated). * [im2col]: minor change in style. * [im2col]: minor style change and changed the way how a square kernel is instantiated. * [test_conv_lowering]: merged tests for depthwise and standard convolutions. 
* [test_conv_lowering]: minor fix for test case depthwise and regular convolutions * [im2col]: minor style adjustment. [test_conv_lowering]: merged test functions into one test function. * [change_3d_tensors_to_4d]: style fixes and comments. [test_4d_converions]: rearranged code. * [Transform] check invalid node list length * [change_3d_tensors_to_4d]: rearranged the code to make it more readable. Co-authored-by: Yaman Umuroglu * [Docs] update tutorials * [Util] experimental: fast_mode data packing for binary (#24) * Generic partitioning feature (#23) * Add basic partitioning functionality * Mount build dir within docker container * Support for non-linear models and multi in/out partitions * Remove dataflowpartition custom op from finn-base * Fix temporary build dir for CI * Fix docstring * [create_generic_partitions]: minor modification, removed redundant output value_info entries. (#26) * [extend_partition]: added a new transformation ExtendPartition. (#27) [test_extend_partition]: added a test case for the new transformation. * Added support for non-equal strides along different axes (#25) * [im2col]: added support for non-equal strides along different axes and cleaned up the code. [lower_convs_to_matmul]: added support for non-equal strides along different axes and cleaned up the code. [test_conv_lowering]: added test case for non-equal strides along different axes. * [im2col]: minor fix. [test_im2col]: added test case for non-equal strides along different axes. * Changes for supporting vitis_hls (#28) * [Refactor] split up RTL/HLS-related utils * [Util] rename to CallHLS and allow specifying vivado_hls/vitis_hls * [Util] more flexible stream naming in rtlsim_multi_io * Changes for supporting non-equal dilation (#29) * added support for non-equal dilation value along (H, W) dimension * added test cases for non-equal dilation configurations * appending dilation value along dummy dimension correctly (i.e. 
with a '1') * changed tensor sparsity annotation for consistency * Support infer_datatype for flatten layer (#30) * Support infer_datatype for flatten layer * [InferDT] add more identity op types for datatype inference * [Lint] fix linting issues Co-authored-by: Yaman Umuroglu * Update AUTHORS.rst * Create python-publish.yml * Add ZCU111 board to part map (#32) * Update AUTHORS.rst Co-authored-by: Mirza Mrahorovic <34712307+mmrahorovic@users.noreply.github.com> Co-authored-by: jalezeta <51440887+jalezeta@users.noreply.github.com> Co-authored-by: Felix Jentzsch <45395194+fpjentzsch@users.noreply.github.com> --- .github/workflows/ci-pipeline.yml | 2 + .github/workflows/python-publish.yml | 36 + AUTHORS.rst | 7 +- docs/tutorials.rst | 8 +- run-docker.sh | 9 + src/finn/core/onnx_exec.py | 22 +- src/finn/core/rtlsim_exec.py | 5 +- src/finn/custom_op/base.py | 13 +- src/finn/custom_op/general/__init__.py | 4 +- ...taflowpartition.py => genericpartition.py} | 26 +- src/finn/custom_op/general/im2col.py | 197 ++- src/finn/custom_op/general/quantavgpool2d.py | 28 + .../transformation/batchnorm_to_affine.py | 4 +- .../transformation/change_3d_tensors_to_4d.py | 195 +++ .../create_generic_partitions.py | 232 +++ src/finn/transformation/extend_partition.py | 99 ++ src/finn/transformation/infer_datatypes.py | 9 +- .../transformation/lower_convs_to_matmul.py | 98 +- src/finn/util/basic.py | 2 + src/finn/util/data_packing.py | 27 +- src/finn/util/fpgadataflow.py | 212 +-- src/finn/util/hls.py | 74 + src/finn/util/pyverilator.py | 179 +- tests/core/test_modelwrapper.py | 2 +- tests/custom_op/test_im2col.py | 1461 ++++++++++++++++- tests/transformation/test_4d_conversion.py | 254 +++ .../test_batchnorm_to_affine.py | 53 + tests/transformation/test_conv_lowering.py | 196 ++- tests/transformation/test_extend_partition.py | 322 ++++ .../test_general_transformation.py | 8 +- .../test_generic_partitioning.py | 111 ++ .../transformation/test_merge_onnx_models.py | 2 +- 
tests/util/test_data_packing.py | 18 + 33 files changed, 3506 insertions(+), 409 deletions(-) create mode 100644 .github/workflows/python-publish.yml rename src/finn/custom_op/general/{streamingdataflowpartition.py => genericpartition.py} (71%) mode change 100644 => 100755 create mode 100644 src/finn/transformation/change_3d_tensors_to_4d.py create mode 100755 src/finn/transformation/create_generic_partitions.py create mode 100644 src/finn/transformation/extend_partition.py create mode 100644 src/finn/util/hls.py create mode 100644 tests/transformation/test_4d_conversion.py create mode 100644 tests/transformation/test_extend_partition.py create mode 100755 tests/transformation/test_generic_partitioning.py diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml index f7bc10f..5248beb 100644 --- a/.github/workflows/ci-pipeline.yml +++ b/.github/workflows/ci-pipeline.yml @@ -30,10 +30,12 @@ jobs: run: | docker run --rm \ -v $(pwd):/workspace/finn-base \ + -e FINN_BUILD_DIR=/tmp/finn-base-gha \ finn-base-gha run_tests.sh - name: Build Docs run: | docker run --rm \ -v $(pwd):/workspace/finn-base \ + -e FINN_BUILD_DIR=/tmp/finn-base-gha \ finn-base-gha build_docs.sh diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..3bfabfc --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,36 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. 
+ +name: Upload Python Package + +on: + release: + types: [published] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/AUTHORS.rst b/AUTHORS.rst index 80551fd..14346eb 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -2,9 +2,12 @@ Contributors ============ -* Yaman Umuroglu (maintainer) -* Sambhav Jain (maintainer) +* Yaman Umuroglu (@maltanar) (maintainer) +* Sambhav Jain (@sjain-stanford) * Jakoba Petri-Koenig (@auphelia) * Lucian Petrica (@quetric) * Tobias Alonso (@Tobi-Alonso) * Hendrik Borras (@HenniOVP) +* Mirza Mrahorovic (@mmrahorovic) +* Felix Paul Jentzsch (@felixpj) +* Jon Ander Lezeta (@jalezeta) diff --git a/docs/tutorials.rst b/docs/tutorials.rst index b2ba5da..5878f54 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -26,8 +26,12 @@ The notebooks in this folder are more developer oriented. They should help you t * 0_custom_analysis_pass - * This notebook explains what an analysis pass is and how to write one for FINN. + * Explains what an analysis pass is and how to write one for FINN. * 1_custom_transformation_pass - * This notebook explains what a transformation pass is and how to write one for FINN. + * Explains what a transformation pass is and how to write one for FINN. + +* 2_custom_op + + * Explains the basics of FINN custom ops and how to define a new one. diff --git a/run-docker.sh b/run-docker.sh index 37a0812..ab6e6ec 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -52,6 +52,11 @@ SCRIPT=$(readlink -f "$0") # Absolute path of dir this script is in. 
SCRIPTPATH=$(dirname "${SCRIPT}") +# Take build dir from environment variable, otherwise use this default +: ${FINN_HOST_BUILD_DIR="/tmp/finn-base_dev"} +# Ensure build dir exists locally +mkdir -p $FINN_HOST_BUILD_DIR + DOCKER_INTERACTIVE="" if [ "$1" = "tests" ]; then @@ -70,6 +75,8 @@ else exit -1 fi +gecho "Mounting $FINN_HOST_BUILD_DIR into $FINN_HOST_BUILD_DIR" + # Build the finn-base docker image docker build -f docker/Dockerfile -t ${DOCKER_TAG} \ --build-arg GROUP=${DOCKER_GROUP} \ @@ -81,4 +88,6 @@ docker build -f docker/Dockerfile -t ${DOCKER_TAG} \ # Launch container with current directory mounted docker run -t --rm ${DOCKER_INTERACTIVE} \ -v ${SCRIPTPATH}:/workspace/finn-base \ + -v $FINN_HOST_BUILD_DIR:$FINN_HOST_BUILD_DIR \ + -e FINN_BUILD_DIR=$FINN_HOST_BUILD_DIR \ ${DOCKER_TAG} ${DOCKER_CMD} diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py index 3c3d5b9..5de1afd 100644 --- a/src/finn/core/onnx_exec.py +++ b/src/finn/core/onnx_exec.py @@ -51,7 +51,27 @@ def execute_node(node, context, graph, return_full_exec_context=False): Input/output provided via context.""" - if node.op_type == "StreamingDataflowPartition": + if node.op_type == "GenericPartition": + partition_node = getCustomOp(node) + model = ModelWrapper(partition_node.get_nodeattr("model")) + inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items())) + # inputs may have been renamed in partition + for i, old_iname in enumerate(node.input): + new_iname = model.graph.input[i].name + if old_iname != new_iname: + inp_ctx[new_iname] = inp_ctx[old_iname] + del inp_ctx[old_iname] + ret = execute_onnx(model, inp_ctx, return_full_exec_context) + # outputs may have been renamed in partition + for i, node_oname in enumerate(node.output): + model_oname = model.graph.output[i].name + context[node_oname] = ret[model_oname] + # prefix and insert exec context entries + if return_full_exec_context: + for tname in ret.keys(): + if tname not in [x.name for x in 
model.graph.output]: + context[node.name + "_" + tname] = ret[tname] + elif node.op_type == "StreamingDataflowPartition": sdp_node = getCustomOp(node) model = ModelWrapper(sdp_node.get_nodeattr("model")) inp_ctx = dict(filter(lambda x: x[0] in node.input, context.items())) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 6ebe66a..7f02f60 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -30,11 +30,12 @@ from finn.custom_op.registry import getCustomOp from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -from finn.util.fpgadataflow import ( +from finn.util.pyverilator import ( pyverilate_get_liveness_threshold_cycles, pyverilate_stitched_ip, + reset_rtlsim, + toggle_clk, ) -from finn.util.pyverilator import reset_rtlsim, toggle_clk try: from pyverilator import PyVerilator diff --git a/src/finn/custom_op/base.py b/src/finn/custom_op/base.py index 2055d5b..0b8eedd 100644 --- a/src/finn/custom_op/base.py +++ b/src/finn/custom_op/base.py @@ -112,7 +112,18 @@ def set_nodeattr(self, name, value): if dtype == "s": # encode string attributes value = value.encode("utf-8") - attr.__setattr__(dtype, value) + attr.__setattr__(dtype, value) + elif dtype == "floats": # list of floats + attr.floats[:] = value + elif dtype == "ints": # list of integers + attr.ints[:] = value + elif dtype in ["strings", "tensors", "graphs", "sparse_tensors"]: + # untested / unsupported attribute types + # add testcases & appropriate getters before enabling + raise Exception("Attribute type %s not yet supported" % dtype) + else: + # attempt to set attr.dtype = value directly + attr.__setattr__(dtype, value) else: # not set, create and insert AttributeProto attr_proto = helper.make_attribute(name, value) diff --git a/src/finn/custom_op/general/__init__.py b/src/finn/custom_op/general/__init__.py index 1d1770f..3bb8bef 100644 --- a/src/finn/custom_op/general/__init__.py +++ b/src/finn/custom_op/general/__init__.py @@ 
-27,11 +27,11 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from finn.custom_op.general.debugmarker import DebugMarker +from finn.custom_op.general.genericpartition import GenericPartition from finn.custom_op.general.im2col import Im2Col from finn.custom_op.general.maxpoolnhwc import MaxPoolNHWC from finn.custom_op.general.multithreshold import MultiThreshold from finn.custom_op.general.quantavgpool2d import QuantAvgPool2d -from finn.custom_op.general.streamingdataflowpartition import StreamingDataflowPartition from finn.custom_op.general.xnorpopcount import XnorPopcountMatMul custom_op = dict() @@ -39,7 +39,7 @@ custom_op["DebugMarker"] = DebugMarker custom_op["QuantAvgPool2d"] = QuantAvgPool2d custom_op["MaxPoolNHWC"] = MaxPoolNHWC -custom_op["StreamingDataflowPartition"] = StreamingDataflowPartition +custom_op["GenericPartition"] = GenericPartition custom_op["MultiThreshold"] = MultiThreshold custom_op["XnorPopcountMatMul"] = XnorPopcountMatMul custom_op["Im2Col"] = Im2Col diff --git a/src/finn/custom_op/general/streamingdataflowpartition.py b/src/finn/custom_op/general/genericpartition.py old mode 100644 new mode 100755 similarity index 71% rename from src/finn/custom_op/general/streamingdataflowpartition.py rename to src/finn/custom_op/general/genericpartition.py index 869498a..77cb948 --- a/src/finn/custom_op/general/streamingdataflowpartition.py +++ b/src/finn/custom_op/general/genericpartition.py @@ -29,22 +29,14 @@ from finn.custom_op.base import CustomOp -class StreamingDataflowPartition(CustomOp): - """Class that corresponds to the meta/container node StreamingDataflowPartition - which is a placeholder for a group of fpgadataflow nodes that have been separated - out into a FINN-ONNX model of its own. 
Note that is does not produce any HLS or - bitfile by itself.""" +class GenericPartition(CustomOp): + """Class that corresponds to the meta/container node GenericPartition + which is a placeholder for a group of nodes that have been separated + out into a FINN-ONNX model of its own.""" def get_nodeattr_types(self): return { "model": ("s", True, ""), - "res_estimate": ("s", False, ""), - "res_hls": ("s", False, ""), - "res_synth": ("s", False, ""), - "slr": ("i", False, -1), - "partition_id": ("i", False, 0), - "device_id": ("i", False, 0), - "mem_port": ("s", False, ""), } def make_shape_compatible_op(self, model): @@ -54,8 +46,6 @@ def infer_node_datatype(self, model): pass def execute_node(self, context, graph): - # TODO add RPC execution with synthesized bitfile? - # whole-design rtlsim with PyVerilator may also be an alternative pass def verify_node(self): @@ -79,14 +69,8 @@ def verify_node(self): except Exception: info_messages.append( """The necessary attributes do not exist. - StreamingDataflowPartition needs the following attribute(s): + GenericPartition needs the following attribute(s): model""" ) - # verify the number of inputs - if len(self.onnx_node.input) >= 1: - info_messages.append("The number of inputs is correct") - else: - info_messages.append("StreamingDataflowPartition needs 1 data input") - return info_messages diff --git a/src/finn/custom_op/general/im2col.py b/src/finn/custom_op/general/im2col.py index 7e465fe..e76c613 100644 --- a/src/finn/custom_op/general/im2col.py +++ b/src/finn/custom_op/general/im2col.py @@ -8,49 +8,94 @@ # adapted from A. Karpathy's CS231 im2col code # utilities to generate a patch matrix from a multichannel image # of shape (batches, channels, height, width) +# note: the spatial dimensions can be set to 1 to indicate +# a dummy dimension (e.g. 
1D convs represented as 2D) -def compute_conv_output_dim(ifm_dim, k, stride, pad=0): - """Returns spatial output dimension size for convolution with given params.""" - return int(((ifm_dim + 2 * pad - k) / stride) + 1) +def compute_conv_output_dim(ifm_dim, k, stride, total_pad=0, dilation=1): + """Returns spatial output dimension size for convolution with given params. + total_pad gives the total amount of padding along the entire axis + (both sides included). + """ + if ifm_dim == 1: + # indicates dummy dimension, keep as-is + # Also ensure that every call to this function respects the expected + # kernel shape and padding + assert ( + k == 1 and total_pad == 0 + ), "Unexpected kernel shape and padding for 1D input image" + out_dim = 1 + else: + out_dim = int(((ifm_dim + total_pad - dilation * (k - 1) - 1) / stride) + 1) + return out_dim def get_im2col_indices_nchw( - x_shape, field_height, field_width, padding=0, stride_y=1, stride_x=1 + x_shape, + field_height, + field_width, + padding=0, + stride_h=1, + stride_w=1, + dilation_h=1, + dilation_w=1, ): """Returns im2col indices.""" # First figure out what the size of the output should be - N, C, H, W = x_shape - out_height = compute_conv_output_dim(H, field_height, stride_y, padding) - out_width = compute_conv_output_dim(W, field_width, stride_x, padding) - - i0 = np.repeat(np.arange(field_height), field_width) - i0 = np.tile(i0, C) - i1 = stride_y * np.repeat(np.arange(out_height), out_width) - j0 = np.tile(np.arange(field_width), field_height * C) - j1 = stride_x * np.tile(np.arange(out_width), out_height) + n, c, h, w = x_shape + pad_h = padding[0] + padding[2] + pad_w = padding[1] + padding[3] + out_height = compute_conv_output_dim(h, field_height, stride_h, pad_h, dilation_h) + out_width = compute_conv_output_dim(w, field_width, stride_w, pad_w, dilation_w) + + i0 = dilation_h * np.repeat(np.arange(field_height), field_width) + i0 = np.tile(i0, c) + i1 = stride_h * np.repeat(np.arange(out_height), out_width) 
+ j0 = dilation_w * np.tile(np.arange(field_width), field_height * c) + j1 = stride_w * np.tile(np.arange(out_width), out_height) i = i0.reshape(-1, 1) + i1.reshape(1, -1) j = j0.reshape(-1, 1) + j1.reshape(1, -1) - k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1) + k = np.repeat(np.arange(c), field_height * field_width).reshape(-1, 1) return (k, i, j) def im2col_indices_nchw( - x, field_height, field_width, padding=0, stride_y=1, stride_x=1, pad_val=0 + x, + ifm_h, + ifm_w, + field_height, + field_width, + padding=[0, 0, 0, 0], + stride_h=1, + stride_w=1, + pad_val=0, + dilation_h=1, + dilation_w=1, ): - """Performs im2col on x with given field height and width, as well as values - for padding and stride size. + """Performs im2col on image (2D tensor, possibly with 1-length dummy dimensions) x with + given field height and width, as well as values for padding and stride size. Returns result of im2col.""" # Zero-pad the input p = padding + x_padded = np.pad( - x, ((0, 0), (0, 0), (p, p), (p, p)), mode="constant", constant_values=pad_val + x, + ((0, 0), (0, 0), (p[0], p[2]), (p[1], p[3])), + mode="constant", + constant_values=pad_val, ) k, i, j = get_im2col_indices_nchw( - x.shape, field_height, field_width, padding, stride_y, stride_x + x.shape, + field_height, + field_width, + padding, + stride_h, + stride_w, + dilation_h, + dilation_w, ) cols = x_padded[:, k, i, j] @@ -61,34 +106,50 @@ def im2col_indices_nchw( # ONNX i/o tensor shape assumptions for Im2Col: # input 0 is the input vector, shape (1, ih, iw, ifm) -# output 0 is the output vector, shape (1, oh, ow, k*k*ifm) +# output 0 is the output vector, shape (1, oh, ow, kh*kw*ifm) # where: # * ih, iw are the height and width of the input image # * oh, ow are the height and width of the output (lowered) image # * ifm is the number of input channels -# * k is the convolutional kernel size +# * kh, kw is the convolutional kernel size # note: for the innermost (dot product) dimension of 
k*k*ifm, we # assume an internal ordering (k, k, ifm) +# note2: it's possible to set one of ih, iw to be 1 to indicate a +# dummy dimension, e.g. for representing 1D convs as 2D. the corresponding +# oh/ow and kh/kw will also be 1 in this case + class Im2Col(CustomOp): def get_nodeattr_types(self): return { - "stride": ("i", True, 1), - "kernel_size": ("i", True, 1), + # stride and shape of convolution kernel + "stride": ("ints", True, []), + "kernel_size": ("ints", True, []), + # input tensor shape "input_shape": ("s", True, ""), - "pad_amount": ("i", False, 0), + # amount of padding to be inserted before/after each non-dummy spatial dim + # i.e. [H_begin, W_begin, H_end, W_end] + "pad_amount": ("ints", False, [0, 0, 0, 0]), # default: no padding + # value of padding pixels to be inserted "pad_value": ("i", False, 0), # depthwise: if 1, infer ConvolutionInputGenerator with depthwise == 1 "depthwise": ("i", False, 0, {0, 1}), + # dilation factor applied to the conv kernel + "dilations": ("ints", False, [1, 1]), } def make_shape_compatible_op(self, model): - k = self.get_nodeattr("kernel_size") - stride = self.get_nodeattr("stride") + k_h, k_w = self.get_nodeattr("kernel_size") # Assumption: Height x Width + stride_h, stride_w = self.get_nodeattr("stride") ishape = self.get_nodeattr("input_shape") - pad = self.get_nodeattr("pad_amount") + dilation_h, dilation_w = self.get_nodeattr("dilations") + pad = self.get_nodeattr( + "pad_amount" + ) # padding: [H_begin, W_begin, H_end, W_end] + pad_h = pad[0] + pad[2] + pad_w = pad[1] + pad[3] # convert string into list of integers ishape = ishape.strip("(") @@ -100,12 +161,33 @@ def make_shape_compatible_op(self, model): # extract all necessary information and determine output dimensions ifm_ch = ishape[-1] assert len(ishape) == 4, "Unexpected input shape for Im2Col" - assert ishape[1] == ishape[2], "Im2Col for non-square images unsupported" - ifm_dim = ishape[1] - ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad) + 
# NHWC (FINN always converts to NHWC during conv lowering) + ifm_dim_h = ishape[1] + ifm_dim_w = ishape[2] + + # check that kernel tensor also respects any existing dummy dimensions + if ifm_dim_h == 1: + kernel_1d = k_h == 1 + pad_1d = pad_h == 0 + assert ( + kernel_1d and pad_1d + ), "Unexpected kernel shape and padding for input image\ + of dimensions (N, 1, W, C)" + if ifm_dim_w == 1: + kernel_1d = k_w == 1 + pad_1d = pad_w == 0 + assert ( + kernel_1d and pad_1d + ), "Unexpected kernel shape padding for input image\ + of dimensions (N, H, 1, C)" + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad_h, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad_w, dilation_w) # implement tensor with correct shape - values = np.random.randn(1, ofm_dim, ofm_dim, k * k * ifm_ch).astype(np.float32) + values = np.random.randn(1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch).astype( + np.float32 + ) return helper.make_node( "Constant", inputs=[], @@ -126,32 +208,65 @@ def infer_node_datatype(self, model): def execute_node(self, context, graph): node = self.onnx_node - k = self.get_nodeattr("kernel_size") - stride = self.get_nodeattr("stride") + k_h, k_w = self.get_nodeattr("kernel_size") # Assumption: Height x Width + stride_h, stride_w = self.get_nodeattr("stride") pad = self.get_nodeattr("pad_amount") + pad_h = pad[0] + pad[2] + pad_w = pad[1] + pad[3] pad_val = self.get_nodeattr("pad_value") + dilation_h, dilation_w = self.get_nodeattr("dilations") + iname = node.input[0] x = context[iname] qnt_annotations = graph.quantization_annotation ret = util.get_by_name(qnt_annotations, iname, "tensor_name") ret = util.get_by_name(ret.quant_parameter_tensor_names, "finn_datatype", "key") idt = DataType[ret.value] - if pad != 0: + if pad != [0, 0, 0, 0]: assert idt.allowed(pad_val), "Im2Col dtype must allow pad_val" # check that input is NHWC assert x.ndim == 4, "Unexpected number of input dims for Im2Col" - N, H, W, C = x.shape - assert H == 
W, "Unexpected input shape for Im2Col" - out_dim = compute_conv_output_dim(H, k, stride, pad) + n, h, w, c = x.shape + + # check that kernel tensor also respects any existing dummy dimensions + if h == 1: + kernel_1d = k_h == 1 + pad_1d = pad_h == 0 + assert ( + kernel_1d and pad_1d + ), "Unexpected kernel shape and padding for input image\ + of dimensions (N, 1, W, C)" + if w == 1: + kernel_1d = k_w == 1 + pad_1d = pad_w == 0 + assert ( + kernel_1d and pad_1d + ), "Unexpected kernel shape and padding for input image\ + of dimensions (N, H, 1, C)" + + out_dim_h = compute_conv_output_dim(h, k_h, stride_h, pad_h, dilation_h) + out_dim_w = compute_conv_output_dim(w, k_w, stride_w, pad_w, dilation_w) # internally convert input to NCHW x = x.transpose(0, 3, 1, 2) # call NCHW im2col implementation - ret = im2col_indices_nchw(x, k, k, pad, stride, stride, pad_val=pad_val) - # result shape is (k*k*N, out_dim*out_dim), convert to NCHW - ret = ret.reshape(N, C, k, k, out_dim, out_dim) + ret = im2col_indices_nchw( + x, + h, + w, + k_h, + k_w, + pad, + stride_h, + stride_w, + pad_val=pad_val, + dilation_h=dilation_h, + dilation_w=dilation_w, + ) + # result shape is (k_H*k_W*N, out_dim_H*out_dim_W), convert to NCHW + ret = ret.reshape(n, c, k_h, k_w, out_dim_h, out_dim_w) # (N=0,C=1,kh=2,kw=3,H=4,W=5) -> (N=0,H=4,W=5,kh=2,kw=3,C=1) ret = ret.transpose(0, 4, 5, 2, 3, 1) - ret = ret.reshape(N, out_dim, out_dim, k * k * C) + ret = ret.reshape(n, out_dim_h, out_dim_w, k_h * k_w * c) # ret = ret.reshape(N, k * k * C, out_dim, out_dim) # convert output back to NHWC diff --git a/src/finn/custom_op/general/quantavgpool2d.py b/src/finn/custom_op/general/quantavgpool2d.py index 071a50f..148e266 100644 --- a/src/finn/custom_op/general/quantavgpool2d.py +++ b/src/finn/custom_op/general/quantavgpool2d.py @@ -1,3 +1,31 @@ +# Copyright (c) 2021 Xilinx, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ import numpy as np import onnxruntime as rt from onnx import TensorProto, helper diff --git a/src/finn/transformation/batchnorm_to_affine.py b/src/finn/transformation/batchnorm_to_affine.py index 3917e78..5204d07 100644 --- a/src/finn/transformation/batchnorm_to_affine.py +++ b/src/finn/transformation/batchnorm_to_affine.py @@ -32,6 +32,7 @@ from finn.transformation.base import Transformation from finn.transformation.infer_shapes import InferShapes +from finn.util.basic import get_by_name class BatchNormToAffine(Transformation): @@ -52,7 +53,8 @@ def apply(self, model): bias = model.get_initializer(n.input[2]) mean = model.get_initializer(n.input[3]) variance = model.get_initializer(n.input[4]) - epsilon = 1e-5 + epsilon = get_by_name(n.attribute, "epsilon") + epsilon = getattr(epsilon, "f", 1e-5) # find A and B to compute batchnorm as affine transpose Ax+B # TODO is a division by moving avg factor needed for variance? A = scale / np.sqrt(epsilon + variance) diff --git a/src/finn/transformation/change_3d_tensors_to_4d.py b/src/finn/transformation/change_3d_tensors_to_4d.py new file mode 100644 index 0000000..251f609 --- /dev/null +++ b/src/finn/transformation/change_3d_tensors_to_4d.py @@ -0,0 +1,195 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import warnings + +from finn.transformation.base import Transformation +from finn.transformation.general import RemoveUnusedTensors +from finn.transformation.infer_shapes import InferShapes +from finn.util.basic import get_by_name + +# FINN currently handles convolutions (when e.g lowering them +# to matmuls) with the assumption that they operate on 4D tensors +# shaped as (N,C,H,W). H/W can be 1 for convolutions on 1D data. +# This transformation converts a graph with 3D tensors to the expected +# 4D format. Note: the transformation only works for certain node types; +# see _find_invalid_nodes below. + + +def _find_invalid_nodes(model): + """ + Check whether the graph contains any node types that are not supported by the + Change3Dto4DTensors transformation. 
+ + """ + valid_nodes = [ + "Add", + "Mul", + "BatchNormalization", + "MultiThreshold", + "Conv", + "Transpose", + "LogSoftmax", + "ArgMax", + ] + invalid_nodes = [] + for n in model.graph.node: + node_op_type = n.op_type + if node_op_type in valid_nodes: + continue + else: + invalid_nodes.append(node_op_type) + + return invalid_nodes + + +class Change3DTo4DTensors(Transformation): + """ + Replaces 3D tensors with 4D tensors assuming the following format: + [N, C, H] -> [N, C, H, 1]. + The attributes of a (specific) set of supported nodes are changed accordingly. + If the graph contains unsupported nodes, a warning is raised and the transformation + is not applied. + """ + + def apply(self, model): + graph_modified = False + + invalid_nodes = _find_invalid_nodes(model) + if len(invalid_nodes) > 0: + warnings.warn( + "Transformation is not applied,\ + found unsupported nodes in the graph: {}.".format( + invalid_nodes + ) + ) + return (model, graph_modified) + + # Infer the shapes of each tensor, remove unused tensors + # and give each tensor a readable name + model = model.transform(InferShapes()) + model = model.transform(RemoveUnusedTensors()) + + # This list contains all nodes with initializers that need to be converted + nodes_with_initializers = ["Mul", "Conv", "Add"] + # Obtain a list of initializer names (used to filter out only value infos) + initializers_names = [x.name for x in model.graph.initializer] + + all_tensors = {} + # Extract the inputs + all_tensors = { + **all_tensors, + **{ + x.name: [x.type.tensor_type.elem_type, model.get_tensor_shape(x.name)] + for x in model.graph.input + }, + } + # Extract only the output tensors + all_tensors = { + **all_tensors, + **{ + x.name: [x.type.tensor_type.elem_type, model.get_tensor_shape(x.name)] + for x in model.graph.value_info + if x.name not in initializers_names + }, + } + # Extract only initializers from Conv, Mul and Add nodes (which are the + # only ones relevant for conversion) + all_tensors = { + 
**all_tensors, + **{ + x.name: [x.data_type, x.dims] + for x in model.graph.initializer + if model.find_consumers(x.name)[0].op_type in nodes_with_initializers + }, + } + # Extract the outputs + all_tensors = { + **all_tensors, + **{ + x.name: [x.type.tensor_type.elem_type, model.get_tensor_shape(x.name)] + for x in model.graph.output + }, + } + + # The list below contains tensor names that are the output of nodes that + # reduce the tensor's dimension. The shape of these tensors also needs + # to be extended + tensors_reduced_dimension = [] + for n in model.graph.node: + node_op_type = n.op_type + # Find tensors that are the output of nodes that reduce the dimension + if node_op_type == "ArgMax": + keep_dims = get_by_name(n.attribute, "keepdims", "name").i + if keep_dims == 0: + node_out = n.output + for n_o in node_out: + tensors_reduced_dimension.append(n_o) + # Each node from the list of supported nodes is made compatible + # with 4D tensors + if node_op_type == "Transpose": + perm = get_by_name(n.attribute, "perm", "name").ints + if ( + len(perm) == 3 + ): # Meaning that the transpose operation was on a 3D tensor + perm.append(3) # append 4th dimension + elif node_op_type == "ArgMax" or node_op_type == "LogSoftMax": + axis = get_by_name(n.attribute, "axis", "name") + if axis.i == -1: + axis.i = 2 # argmax is now on the second-to-last axis + elif node_op_type == "Conv": + dilations = get_by_name(n.attribute, "dilations", "name").ints + kernel_shape = get_by_name(n.attribute, "kernel_shape", "name").ints + pads = get_by_name(n.attribute, "pads", "name").ints + strides = get_by_name(n.attribute, "strides", "name").ints + if len(dilations) == 1: # we must add another dimension to it + dilations.append( + 1 + ) # only equal dilation value along each spatial axis is supported + if len(kernel_shape) == 1: # we must add another dimension to it + kernel_shape.append(1) + if ( + len(pads) == 2 + ): # pads = [x1_begin, x1_end] --> [x1_begin, x2_begin, x1_end, x2_end] + 
pads.insert(1, 0) + pads.append(0) + if len(strides) == 1: # strides = [stride_h, stride_w] + strides.append(1) + + # Change format of each input/value_info/output tensor + for k, v in all_tensors.items(): + tensor_type = v[0] + shape = v[1] + # Add extra dimension for tensors that either: + # 1) Have 3 dimensions ( (N,C,H) -> (N,C,H,1) ) + # 2) Come after operations that reduce their dimension: e.g. {Argmax, ...} + if len(shape) == 3 or k in tensors_reduced_dimension: + shape.append(1) + model.set_tensor_shape(k, shape, tensor_type) + + return (model, graph_modified) diff --git a/src/finn/transformation/create_generic_partitions.py b/src/finn/transformation/create_generic_partitions.py new file mode 100755 index 0000000..67da854 --- /dev/null +++ b/src/finn/transformation/create_generic_partitions.py @@ -0,0 +1,232 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import copy +import pathlib +from onnx import helper + +from finn.transformation.base import Transformation +from finn.util.basic import make_build_dir + + +class PartitionFromLambda(Transformation): + """Split a graph into partitions. Each resulting partition node has a model + attribute indicating the path to the subordinate onnx file. + Cleanup and InferShapes() transformations should be applied first. + + Argument 0: partitioning + * Function performing the mapping: node -> partition_id (int or string) + * Partitions may not cover the graph completely (nodes mapped to -1 are retained) + * Mapping must return -1 for GenericPartition nodes + + Argument 1 (optional): partition_dir + * Manually define where to save the partition models + """ + + def __init__(self, partitioning=lambda node: -1, partition_dir=None): + super().__init__() + self.partitioning = partitioning + self.partition_dir = partition_dir + + def apply(self, model): + # identify partitions to create + original_nodes = list(model.graph.node) + partition_ids = set(list(map(self.partitioning, original_nodes))) + partition_ids.discard(-1) + + # prepare dir for generated .onnx models + if self.partition_dir is None: + self.partition_dir = make_build_dir("partitioning_") + else: + pathlib.Path(self.partition_dir).mkdir(parents=True, exist_ok=True) + + for partition_id in partition_ids: + all_nodes = list(model.graph.node) + partition_nodes = list( + filter(lambda x: 
self.partitioning(x) == partition_id, all_nodes) + ) + non_partition_nodes = list( + filter(lambda x: x not in partition_nodes, all_nodes) + ) + + # partition the model into two models + p_model = copy.deepcopy(model) + non_p_model = model + # remove all non-partition nodes from the partition model + for node_to_remove in non_partition_nodes: + p_model.graph.node.remove(node_to_remove) + + # identify the entry and exit points for the partition part + p_in = [] + p_out = [] + p_start_ind = 0 + for node in p_model.graph.node: + for in_tensor in node.input: + # check if producer has been removed = lies outside the partition + has_initializer = in_tensor in [ + x.name for x in p_model.graph.initializer + ] + has_producer = p_model.find_producer(in_tensor) is not None + if not has_initializer and not has_producer: + # the same tensor could feed multiple nodes within the partition + # (e.g. for residual connections), so we avoid duplicates + if in_tensor not in p_in: + p_in.append(in_tensor) + # keep track of where this partition starts topologically + if p_start_ind == 0: + p_start_ind = all_nodes.index(node) + for out_tensor in node.output: + # check if tensor is top-level output + # or has a consumer outside the partition + if out_tensor in [x.name for x in model.graph.output]: + if out_tensor not in p_out: + p_out.append(out_tensor) + else: + for consumer in model.find_consumers(out_tensor): + if self.partitioning(consumer) != partition_id: + if out_tensor not in p_out: + p_out.append(out_tensor) + + p_in_vi = list(map(lambda x: p_model.get_tensor_valueinfo(x), p_in)) + p_out_vi = list(map(lambda x: p_model.get_tensor_valueinfo(x), p_out)) + + # check if partitioning is legal (i.e. 
creates no cycles) + to_check = [model.find_producer(x) for x in p_in] + while len(to_check) > 0: + next_to_check = [] + for node in to_check: + if node is not None: + assert ( + self.partitioning(node) != partition_id + ), """cycle-free graph violated: partition depends on itself""" + # print(node) + predecessors = model.find_direct_predecessors(node) + if predecessors is not None: + next_to_check.extend(predecessors) + to_check = next_to_check + + # set p graph in/out to be p_in/p_out + for x in p_model.graph.input: + p_model.graph.input.remove(x) + for i in p_in_vi: + p_model.graph.input.append(i) + + for x in p_model.graph.output: + p_model.graph.output.remove(x) + for o in p_out_vi: + p_model.graph.output.append(o) + + # remove redundant input and output value_info entries + for i in p_in_vi: + # the tensor can be both an input and value_info, so we also have to + # ensure that the tensor is not a relevant value_info before removing + if ( + i in p_model.graph.value_info + and p_model.find_producer(i.name) is None + ): + p_model.graph.value_info.remove(i) + + for o in p_out_vi: + # the tensor can both an output and value_info, so we also have to + # ensure that the tensor is not a relevant value_info before removing + if ( + o in p_model.graph.value_info + and p_model.find_consumers(o.name) is None + ): + p_model.graph.value_info.remove(o) + + # save partition model + p_model_filename = ( + self.partition_dir + "/partition_" + str(partition_id) + ".onnx" + ) + p_model.cleanup() + p_model.save(p_model_filename) + + # insert GenericPartition node + p_node = helper.make_node( + "GenericPartition", + p_in, + p_out, + name="GenericPartition_" + str(partition_id), + # use the model attribute to mark the partition model + model=p_model_filename, + domain="finn.custom_op.general", + ) + non_p_model.graph.node.insert(p_start_ind, p_node) + + # remove all partition nodes from the parent model + # do this after inserting the p_node for easier p_start_ind handling + for 
node_to_remove in partition_nodes: + non_p_model.graph.node.remove(node_to_remove) + + model = non_p_model + + return (model, False) + + +class PartitionFromDict(Transformation): + """Split a graph into partitions. Each resulting partition node has a model + attribute indicating the path to the subordinate onnx file. + Cleanup and InferShapes() transformations should be applied first. + + This transformation builds on PartitionFromLambda() and takes a dictionary that + defines partitions based on node indices. + + Argument 0: partitioning + * Dictionary with the following format: { partition_id : node_index_list } + * Example: {0 : [3,4,5], 1 : range(10, 15)} + + Argument 1 (optional): partition_dir + * Manually define where to save the partition models + """ + + def __init__(self, partitioning={}, partition_dir=None): + super().__init__() + self.partitioning = partitioning + self.partition_dir = partition_dir + + def apply(self, model): + # prepare node -> int assignment fct. + def partitioning_func(node): + partition_id = -1 + for key in self.partitioning: + if node in list(model.graph.node) and list(model.graph.node).index( + node + ) in list(self.partitioning[key]): + assert ( + partition_id == -1 + ), """single node assigned to multiple partitions""" + partition_id = key + + return partition_id + + # apply partitioning + model = model.transform( + PartitionFromLambda(partitioning_func, self.partition_dir) + ) + return (model, False) diff --git a/src/finn/transformation/extend_partition.py b/src/finn/transformation/extend_partition.py new file mode 100644 index 0000000..738c36c --- /dev/null +++ b/src/finn/transformation/extend_partition.py @@ -0,0 +1,99 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.base import Transformation +from finn.transformation.general import SortGraph +from finn.util.basic import get_by_name + + +class ExtendPartition(Transformation): + """Extends GenericPartition type nodes by inserting the graph pointed to by + the model attribute. 
+ Argument 0: extend_index + * List that contains the node indices of the GenericPartition nodes + """ + + def __init__(self, extend_index): + super().__init__() + self.extend_index = extend_index + + def apply(self, model): + graph = model.graph + graph_modified = False + + partition_nodes_dict = { + ind: n + for ind, n in enumerate(graph.node) + if n.op_type == "GenericPartition" + } + + for k, v in partition_nodes_dict.items(): + if k in self.extend_index: + path_to_model = get_by_name(v.attribute, "model", "name").s.decode( + "utf-8" + ) + model_partition = ModelWrapper(path_to_model) + + # Append nodes + for partition_node in model_partition.graph.node: + graph.node.append(partition_node) + + # Append value infos + partition_valueinfos = [ + x.name for x in model_partition.graph.value_info + ] + for vi_name in partition_valueinfos: + vi = model_partition.get_tensor_valueinfo(vi_name) + graph.value_info.append(vi) + + # Append initializers + partition_initializers = [x for x in model_partition.graph.initializer] + for i in partition_initializers: + graph.initializer.append(i) + + # Append tensor annotations, except for the input/output tensors + # of the partitioned graph, as these will be present in the + # 'upper' model. 
+ in_out_names = [x.name for x in model_partition.graph.input] + in_out_names += [x.name for x in model_partition.graph.output] + partition_annotations = [ + x + for x in model_partition.graph.quantization_annotation + if x.tensor_name not in in_out_names + ] + for a in partition_annotations: + graph.quantization_annotation.append(a) + + graph.node.remove(v) + graph_modified = True + + if graph_modified: + model = model.transform(SortGraph()) + + return (model, graph_modified) diff --git a/src/finn/transformation/infer_datatypes.py b/src/finn/transformation/infer_datatypes.py index 2b2e2a9..66d91ca 100644 --- a/src/finn/transformation/infer_datatypes.py +++ b/src/finn/transformation/infer_datatypes.py @@ -35,7 +35,14 @@ def _infer_node_datatype(model, node): """Infer output datatype(s) for a particular node. Returns True if any changes were made.""" - dt_identity_optypes = ["Reshape", "Transpose"] + dt_identity_optypes = [ + "Reshape", + "Transpose", + "Flatten", + "Slice", + "Gather", + "Identity", + ] idtypes = list(map(lambda x: model.get_tensor_datatype(x), node.input)) odtypes = list(map(lambda x: model.get_tensor_datatype(x), node.output)) op_type = node.op_type diff --git a/src/finn/transformation/lower_convs_to_matmul.py b/src/finn/transformation/lower_convs_to_matmul.py index c533bc2..111c3ea 100644 --- a/src/finn/transformation/lower_convs_to_matmul.py +++ b/src/finn/transformation/lower_convs_to_matmul.py @@ -30,20 +30,24 @@ from onnx import TensorProto, helper from finn.transformation.base import Transformation -from finn.transformation.infer_shapes import InferShapes from finn.util.basic import get_by_name -def _auto_pad_to_explicit_padding(autopad_str, idim, k, stride, n_dims): - pad_total = (stride - 1) * idim - stride + k - pad_half_small = int((pad_total / 2)) - pad_half_large = pad_total - pad_half_small +def _auto_pad_to_explicit_padding( + autopad_str, idim_h, idim_w, k_h, k_w, stride_h, stride_w, n_dims +): + pad_total_h = (stride_h - 1) * 
idim_h - stride_h + k_h + pad_total_w = (stride_w - 1) * idim_w - stride_w + k_w + pad_half_small_h = int((pad_total_h / 2)) + pad_half_small_w = int((pad_total_w / 2)) + pad_half_large_h = pad_total_h - pad_half_small_h + pad_half_large_w = pad_total_w - pad_half_small_w if autopad_str == "VALID": return [0 for i in range(2 * n_dims)] elif autopad_str == "SAME_UPPER": - return [pad_half_small, pad_half_large] * n_dims + return [pad_half_small_h, pad_half_small_w, pad_half_large_h, pad_half_large_w] elif autopad_str == "SAME_LOWER": - return [pad_half_large, pad_half_small] * n_dims + return [pad_half_large_h, pad_half_large_w, pad_half_small_h, pad_half_small_w] else: raise Exception("Unsupported auto_pad: " + autopad_str) @@ -59,21 +63,30 @@ def apply(self, model): for n in graph.node: node_ind += 1 if n.op_type == "Conv": - graph_modified = True cnv_input = n.input[0] cnv_output = n.output[0] idt = model.get_tensor_datatype(cnv_input) odt = model.get_tensor_datatype(cnv_output) # extract conv parameters - k = get_by_name(n.attribute, "kernel_shape").ints[-1] - stride = get_by_name(n.attribute, "strides").ints[-1] + k = get_by_name(n.attribute, "kernel_shape").ints + k_h = k[0] + k_w = k[1] + stride_h = get_by_name(n.attribute, "strides").ints[0] + stride_w = get_by_name(n.attribute, "strides").ints[1] group = get_by_name(n.attribute, "group").i weight_name = n.input[1] W_conv = model.get_initializer(weight_name) ifm_ch = model.get_tensor_shape(n.input[0])[1] # assume NCHW ofm_ch = model.get_tensor_shape(n.output[0])[1] # assume NCHW - ifm_dim = model.get_tensor_shape(n.input[0])[-1] # assume NCHW - ofm_dim = model.get_tensor_shape(n.output[0])[-1] # assume NCHW + ifm_dim_h = model.get_tensor_shape(n.input[0])[2] # assume NCHW + ifm_dim_w = model.get_tensor_shape(n.input[0])[3] + ofm_dim_h = model.get_tensor_shape(n.output[0])[2] # assume NCHW + ofm_dim_w = model.get_tensor_shape(n.output[0])[3] + dilation_attr = get_by_name(n.attribute, "dilations") + if 
dilation_attr is not None: + dilation = dilation_attr.ints + else: + dilation = [1, 1] # default value # handle both auto_pad and explicit padding auto_pad = get_by_name(n.attribute, "auto_pad") if auto_pad is not None: @@ -85,34 +98,42 @@ def apply(self, model): else: pad = _auto_pad_to_explicit_padding( auto_pad, - ifm_dim, - k, - stride, + ifm_dim_h, + ifm_dim_w, + k_h, + k_w, + stride_h, + stride_w, len(model.get_tensor_shape(n.input[0])) - 2, ) else: # use specified padding pad = get_by_name(n.attribute, "pads").ints - # ensure all pads are equal for now - assert ( - len(set(pad)) <= 1 - ), "Only all-equal padding supported for now: " + str(pad) - pad = pad[-1] + + # If len(pad) == 2, assume no padding for other dimension + if len(pad) == 2: # only one dimension should be padded + assert ( + ifm_dim_h == 1 or ifm_dim_w == 1 + ), "Padding is assumed to be 1D, image is 2D" # if depthwise conv create sparse matrix and variable "dw" # to store as attribute in Im2Col that indicates that the created # Im2Col node belongs to a depthwise convolution dw = False if group == ifm_ch and ofm_ch == ifm_ch: - W_sparse = np.zeros((ofm_ch, ifm_ch, k, k)) + W_sparse = np.zeros( + (ofm_ch, ifm_ch, k_h, k_w) + ) # (OFM, IFM, k_H, k_W) for ch in range(ifm_ch): - W_sparse[ch][ch] = W_conv[ch][0] + W_sparse[ch][ch] = W_conv[ch][ + 0 + ] # W_conv = [OFM, IFM, k_H, k_W] W_conv = W_sparse.astype(np.float32) # we need to store information of the # sparsity of the weight matrix. 
For this # we use the sparsity annotation of the # weight tensor - sparsity = {"dw": {"kernel_shape": k}} + sparsity = {"dw": {"kernel_shape": [k_h, k_w]}} model.set_tensor_sparsity(weight_name, sparsity) # additionally create variable "dw" to store # as attribute in Im2Col that indicates that the created @@ -123,9 +144,9 @@ def apply(self, model): # conv weights are [OFM][IFM][k][k] # first convert to [OFM][k][k][IFM] (to remain compatible with # finn-hlslib and how it does im2col/sliding window) - W_matmul = W_conv.transpose(0, 2, 3, 1) + W_matmul = W_conv.transpose(0, 2, 3, 1) # W_conv = [OFM, IFM, k_H, k_W] # reshape into [OFM][k*k*IFM] matrix - W_matmul = W_matmul.reshape(ofm_ch, ifm_ch * k * k) + W_matmul = W_matmul.reshape(ofm_ch, ifm_ch * k_h * k_w) # transpose to get ONNX-compatible [k*k*IFM][OFM] matrix W_matmul = W_matmul.T model.set_initializer(weight_name, W_matmul) @@ -134,21 +155,31 @@ def apply(self, model): inp_trans_out = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - (1, ifm_dim, ifm_dim, ifm_ch), # NHWC + (1, ifm_dim_h, ifm_dim_w, ifm_ch), # NHWC ) graph.value_info.append(inp_trans_out) inp_trans_out = inp_trans_out.name model.set_tensor_datatype(inp_trans_out, idt) need_im2col = True - if k == 1 and pad == 0 and stride == 1: + if all(p == 0 for p in pad): + padding = 0 + + # k_h=k_w==1: pointwise convolution, thus no im2col needed + if ( + k_h == 1 + and k_w == 1 + and padding == 0 + and stride_h == 1 + and stride_w == 1 + ): need_im2col = False if need_im2col: im2col_out = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - (1, ofm_dim, ofm_dim, ifm_ch * k * k), + (1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w), ) graph.value_info.append(im2col_out) im2col_out = im2col_out.name @@ -157,7 +188,7 @@ def apply(self, model): matmul_out = helper.make_tensor_value_info( model.make_new_valueinfo_name(), TensorProto.FLOAT, - (1, ofm_dim, ofm_dim, ofm_ch), + (1, ofm_dim_h, ofm_dim_w, 
ofm_ch), ) graph.value_info.append(matmul_out) matmul_out = matmul_out.name @@ -177,11 +208,12 @@ def apply(self, model): [inp_trans_out], [im2col_out], domain="finn.custom_op.general", - stride=stride, - kernel_size=k, + stride=[stride_h, stride_w], + kernel_size=[k_h, k_w], pad_amount=pad, - input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), + input_shape="(1,{},{},{})".format(ifm_dim_h, ifm_dim_w, ifm_ch), depthwise=dw, + dilations=dilation, ) # do matmul @@ -203,5 +235,5 @@ def apply(self, model): graph.node.insert(node_ind + 2, out_trans_node) # remove old nodes graph.node.remove(n) - model = model.transform(InferShapes()) + return (model, graph_modified) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 3781470..cacb5d4 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -44,6 +44,7 @@ pynq_part_map["Pynq-Z2"] = "xc7z020clg400-1" pynq_part_map["ZCU102"] = "xczu9eg-ffvb1156-2-e" pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e" +pynq_part_map["ZCU111"] = "xczu28dr-ffvg1517-2-e" # native AXI HP port width (in bits) for PYNQ boards pynq_native_port_width = dict() @@ -52,6 +53,7 @@ pynq_native_port_width["Ultra96"] = 128 pynq_native_port_width["ZCU102"] = 128 pynq_native_port_width["ZCU104"] = 128 +pynq_native_port_width["ZCU111"] = 128 # Alveo device and platform mappings alveo_part_map = dict() diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py index d75f860..500cae8 100644 --- a/src/finn/util/data_packing.py +++ b/src/finn/util/data_packing.py @@ -319,18 +319,37 @@ def finnpy_to_packed_bytearray( of 8 bits. The returned ndarray has the same number of dimensions as the input. - If fast_mode is enabled, will attempt to use shortcuts (casting) to save - on runtime for certain cases. - This mode is currently not well-tested, use at your own risk. 
+ If fast_mode is enabled, will attempt to use shortcuts to save + on runtime for certain cases: + * 8-bit ndarray -> 8-bit + * ndarray -> 1-bit and total bits % 8 == 0 + This mode is currently not well-tested, use at your own risk! """ - # handle no-packing cases (if fast_mode) via casting to save on compute + # handle fast_mode cases (currently only called from driver): if issubclass(type(ndarray), np.ndarray) and fast_mode: inp_is_byte = ndarray.dtype in [np.uint8, np.int8] out_is_byte = dtype.bitwidth() == 8 double_reverse = reverse_inner and reverse_endian + # fast mode case: byte -> byte: cast if inp_is_byte and out_is_byte and double_reverse: return ndarray.view(np.uint8) + # fast mode case: xxx -> bit with nbits % 8 == 0: np.packbits + out_is_bit = dtype.bitwidth() == 1 + bits = dtype.bitwidth() * ndarray.shape[-1] + bits_padded = roundup_to_integer_multiple(bits, 8) + no_pad = bits_padded == bits + if out_is_bit and no_pad and double_reverse: + in_as_int8 = ndarray.astype(np.int8) + # bipolar -> binary if needed + if dtype == DataType.BIPOLAR: + in_as_int8 = (in_as_int8 + 1) // 2 + # reverse inner + in_as_int8 = np.flip(in_as_int8, axis=-1) + # pack with numpy + packed_data = np.packbits(in_as_int8, axis=-1) + # reverse endianness and return + return np.flip(packed_data, axis=-1) if (not issubclass(type(ndarray), np.ndarray)) or ndarray.dtype != np.float32: # try to convert to a float numpy array (container dtype is float) diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py index d3f741f..9f521f4 100644 --- a/src/finn/util/fpgadataflow.py +++ b/src/finn/util/fpgadataflow.py @@ -26,127 +26,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import os -import subprocess - -from finn.util.basic import ( - get_by_name, - get_rtlsim_trace_depth, - is_finn_op, - make_build_dir, - which, -) - -try: - from pyverilator import PyVerilator -except ModuleNotFoundError: - PyVerilator = None - - -class IPGenBuilder: - """Builds the bash script to generate IP blocks using Vivado HLS.""" - - def __init__(self): - self.tcl_script = "" - self.ipgen_path = "" - self.code_gen_dir = "" - self.ipgen_script = "" - - def append_tcl(self, tcl_script): - """Sets member variable "tcl_script" to given tcl script.""" - self.tcl_script = tcl_script - - def set_ipgen_path(self, path): - """Sets member variable ipgen_path to given path.""" - self.ipgen_path = path - - def build(self, code_gen_dir): - """Builds the bash script with given parameters and saves it in given folder. - To guarantee the generation in the correct folder the bash script contains a - cd command.""" - assert which("vivado_hls") is not None, "vivado_hls not found in PATH" - self.code_gen_dir = code_gen_dir - self.ipgen_script = str(self.code_gen_dir) + "/ipgen.sh" - working_dir = os.environ["PWD"] - f = open(self.ipgen_script, "w") - f.write("#!/bin/bash \n") - f.write("cd {}\n".format(code_gen_dir)) - f.write("vivado_hls {}\n".format(self.tcl_script)) - f.write("cd {}\n".format(working_dir)) - f.close() - bash_command = ["bash", self.ipgen_script] - process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) - process_compile.communicate() - - -def pyverilate_stitched_ip(model, read_internal_signals=True): - """Given a model with stitched IP, return a PyVerilator sim object. - If read_internal_signals is True, it will be possible to examine the - internal (not only port) signals of the Verilog module, but this may - slow down compilation and emulation. 
- Trace depth is also controllable, see get_rtlsim_trace_depth() - """ - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - - vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj") - with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f: - all_verilog_srcs = f.read().split() - - def file_to_dir(x): - return os.path.dirname(os.path.realpath(x)) - - def file_to_basename(x): - return os.path.basename(os.path.realpath(x)) - - top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename")) - top_module_name = top_module_file_name.strip(".v") - build_dir = make_build_dir("pyverilator_ipstitched_") - - # dump all Verilog code to a single file - # this is because large models with many files require - # a verilator command line too long for bash on most systems - # NOTE: there are duplicates in this list, and some files - # are identical but in multiple directories (regslice_core.v) - - # remove duplicates from list by doing list -> set -> list - all_verilog_files = list(set(filter(lambda x: x.endswith(".v"), all_verilog_srcs))) - - # remove all but one instances of regslice_core.v - filtered_verilog_files = [] - remove_entry = False - for vfile in all_verilog_files: - if "regslice_core" in vfile: - if not remove_entry: - filtered_verilog_files.append(vfile) - remove_entry = True - else: - filtered_verilog_files.append(vfile) - - # concatenate all verilog code into a single file - with open(vivado_stitch_proj_dir + "/" + top_module_file_name, "w") as wf: - for vfile in filtered_verilog_files: - with open(vfile) as rf: - wf.write("//Added from " + vfile + "\n\n") - wf.write(rf.read()) - - sim = PyVerilator.build( - top_module_file_name, - verilog_path=[vivado_stitch_proj_dir], - build_dir=build_dir, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=top_module_name, - auto_eval=False, - read_internal_signals=read_internal_signals, - ) - return sim - - -def 
pyverilate_get_liveness_threshold_cycles(): - """Return the number of no-output cycles rtlsim will wait before assuming - the simulation is not finishing and throwing an exception.""" - - return int(os.getenv("LIVENESS_THRESHOLD", 10000)) +from finn.util.basic import get_by_name, is_finn_op def is_fpgadataflow_node(node): @@ -161,93 +41,3 @@ def is_fpgadataflow_node(node): is_node = True return is_node - - -def rtlsim_multi_io(sim, io_dict, num_out_values, trace_file=""): - """Runs the pyverilator simulation by passing the input values to the simulation, - toggle the clock and observing the execution time. Function contains also an - observation loop that can abort the simulation if no output value is produced - after a set number of cycles. Can handle multiple i/o streams. See function - implementation for details on how the top-level signals should be named. - - Arguments: - - * sim: the PyVerilator object for simulation - * io_dict: a dict of dicts in the following format: - {"inputs" : {"in0" : , "in1" : }, - "outputs" : {"out0" : [], "out1" : []} } - is a list of Python arbitrary-precision ints indicating - what data to push into the simulation, and the output lists are - similarly filled when the simulation is complete - * num_out_values: number of total values to be read from the simulation to - finish the simulation and return. 
- - Returns: number of clock cycles elapsed for completion - - """ - - if trace_file != "": - sim.start_vcd_trace(trace_file) - - for outp in io_dict["outputs"]: - sim.io[outp + "_V_V_TREADY"] = 1 - - # observe if output is completely calculated - # total_cycle_count will contain the number of cycles the calculation ran - output_done = False - total_cycle_count = 0 - output_count = 0 - old_output_count = 0 - - # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles - no_change_count = 0 - liveness_threshold = pyverilate_get_liveness_threshold_cycles() - - while not (output_done): - for inp in io_dict["inputs"]: - inputs = io_dict["inputs"][inp] - sim.io[inp + "_V_V_TVALID"] = 1 if len(inputs) > 0 else 0 - sim.io[inp + "_V_V_TDATA"] = inputs[0] if len(inputs) > 0 else 0 - if sim.io[inp + "_V_V_TREADY"] == 1 and sim.io[inp + "_V_V_TVALID"] == 1: - inputs = inputs[1:] - io_dict["inputs"][inp] = inputs - - for outp in io_dict["outputs"]: - outputs = io_dict["outputs"][outp] - if sim.io[outp + "_V_V_TVALID"] == 1 and sim.io[outp + "_V_V_TREADY"] == 1: - outputs = outputs + [sim.io[outp + "_V_V_TDATA"]] - output_count += 1 - io_dict["outputs"][outp] = outputs - - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 - - total_cycle_count = total_cycle_count + 1 - - if output_count == old_output_count: - no_change_count = no_change_count + 1 - else: - no_change_count = 0 - old_output_count = output_count - - # check if all expected output words received - if output_count == num_out_values: - output_done = True - - # end sim on timeout - if no_change_count == liveness_threshold: - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() - raise Exception( - "Error in simulation! Takes too long to produce output. " - "Consider setting the LIVENESS_THRESHOLD env.var. to a " - "larger value." 
- ) - - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() - - return total_cycle_count diff --git a/src/finn/util/hls.py b/src/finn/util/hls.py new file mode 100644 index 0000000..fb23af0 --- /dev/null +++ b/src/finn/util/hls.py @@ -0,0 +1,74 @@ +# Copyright (c) 2021 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +import os +import subprocess + +from finn.util.basic import which + + +class CallHLS: + """Call either vivado_hls or vitis_hls to run HLS build tcl scripts.""" + + def __init__(self, backend="vivado_hls"): + self.tcl_script = "" + self.ipgen_path = "" + self.code_gen_dir = "" + self.ipgen_script = "" + assert backend in [ + "vivado_hls", + "vitis_hls", + ], "Unrecognized backend for CallHLS" + self.backend = backend + + def append_tcl(self, tcl_script): + """Sets the tcl script to be executed.""" + self.tcl_script = tcl_script + + def set_ipgen_path(self, path): + """Sets member variable ipgen_path to given path.""" + self.ipgen_path = path + + def build(self, code_gen_dir): + """Builds the bash script with given parameters and saves it in given folder. + To guarantee the generation in the correct folder the bash script contains a + cd command.""" + assert which(self.backend) is not None, "%s not found in PATH" % self.backend + self.code_gen_dir = code_gen_dir + self.ipgen_script = str(self.code_gen_dir) + "/ipgen.sh" + working_dir = os.environ["PWD"] + f = open(self.ipgen_script, "w") + f.write("#!/bin/bash \n") + f.write("cd {}\n".format(code_gen_dir)) + f.write("%s %s\n" % (self.backend, self.tcl_script)) + f.write("cd {}\n".format(working_dir)) + f.close() + bash_command = ["bash", self.ipgen_script] + process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) + process_compile.communicate() diff --git a/src/finn/util/pyverilator.py b/src/finn/util/pyverilator.py index fb022c1..b598a4a 100644 --- a/src/finn/util/pyverilator.py +++ b/src/finn/util/pyverilator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, Xilinx +# Copyright (c) 2021, Xilinx # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -26,6 +26,183 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import os + +from finn.util.basic import get_rtlsim_trace_depth, make_build_dir + +try: + from pyverilator import PyVerilator +except ModuleNotFoundError: + PyVerilator = None + + +def pyverilate_get_liveness_threshold_cycles(): + """Return the number of no-output cycles rtlsim will wait before assuming + the simulation is not finishing and throwing an exception.""" + + return int(os.getenv("LIVENESS_THRESHOLD", 10000)) + + +def rtlsim_multi_io(sim, io_dict, num_out_values, trace_file="", sname="_V_V_"): + """Runs the pyverilator simulation by passing the input values to the simulation, + toggle the clock and observing the execution time. Function contains also an + observation loop that can abort the simulation if no output value is produced + after a set number of cycles. Can handle multiple i/o streams. See function + implementation for details on how the top-level signals should be named. + + Arguments: + + * sim: the PyVerilator object for simulation + * io_dict: a dict of dicts in the following format: + {"inputs" : {"in0" : , "in1" : }, + "outputs" : {"out0" : [], "out1" : []} } + is a list of Python arbitrary-precision ints indicating + what data to push into the simulation, and the output lists are + similarly filled when the simulation is complete + * num_out_values: number of total values to be read from the simulation to + finish the simulation and return. 
+ * trace_file: vcd dump filename, empty string (no vcd dump) by default + * sname: signal naming for streams, "_V_V_" by default, vitis_hls uses "_V_" + + Returns: number of clock cycles elapsed for completion + + """ + + if trace_file != "": + sim.start_vcd_trace(trace_file) + + for outp in io_dict["outputs"]: + sim.io[outp + sname + "TREADY"] = 1 + + # observe if output is completely calculated + # total_cycle_count will contain the number of cycles the calculation ran + output_done = False + total_cycle_count = 0 + output_count = 0 + old_output_count = 0 + + # avoid infinite looping of simulation by aborting when there is no change in + # output values after 100 cycles + no_change_count = 0 + liveness_threshold = pyverilate_get_liveness_threshold_cycles() + + while not (output_done): + for inp in io_dict["inputs"]: + inputs = io_dict["inputs"][inp] + sim.io[inp + sname + "TVALID"] = 1 if len(inputs) > 0 else 0 + sim.io[inp + sname + "TDATA"] = inputs[0] if len(inputs) > 0 else 0 + if ( + sim.io[inp + sname + "TREADY"] == 1 + and sim.io[inp + sname + "TVALID"] == 1 + ): + inputs = inputs[1:] + io_dict["inputs"][inp] = inputs + + for outp in io_dict["outputs"]: + outputs = io_dict["outputs"][outp] + if ( + sim.io[outp + sname + "TVALID"] == 1 + and sim.io[outp + sname + "TREADY"] == 1 + ): + outputs = outputs + [sim.io[outp + sname + "TDATA"]] + output_count += 1 + io_dict["outputs"][outp] = outputs + + sim.io.ap_clk = 1 + sim.io.ap_clk = 0 + + total_cycle_count = total_cycle_count + 1 + + if output_count == old_output_count: + no_change_count = no_change_count + 1 + else: + no_change_count = 0 + old_output_count = output_count + + # check if all expected output words received + if output_count == num_out_values: + output_done = True + + # end sim on timeout + if no_change_count == liveness_threshold: + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() + raise Exception( + "Error in simulation! Takes too long to produce output. 
" + "Consider setting the LIVENESS_THRESHOLD env.var. to a " + "larger value." + ) + + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() + + return total_cycle_count + + +def pyverilate_stitched_ip(model, read_internal_signals=True): + """Given a model with stitched IP, return a PyVerilator sim object. + If read_internal_signals is True, it will be possible to examine the + internal (not only port) signals of the Verilog module, but this may + slow down compilation and emulation. + Trace depth is also controllable, see get_rtlsim_trace_depth() + """ + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + + vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj") + with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f: + all_verilog_srcs = f.read().split() + + def file_to_dir(x): + return os.path.dirname(os.path.realpath(x)) + + def file_to_basename(x): + return os.path.basename(os.path.realpath(x)) + + top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename")) + top_module_name = top_module_file_name.strip(".v") + build_dir = make_build_dir("pyverilator_ipstitched_") + + # dump all Verilog code to a single file + # this is because large models with many files require + # a verilator command line too long for bash on most systems + # NOTE: there are duplicates in this list, and some files + # are identical but in multiple directories (regslice_core.v) + + # remove duplicates from list by doing list -> set -> list + all_verilog_files = list(set(filter(lambda x: x.endswith(".v"), all_verilog_srcs))) + + # remove all but one instances of regslice_core.v + filtered_verilog_files = [] + remove_entry = False + for vfile in all_verilog_files: + if "regslice_core" in vfile: + if not remove_entry: + filtered_verilog_files.append(vfile) + remove_entry = True + else: + filtered_verilog_files.append(vfile) + + # concatenate all verilog code into a single file + with 
open(vivado_stitch_proj_dir + "/" + top_module_file_name, "w") as wf: + for vfile in filtered_verilog_files: + with open(vfile) as rf: + wf.write("//Added from " + vfile + "\n\n") + wf.write(rf.read()) + + sim = PyVerilator.build( + top_module_file_name, + verilog_path=[vivado_stitch_proj_dir], + build_dir=build_dir, + trace_depth=get_rtlsim_trace_depth(), + top_module_name=top_module_name, + auto_eval=False, + read_internal_signals=read_internal_signals, + ) + return sim + def _find_signal(sim, signal_name): # handle both mixed caps and lowercase signal names diff --git a/tests/core/test_modelwrapper.py b/tests/core/test_modelwrapper.py index dd7891a..0cd51c6 100644 --- a/tests/core/test_modelwrapper.py +++ b/tests/core/test_modelwrapper.py @@ -68,7 +68,7 @@ def test_modelwrapper(): assert model.get_tensor_layout(first_conv_iname) == inp_layout inp_sparsity = model.get_tensor_sparsity(first_conv_iname) assert inp_sparsity is None - inp_sparsity = {"dw": {"kernel_shape": 3}} + inp_sparsity = {"dw": {"kernel_shape": [3, 3]}} model.set_tensor_sparsity(first_conv_iname, inp_sparsity) assert model.get_tensor_sparsity(first_conv_iname) == inp_sparsity diff --git a/tests/custom_op/test_im2col.py b/tests/custom_op/test_im2col.py index ff255c6..3a26639 100644 --- a/tests/custom_op/test_im2col.py +++ b/tests/custom_op/test_im2col.py @@ -9,44 +9,49 @@ from finn.transformation.infer_shapes import InferShapes -def check_two_dict_for_equality(dict1, dict2): - for key in dict1: - assert key in dict2, "Key: {} is not in both dictionaries".format(key) - assert ( - dict1[key] == dict2[key] - ), """Values for key {} are not the same - in both dictionaries""".format( - key - ) - - return True - - -def execution_im2col(x, idt, k, stride, ifm_ch, ifm_dim, pad_amt=0, pad_val=0): - ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad_amt) +def execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val=0, + 
dilation_h=1, + dilation_w=1, +): + pad_amt_h = pad_amt[0] + pad_amt[2] + pad_amt_w = pad_amt[1] + pad_amt[3] + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad_amt_h, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad_amt_w, dilation_w) # set up onnx model inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch] + "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] ) outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, k * k * ifm_ch] + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch] ) - Im2Col_node = helper.make_node( + im2col_node = helper.make_node( "Im2Col", ["inp"], ["outp"], domain="finn.custom_op.general", - stride=stride, - kernel_size=k, + stride=[stride_h, stride_w], + kernel_size=[k_h, k_w], pad_amount=pad_amt, pad_value=pad_val, - input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), + input_shape="(1,{},{},{})".format(ifm_dim_h, ifm_dim_w, ifm_ch), + dilations=[dilation_h, dilation_w], ) graph = helper.make_graph( - nodes=[Im2Col_node], name="im2col_graph", inputs=[inp], outputs=[outp] + nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp] ) model = helper.make_model(graph, producer_name="im2col-model") @@ -56,7 +61,12 @@ def execution_im2col(x, idt, k, stride, ifm_ch, ifm_dim, pad_amt=0, pad_val=0): # test shape inference model.transform(InferShapes()) - assert model.get_tensor_shape("outp") == [1, ofm_dim, ofm_dim, k * k * ifm_ch] + assert model.get_tensor_shape("outp") == [ + 1, + ofm_dim_h, + ofm_dim_w, + k_h * k_w * ifm_ch, + ] # test datatype inference assert model.get_tensor_datatype("outp") is DataType.FLOAT32 @@ -72,16 +82,951 @@ def execution_im2col(x, idt, k, stride, ifm_ch, ifm_dim, pad_amt=0, pad_val=0): return y_produced +# Configurations tested: +# case id | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | +# idt | Bipolar | INT8 | INT8 | INT8 | INT8 | INT8 | INT8 | INT8 
| INT8 | INT8 | +# ifm_dim_H | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | +# ifm_dim_W | 5 | 5 | 5 | 5 | 5 | 1 | 1 | 1 | 1 | 1 | +# ifm_ch | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | +# pad_amt | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | +# pad_val | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +# k_H | 2 | 2 | 2 | 2 | 3 | 2 | 2 | 2 | 2 | 3 | +# k_W | 2 | 2 | 2 | 2 | 3 | 1 | 1 | 1 | 1 | 1 | +# stride_h | 1 | 1 | 1 | 2 | 2 | 1 | 1 | 1 | 2 | 2 | +# stride_w | 1 | 1 | 1 | 2 | 2 | 1 | 1 | 1 | 2 | 2 | +# dilation_h | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | +# dilation_w | 1 | 2 | 2 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | +# ------------------------------------------------------------------------------ +# case id | 10 | 11 | 12 | 13 +# idt | INT8 | INT8 | INT8 | INT8 +# ifm_dim_H | 5 | 5 | 5 | 4 +# ifm_dim_W | 5 | 1 | 5 | 5 +# ifm_ch | 2 | 2 | 2 | 2 +# pad_amt | 1 | 1 | 1 | 1 +# pad_val | 0 | 0 | 0 | 0 +# k_H | 2 | 2 | 2 | 1 +# k_W | 2 | 1 | 2 | 2 +# stride_h | 1 | 2 | 1 | 1 +# stride_w | 2 | 1 | 2 | 2 +# dilation_h | 2 | 2 | 1 | 1 +# dilation_w | 2 | 2 | 2 | 2 +def test_im2col_dilations(): + case_id = 0 + idt = DataType.INT8 + k_H = 2 + k_W = 2 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 5 + pad_amt = [0, 0, 0, 0] + pad_val = 0 + dilation_h = 1 + dilation_w = 1 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + [[21, -21], [22, -22], [23, -23], [24, -24], [25, -25]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [1, -1, 2, -2, 6, -6, 7, -7], + [2, -2, 3, -3, 7, -7, 8, -8], + [3, -3, 4, -4, 8, -8, 9, -9], + [4, -4, 5, -5, 9, -9, 10, -10], + ], + [ + [6, -6, 7, -7, 11, -11, 12, -12], + [7, -7, 8, -8, 12, -12, 13, -13], + [8, -8, 9, -9, 13, -13, 14, -14], + [9, -9, 10, -10, 14, -14, 15, -15], + ], + [ + [11, -11, 12, -12, 16, -16, 17, -17], + 
[12, -12, 13, -13, 17, -17, 18, -18], + [13, -13, 14, -14, 18, -18, 19, -19], + [14, -14, 15, -15, 19, -19, 20, -20], + ], + [ + [16, -16, 17, -17, 21, -21, 22, -22], + [17, -17, 18, -18, 22, -22, 23, -23], + [18, -18, 19, -19, 23, -23, 24, -24], + [19, -19, 20, -20, 24, -24, 25, -25], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 1 + idt = DataType.INT8 + k_H = 2 + k_W = 2 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 5 + pad_amt = [0, 0, 0, 0] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + [[21, -21], [22, -22], [23, -23], [24, -24], [25, -25]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [1, -1, 3, -3, 11, -11, 13, -13], + [2, -2, 4, -4, 12, -12, 14, -14], + [3, -3, 5, -5, 13, -13, 15, -15], + ], + [ + [6, -6, 8, -8, 16, -16, 18, -18], + [7, -7, 9, -9, 17, -17, 19, -19], + [8, -8, 10, -10, 18, -18, 20, -20], + ], + [ + [11, -11, 13, -13, 21, -21, 23, -23], + [12, -12, 14, -14, 22, -22, 24, -24], + [13, -13, 15, -15, 23, -23, 25, -25], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 2 + idt = DataType.INT8 + k_H = 2 + k_W = 2 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 5 + pad_amt = [1, 1, 1, 1] + 
pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + [[21, -21], [22, -22], [23, -23], [24, -24], [25, -25]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [0, 0, 0, 0, 0, 0, 7, -7], + [0, 0, 0, 0, 6, -6, 8, -8], + [0, 0, 0, 0, 7, -7, 9, -9], + [0, 0, 0, 0, 8, -8, 10, -10], + [0, 0, 0, 0, 9, -9, 0, 0], + ], + [ + [0, 0, 2, -2, 0, 0, 12, -12], + [1, -1, 3, -3, 11, -11, 13, -13], + [2, -2, 4, -4, 12, -12, 14, -14], + [3, -3, 5, -5, 13, -13, 15, -15], + [4, -4, 0, 0, 14, -14, 0, 0], + ], + [ + [0, 0, 7, -7, 0, 0, 17, -17], + [6, -6, 8, -8, 16, -16, 18, -18], + [7, -7, 9, -9, 17, -17, 19, -19], + [8, -8, 10, -10, 18, -18, 20, -20], + [9, -9, 0, 0, 19, -19, 0, 0], + ], + [ + [0, 0, 12, -12, 0, 0, 22, -22], + [11, -11, 13, -13, 21, -21, 23, -23], + [12, -12, 14, -14, 22, -22, 24, -24], + [13, -13, 15, -15, 23, -23, 25, -25], + [14, -14, 0, 0, 24, -24, 0, 0], + ], + [ + [0, 0, 17, -17, 0, 0, 0, 0], + [16, -16, 18, -18, 0, 0, 0, 0], + [17, -17, 19, -19, 0, 0, 0, 0], + [18, -18, 20, -20, 0, 0, 0, 0], + [19, -19, 0, 0, 0, 0, 0, 0], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 3 + idt = DataType.INT8 + k_H = 2 + k_W = 2 + stride_h = 2 + stride_w = 2 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 5 + pad_amt = [1, 1, 1, 1] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, 
-15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + [[21, -21], [22, -22], [23, -23], [24, -24], [25, -25]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [0, 0, 0, 0, 0, 0, 7, -7], + [0, 0, 0, 0, 7, -7, 9, -9], + [0, 0, 0, 0, 9, -9, 0, 0], + ], + [ + [0, 0, 7, -7, 0, 0, 17, -17], + [7, -7, 9, -9, 17, -17, 19, -19], + [9, -9, 0, 0, 19, -19, 0, 0], + ], + [ + [0, 0, 17, -17, 0, 0, 0, 0], + [17, -17, 19, -19, 0, 0, 0, 0], + [19, -19, 0, 0, 0, 0, 0, 0], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 4 + idt = DataType.INT8 + k_H = 3 + k_W = 3 + stride_h = 2 + stride_w = 2 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 5 + pad_amt = [1, 1, 1, 1] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + [[21, -21], [22, -22], [23, -23], [24, -24], [25, -25]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [0, 0, 0, 0, 0, 0, 0, 0, 7, -7, 9, -9, 0, 0, 17, -17, 19, -19], + [0, 0, 0, 0, 0, 0, 7, -7, 9, -9, 0, 0, 17, -17, 19, -19, 0, 0], + ], + [ + [0, 0, 7, -7, 9, -9, 0, 0, 17, -17, 19, -19, 0, 0, 0, 0, 0, 0], + [7, -7, 9, -9, 0, 0, 17, -17, 19, -19, 0, 0, 0, 0, 0, 0, 0, 0], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 5 + idt = 
DataType.INT8 + k_H = 2 + k_W = 1 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 1 + pad_amt = [0, 0, 0, 0] + pad_val = 0 + dilation_h = 1 + dilation_w = 1 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [[[[1, -1, 2, -2]], [[2, -2, 3, -3]], [[3, -3, 4, -4]], [[4, -4, 5, -5]]]], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 6 + idt = DataType.INT8 + k_H = 2 + k_W = 1 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 1 + pad_amt = [0, 0, 0, 0] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [[[[1, -1, 3, -3]], [[2, -2, 4, -4]], [[3, -3, 5, -5]]]], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 7 + idt = DataType.INT8 + k_H = 2 + k_W = 1 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 1 + pad_amt = [1, 0, 1, 0] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [[0, 0, 2, -2]], + [[1, -1, 3, -3]], + [[2, -2, 4, -4]], + [[3, -3, 5, -5]], + [[4, -4, 0, 0]], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + 
dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 8 + idt = DataType.INT8 + k_H = 2 + k_W = 1 + stride_h = 2 + stride_w = 2 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 1 + pad_amt = [1, 0, 1, 0] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [[[[0, 0, 2, -2]], [[2, -2, 4, -4]], [[4, -4, 0, 0]]]], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 9 + idt = DataType.INT8 + k_H = 3 + k_W = 1 + stride_h = 2 + stride_w = 2 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 1 + pad_amt = [1, 0, 1, 0] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [[[[0, 0, 2, -2, 4, -4]], [[2, -2, 4, -4, 0, 0]]]], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 10 + idt = DataType.INT8 + k_H = 2 + k_W = 2 + stride_h = 1 + stride_w = 2 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 5 + pad_amt = [1, 1, 1, 1] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + [[21, -21], [22, -22], [23, -23], [24, -24], 
[25, -25]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [0, 0, 0, 0, 0, 0, 7, -7], + [0, 0, 0, 0, 7, -7, 9, -9], + [0, 0, 0, 0, 9, -9, 0, 0], + ], + [ + [0, 0, 2, -2, 0, 0, 12, -12], + [2, -2, 4, -4, 12, -12, 14, -14], + [4, -4, 0, 0, 14, -14, 0, 0], + ], + [ + [0, 0, 7, -7, 0, 0, 17, -17], + [7, -7, 9, -9, 17, -17, 19, -19], + [9, -9, 0, 0, 19, -19, 0, 0], + ], + [ + [0, 0, 12, -12, 0, 0, 22, -22], + [12, -12, 14, -14, 22, -22, 24, -24], + [14, -14, 0, 0, 24, -24, 0, 0], + ], + [ + [0, 0, 17, -17, 0, 0, 0, 0], + [17, -17, 19, -19, 0, 0, 0, 0], + [19, -19, 0, 0, 0, 0, 0, 0], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 11 + idt = DataType.INT8 + k_H = 2 + k_W = 1 + stride_h = 2 + stride_w = 1 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 1 + pad_amt = [1, 0, 1, 0] + pad_val = 0 + dilation_h = 2 + dilation_w = 2 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [[[[0, 0, 2, -2]], [[2, -2, 4, -4]], [[4, -4, 0, 0]]]], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 12 + idt = DataType.INT8 + k_H = 2 + k_W = 2 + stride_h = 1 + stride_w = 2 + ifm_ch = 2 + ifm_dim_H = 5 + ifm_dim_W = 5 + pad_amt = [1, 1, 1, 1] + pad_val = 0 + dilation_h = 1 + dilation_w = 2 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, 
-15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + [[21, -21], [22, -22], [23, -23], [24, -24], [25, -25]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [0, 0, 0, 0, 0, 0, 2, -2], + [0, 0, 0, 0, 2, -2, 4, -4], + [0, 0, 0, 0, 4, -4, 0, 0], + ], + [ + [0, 0, 2, -2, 0, 0, 7, -7], + [2, -2, 4, -4, 7, -7, 9, -9], + [4, -4, 0, 0, 9, -9, 0, 0], + ], + [ + [0, 0, 7, -7, 0, 0, 12, -12], + [7, -7, 9, -9, 12, -12, 14, -14], + [9, -9, 0, 0, 14, -14, 0, 0], + ], + [ + [0, 0, 12, -12, 0, 0, 17, -17], + [12, -12, 14, -14, 17, -17, 19, -19], + [14, -14, 0, 0, 19, -19, 0, 0], + ], + [ + [0, 0, 17, -17, 0, 0, 22, -22], + [17, -17, 19, -19, 22, -22, 24, -24], + [19, -19, 0, 0, 24, -24, 0, 0], + ], + [ + [0, 0, 22, -22, 0, 0, 0, 0], + [22, -22, 24, -24, 0, 0, 0, 0], + [24, -24, 0, 0, 0, 0, 0, 0], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 13 + idt = DataType.INT8 + k_H = 2 + k_W = 3 + stride_h = 1 + stride_w = 2 + ifm_ch = 2 + ifm_dim_H = 4 + ifm_dim_W = 5 + pad_amt = [1, 1, 1, 1] + pad_val = 0 + dilation_h = 1 + dilation_w = 2 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 4, -4], + [0, 0, 0, 0, 0, 0, 2, -2, 4, -4, 0, 0], + ], + [ + [0, 0, 2, -2, 4, -4, 0, 0, 7, -7, 9, -9], + [2, -2, 4, -4, 0, 0, 7, -7, 9, -9, 0, 0], + ], + [ + [0, 0, 7, -7, 9, -9, 0, 0, 12, -12, 14, -14], + [7, -7, 9, -9, 0, 0, 12, -12, 14, -14, 0, 0], + ], + [ + [0, 0, 12, -12, 14, -14, 0, 0, 17, 
-17, 19, -19], + [12, -12, 14, -14, 0, 0, 17, -17, 19, -19, 0, 0], + ], + [ + [0, 0, 17, -17, 19, -19, 0, 0, 0, 0, 0, 0], + [17, -17, 19, -19, 0, 0, 0, 0, 0, 0, 0, 0], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_H, + k_W, + stride_h, + stride_w, + ifm_ch, + ifm_dim_H, + ifm_dim_W, + pad_amt, + pad_val, + dilation_h, + dilation_w, + ) + + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + +# Configurations tested: +# case id | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | +# idt | Bipolar | INT8 | INT8 | INT8 | INT8 | INT8 | INT8 | INT8 | INT8 | +# ifm_dim_H | 4 | 4 | 4 | 4 | 4 | 4 | 5 | 5 | 5 | +# ifm_dim_W | 4 | 4 | 4 | 5 | 5 | 5 | 1 | 1 | 1 | +# ifm_ch | 1 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | +# pad_amt | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | +# pad_val | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +# k_H | 2 | 2 | 2 | 2 | 3 | 3 | 3 | 3 | 3 | +# k_W | 2 | 2 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | +# stride_h | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | +# stride_w | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | +# dilation_h | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +# dilation_w | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | def test_im2col(): + case_id = 0 # bipolar inputs with following im2col parameters idt = DataType.BIPOLAR - k = 2 - stride = 1 + k_h = 2 + k_w = 2 + stride_h = 1 + stride_w = 1 ifm_ch = 1 - ifm_dim = 4 - pad_amt = 0 + ifm_dim_h = 4 + ifm_dim_w = 4 + pad_amt = [0, 0, 0, 0] + pad_amt_h = pad_amt[0] + pad_amt[2] + pad_amt_w = pad_amt[1] + pad_amt[3] pad_val = 0 - ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad_amt) + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad_amt_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad_amt_w) x = np.asarray( [ @@ -103,7 +1048,7 @@ def test_im2col(): 1.0, ], dtype=np.float32, - ).reshape(1, ifm_dim, ifm_dim, ifm_ch) + ).reshape(1, ifm_dim_h, ifm_dim_w, ifm_ch) expected = np.asarray( [ @@ -145,19 +1090,36 @@ def test_im2col(): 1.0, ], 
dtype=np.float32, - ).reshape(1, ofm_dim, ofm_dim, k * k * ifm_ch) + ).reshape(1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch) - produced = execution_im2col(x, idt, k, stride, ifm_ch, ifm_dim, pad_amt, pad_val) - assert (produced == expected).all() + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + case_id = 1 idt = DataType.INT8 - k = 2 - stride = 1 + k_h = 2 + k_w = 2 + stride_h = 1 + stride_w = 1 ifm_ch = 2 - ifm_dim = 4 - pad_amt = 0 + ifm_dim_h = 4 + ifm_dim_w = 4 + pad_amt = [0, 0, 0, 0] pad_val = 0 - ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad_amt) x = np.asarray( [ @@ -194,17 +1156,34 @@ def test_im2col(): dtype=np.float32, ) - produced = execution_im2col(x, idt, k, stride, ifm_ch, ifm_dim, pad_amt, pad_val) - assert (produced == expected).all() + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + case_id = 2 idt = DataType.INT8 - k = 2 - stride = 1 + k_h = 2 + k_w = 2 + stride_h = 1 + stride_w = 1 ifm_ch = 2 - ifm_dim = 4 - pad_amt = 1 + ifm_dim_h = 4 + ifm_dim_w = 4 + pad_amt = [1, 1, 1, 1] pad_val = 0 - ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad_amt) x = np.asarray( [ @@ -261,25 +1240,381 @@ def test_im2col(): dtype=np.float32, ) - produced = execution_im2col(x, idt, k, stride, ifm_ch, ifm_dim, pad_amt, pad_val) - assert (produced == expected).all() + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + case_id = 3 + idt = DataType.INT8 + k_h = 2 + k_w = 2 + stride_h = 1 + stride_w 
= 1 + ifm_ch = 2 + ifm_dim_h = 4 + ifm_dim_w = 5 + pad_amt = [0, 0, 0, 0] + pad_val = 0 -def test_im2col_infer_shapes(): + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + ] + ], + dtype=np.float32, + ) + expected = np.asarray( + [ + [ + [ + [1, -1, 2, -2, 6, -6, 7, -7], + [2, -2, 3, -3, 7, -7, 8, -8], + [3, -3, 4, -4, 8, -8, 9, -9], + [4, -4, 5, -5, 9, -9, 10, -10], + ], + [ + [6, -6, 7, -7, 11, -11, 12, -12], + [7, -7, 8, -8, 12, -12, 13, -13], + [8, -8, 9, -9, 13, -13, 14, -14], + [9, -9, 10, -10, 14, -14, 15, -15], + ], + [ + [11, -11, 12, -12, 16, -16, 17, -17], + [12, -12, 13, -13, 17, -17, 18, -18], + [13, -13, 14, -14, 18, -18, 19, -19], + [14, -14, 15, -15, 19, -19, 20, -20], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 4 + idt = DataType.INT8 + k_h = 3 + k_w = 2 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_h = 4 + ifm_dim_w = 5 + pad_amt = [0, 0, 0, 0] + pad_val = 0 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [1, -1, 2, -2, 6, -6, 7, -7, 11, -11, 12, -12], + [2, -2, 3, -3, 7, -7, 8, -8, 12, -12, 13, -13], + [3, -3, 4, -4, 8, -8, 9, -9, 13, -13, 14, -14], + [4, -4, 5, -5, 9, -9, 10, -10, 14, -14, 15, -15], + ], + [ + [6, -6, 7, -7, 11, -11, 12, -12, 16, -16, 17, -17], + [7, -7, 8, -8, 12, -12, 13, -13, 17, -17, 18, -18], + [8, -8, 9, -9, 13, 
-13, 14, -14, 18, -18, 19, -19], + [9, -9, 10, -10, 14, -14, 15, -15, 19, -19, 20, -20], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 5 + idt = DataType.INT8 + k_h = 3 + k_w = 2 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_h = 4 + ifm_dim_w = 5 + pad_amt = [1, 1, 1, 1] + pad_val = 0 + + x = np.asarray( + [ + [ + [[1, -1], [2, -2], [3, -3], [4, -4], [5, -5]], + [[6, -6], [7, -7], [8, -8], [9, -9], [10, -10]], + [[11, -11], [12, -12], [13, -13], [14, -14], [15, -15]], + [[16, -16], [17, -17], [18, -18], [19, -19], [20, -20]], + ] + ], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [ + [0, 0, 0, 0, 0, 0, 1, -1, 0, 0, 6, -6], + [0, 0, 0, 0, 1, -1, 2, -2, 6, -6, 7, -7], + [0, 0, 0, 0, 2, -2, 3, -3, 7, -7, 8, -8], + [0, 0, 0, 0, 3, -3, 4, -4, 8, -8, 9, -9], + [0, 0, 0, 0, 4, -4, 5, -5, 9, -9, 10, -10], + [0, 0, 0, 0, 5, -5, 0, 0, 10, -10, 0, 0], + ], + [ + [0, 0, 1, -1, 0, 0, 6, -6, 0, 0, 11, -11], + [1, -1, 2, -2, 6, -6, 7, -7, 11, -11, 12, -12], + [2, -2, 3, -3, 7, -7, 8, -8, 12, -12, 13, -13], + [3, -3, 4, -4, 8, -8, 9, -9, 13, -13, 14, -14], + [4, -4, 5, -5, 9, -9, 10, -10, 14, -14, 15, -15], + [5, -5, 0, 0, 10, -10, 0, 0, 15, -15, 0, 0], + ], + [ + [0, 0, 6, -6, 0, 0, 11, -11, 0, 0, 16, -16], + [6, -6, 7, -7, 11, -11, 12, -12, 16, -16, 17, -17], + [7, -7, 8, -8, 12, -12, 13, -13, 17, -17, 18, -18], + [8, -8, 9, -9, 13, -13, 14, -14, 18, -18, 19, -19], + [9, -9, 10, -10, 14, -14, 15, -15, 19, -19, 20, -20], + [10, -10, 0, 0, 15, -15, 0, 0, 20, -20, 0, 0], + ], + [ + [0, 0, 11, -11, 0, 0, 16, -16, 0, 0, 0, 0], + [11, -11, 12, -12, 16, -16, 17, -17, 0, 0, 0, 0], + [12, -12, 13, -13, 17, -17, 18, -18, 0, 0, 0, 0], + [13, -13, 14, -14, 18, -18, 19, -19, 0, 0, 0, 0], + [14, -14, 15, -15, 19, -19, 20, -20, 0, 
0, 0, 0], + [15, -15, 0, 0, 20, -20, 0, 0, 0, 0, 0, 0], + ], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 6 + idt = DataType.INT8 + k_h = 3 + k_w = 1 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_h = 5 + ifm_dim_w = 1 + pad_amt = [0, 0, 0, 0] + pad_val = 0 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [[[[1, -1, 2, -2, 3, -3]], [[2, -2, 3, -3, 4, -4]], [[3, -3, 4, -4, 5, -5]]]], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 7 + idt = DataType.INT8 + k_h = 3 + k_w = 1 + stride_h = 1 + stride_w = 1 + ifm_ch = 2 + ifm_dim_h = 5 + ifm_dim_w = 1 + pad_amt = [1, 0, 1, 0] + pad_val = 0 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = np.asarray( + [ + [ + [[0, 0, 1, -1, 2, -2]], + [[1, -1, 2, -2, 3, -3]], + [[2, -2, 3, -3, 4, -4]], + [[3, -3, 4, -4, 5, -5]], + [[4, -4, 5, -5, 0, 0]], + ] + ], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + case_id = 8 + idt = DataType.INT8 + k_h = 3 + k_w = 1 + stride_h = 2 + stride_w = 2 + ifm_ch = 2 + ifm_dim_h = 5 + ifm_dim_w = 1 + pad_amt = [1, 0, 1, 0] + pad_val = 0 + + x = np.asarray( + [[[[1, -1]], [[2, -2]], [[3, -3]], [[4, -4]], [[5, -5]]]], + dtype=np.float32, + ) + + expected = 
np.asarray( + [[[[0, 0, 1, -1, 2, -2]], [[2, -2, 3, -3, 4, -4]], [[4, -4, 5, -5, 0, 0]]]], + dtype=np.float32, + ) + + produced = execution_im2col( + x, + idt, + k_h, + k_w, + stride_h, + stride_w, + ifm_ch, + ifm_dim_h, + ifm_dim_w, + pad_amt, + pad_val, + ) + assert (produced == expected).all(), "Test failed for case number {}".format( + case_id + ) + + +def test_im2col_infer_shapes(): idt = DataType.BIPOLAR - k = 2 - stride = 1 + k_h = 2 + k_w = 2 + stride_h = 1 + stride_w = 1 ifm_ch = 1 - ifm_dim = 4 - ofm_dim = int(((ifm_dim - k) / stride) + 1) + ifm_dim_h = 4 + ifm_dim_w = 4 + pad_amt = [0, 0, 0, 0] # default + pad_amt_h = pad_amt[0] + pad_amt[2] + pad_amt_w = pad_amt[1] + pad_amt[3] + dilation = 1 + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, pad_amt_h, dilation) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, pad_amt_w, dilation) # set up onnx model inp = helper.make_tensor_value_info( - "inp", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch] + "inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] ) outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, k * k * ifm_ch] + "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch] ) abs_node = helper.make_node("Abs", inputs=["inp"], outputs=["abs"]) @@ -289,9 +1624,10 @@ def test_im2col_infer_shapes(): ["abs"], ["im2col"], domain="finn.custom_op.general", - stride=stride, - kernel_size=k, - input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch), + stride=[stride_w, stride_w], + kernel_size=[k_h, k_w], + input_shape="(1,{},{},{})".format(ifm_dim_h, ifm_dim_w, ifm_ch), + dilations=[dilation, dilation], ) abs1_node = helper.make_node("Abs", inputs=["im2col"], outputs=["outp"]) @@ -303,10 +1639,12 @@ def test_im2col_infer_shapes(): outputs=[outp], value_info=[ helper.make_tensor_value_info( - "abs", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ifm_ch] + "abs", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch] ), 
helper.make_tensor_value_info( - "im2col", TensorProto.FLOAT, [1, ofm_dim, ofm_dim, k * k * ifm_ch] + "im2col", + TensorProto.FLOAT, + [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch], ), ], ) @@ -318,4 +1656,9 @@ def test_im2col_infer_shapes(): # test shape inference model.transform(InferShapes()) - assert model.get_tensor_shape("im2col") == [1, ofm_dim, ofm_dim, k * k * ifm_ch] + assert model.get_tensor_shape("im2col") == [ + 1, + ofm_dim_h, + ofm_dim_w, + k_h * k_w * ifm_ch, + ] diff --git a/tests/transformation/test_4d_conversion.py b/tests/transformation/test_4d_conversion.py new file mode 100644 index 0000000..18fe9cc --- /dev/null +++ b/tests/transformation/test_4d_conversion.py @@ -0,0 +1,254 @@ +import numpy as np +import onnx + +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.change_3d_tensors_to_4d import Change3DTo4DTensors +from finn.util.basic import gen_finn_dt_tensor + + +def generate_random_input(model): + """ + Creates input dictionary with a random numpy array + that matches the input tensor shape. + """ + input_dict = {} + for i in range(len(model.graph.input)): + input_node = model.graph.input[i] + input_node_name = input_node.name + input_node_shape = model.get_tensor_shape(input_node_name) + i_val = gen_finn_dt_tensor(DataType.FLOAT32, input_node_shape) + input_dict[input_node_name] = i_val + return input_dict + + +def set_all_initializers(model): + """Sets all initializers of the graph to a random value.""" + for n in model.graph.node: + if len(n.input) > 1: + init_name = n.input[1] + init_shape = model.get_tensor_shape(init_name) + init_val = gen_finn_dt_tensor(DataType.FLOAT32, init_shape) + model.set_initializer(init_name, init_val) + + +def create_arbitrary_model(invalid=False): + """ + Creates arbitrary model for testing the 3D to 4D transform. + This model is based on a subpart of QuartzNet. 
+ """ + + Mul1_node = onnx.helper.make_node( + "Mul", + inputs=["in1_mul1", "in2_mul1"], # inputs + outputs=["out1_mul1"], # outputs + name="Mul1", # name + ) + + Conv1_node = onnx.helper.make_node( + "Conv", + inputs=["out1_mul1", "in2_conv1"], + outputs=["out1_conv1"], + name="Conv1", + dilations=[1], + group=1, + kernel_shape=[1], + pads=[0, 0], + strides=[1], + ) + + if ( + invalid is True + ): # To make the graph invalid, a ReLU node is added after the Conv node + Relu1_node = onnx.helper.make_node( + "Relu", inputs=["out1_conv1"], outputs=["out1_relu1"], name="Relu1" + ) + Add1_node = onnx.helper.make_node( + "Add", inputs=["out1_relu1", "in2_add1"], outputs=["out1_add1"], name="Add1" + ) + else: + Add1_node = onnx.helper.make_node( + "Add", inputs=["out1_conv1", "in2_add1"], outputs=["out1_add1"], name="Add1" + ) + + Mul2_node = onnx.helper.make_node( + "Mul", inputs=["out1_add1", "in2_mul2"], outputs=["out1_mul2"], name="Mul2" + ) + + Transpose1_node = onnx.helper.make_node( + "Transpose", + inputs=["out1_mul2"], + outputs=["out1_transpose1"], + name="Transpose1", + perm=[0, 2, 1], + ) + + LogSoftmax1_node = onnx.helper.make_node( + "LogSoftmax", + inputs=["out1_transpose1"], + outputs=["out1_logsoftmax1"], + name="LogSoftmax1", + axis=2, + ) + + ArgMax1_node = onnx.helper.make_node( + "ArgMax", + inputs=["out1_logsoftmax1"], + outputs=["out1_argmax1"], + name="ArgMax1", + axis=-1, + keepdims=0, + ) + + # Inputs and outputs + in1_mul1 = onnx.helper.make_tensor_value_info( + "in1_mul1", onnx.TensorProto.FLOAT, [1, 1024, 128] + ) + out1_argmax1 = onnx.helper.make_tensor_value_info( + "out1_argmax1", onnx.TensorProto.INT64, [1, 128] + ) + + # Value infos + out1_mul1 = onnx.helper.make_tensor_value_info( + "out1_mul1", onnx.TensorProto.FLOAT, [1, 1024, 128] + ) + out1_conv1 = onnx.helper.make_tensor_value_info( + "out1_conv1", onnx.TensorProto.FLOAT, [1, 29, 128] + ) + + if invalid is True: + out1_relu1 = onnx.helper.make_tensor_value_info( + "out1_relu1", 
onnx.TensorProto.FLOAT, [1, 29, 128] + ) + + out1_add1 = onnx.helper.make_tensor_value_info( + "out1_add1", onnx.TensorProto.FLOAT, [1, 29, 128] + ) + + out1_mul2 = onnx.helper.make_tensor_value_info( + "out1_mul2", onnx.TensorProto.FLOAT, [1, 29, 128] + ) + out1_transpose1 = onnx.helper.make_tensor_value_info( + "out1_transpose1", onnx.TensorProto.FLOAT, [1, 128, 29] + ) + out1_logsoftmax1 = onnx.helper.make_tensor_value_info( + "out1_logsoftmax1", onnx.TensorProto.FLOAT, [1, 128, 29] + ) + + # Initializers + in2_mul1 = onnx.helper.make_tensor_value_info( + "in2_mul1", onnx.TensorProto.FLOAT, [1] + ) + in2_conv1 = onnx.helper.make_tensor_value_info( + "in2_conv1", onnx.TensorProto.FLOAT, [29, 1024, 1] + ) + in2_add1 = onnx.helper.make_tensor_value_info( + "in2_add1", onnx.TensorProto.FLOAT, [1, 29, 1] + ) + in2_mul2 = onnx.helper.make_tensor_value_info( + "in2_mul2", onnx.TensorProto.FLOAT, [1] + ) + + list_of_nodes = [ + Mul1_node, + Conv1_node, + Add1_node, + Mul2_node, + Transpose1_node, + LogSoftmax1_node, + ArgMax1_node, + ] + list_of_value_infos = [ + out1_mul1, + out1_conv1, + out1_add1, + out1_mul2, + out1_transpose1, + out1_logsoftmax1, + in2_mul1, + in2_conv1, + in2_add1, + in2_mul2, + ] + + if invalid is True: + list_of_nodes.insert(2, Relu1_node) + list_of_value_infos.append(out1_relu1) + + graph = onnx.helper.make_graph( + nodes=list_of_nodes, + name="4d_conversion_test_graph", + inputs=[in1_mul1], + outputs=[out1_argmax1], + value_info=list_of_value_infos, + ) + onnx_model = onnx.helper.make_model(graph, producer_name="4d_conversion_test-model") + model = ModelWrapper(onnx_model) + + return model + + +def test_4d_conversion(): + """ + Test for the 3D to 4D transformation with a valid graph. 
+ """ + model = create_arbitrary_model(invalid=False) + + # Inputs + input_dict = generate_random_input(model) + + # Initializers + set_all_initializers(model) + + # Comparing the outputs of the model before and after the transform + output_node_name = model.graph.output[0].name + output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True) + expected = output_dict[output_node_name] + + model = model.transform(Change3DTo4DTensors()) + + for k, v in input_dict.items(): + old_in_name = k + old_shape = np.shape(v) + new_in_name = model.graph.input[0].name + new_shape = old_shape + (1,) + new_in_val = np.reshape(v, new_shape) + del input_dict[old_in_name] + input_dict[new_in_name] = new_in_val + + output_node_name = model.graph.output[0].name + output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True) + expected_modified = output_dict[output_node_name] + + expected_modified = np.reshape(expected_modified, np.shape(expected)) + + assert (expected == expected_modified).all() + + +def test_4d_conversion_invalid_nodes(): + """ + Test for the 3D to 4D transformation with an invalid graph. 
+ """ + model = create_arbitrary_model(invalid=True) + + # Inputs + input_dict = generate_random_input(model) + + # Initializers + set_all_initializers(model) + + # Comparing the outputs of the model before and after the transform + output_node_name = model.graph.output[0].name + output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True) + expected = output_dict[output_node_name] + + model = model.transform(Change3DTo4DTensors()) + + output_node_name = model.graph.output[0].name + output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True) + expected_modified = output_dict[output_node_name] + + expected_modified = np.reshape(expected_modified, np.shape(expected)) + + assert (expected == expected_modified).all() diff --git a/tests/transformation/test_batchnorm_to_affine.py b/tests/transformation/test_batchnorm_to_affine.py index 984a996..4adc874 100644 --- a/tests/transformation/test_batchnorm_to_affine.py +++ b/tests/transformation/test_batchnorm_to_affine.py @@ -29,6 +29,7 @@ import pytest import numpy as np +import onnx import os import urllib.request as ureq @@ -65,3 +66,55 @@ def test_batchnorm_to_affine_shufflenet(): produced = oxe.execute_onnx(new_model, input_dict)[oname] assert np.isclose(expected, produced).all() os.remove(export_onnx_path) + + +@pytest.mark.parametrize("epsilon", [0.0, 0.00001, 0.001]) +def test_batchnorm_to_affine_epsilon(epsilon): + """Dummy batchnorm node to test out the epsilon attribute.""" + + batchnorm_node = onnx.helper.make_node( + "BatchNormalization", + inputs=["x", "s", "bias", "mean", "var"], + outputs=["y"], + epsilon=epsilon, + ) + + x = onnx.helper.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [1, 3, 5, 5]) + s = onnx.helper.make_tensor_value_info("s", onnx.TensorProto.FLOAT, [3]) + bias = onnx.helper.make_tensor_value_info("bias", onnx.TensorProto.FLOAT, [3]) + mean = onnx.helper.make_tensor_value_info("mean", onnx.TensorProto.FLOAT, [3]) + var = 
onnx.helper.make_tensor_value_info("var", onnx.TensorProto.FLOAT, [3]) + y = onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1, 3, 5, 5]) + + # Graph + graph = onnx.helper.make_graph( + nodes=[batchnorm_node], + name="test_batchnorm_graph", + inputs=[x], + outputs=[y], + value_info=[s, bias, mean, var], + ) + + onnx_model = onnx.helper.make_model(graph, producer_name="test_batchnorm-model") + model = ModelWrapper(onnx_model) + + model.set_initializer("s", np.array([1, 2, 3]).astype(np.float32)) + model.set_initializer("bias", np.array([1, 2, 3]).astype(np.float32)) + model.set_initializer("mean", np.array([3, 4, 5]).astype(np.float32)) + model.set_initializer("var", np.array([0.5, 0.7, 0.3]).astype(np.float32)) + + i_val = np.arange(0, 3 * 5 * 5, dtype=np.float32) + i_val = np.reshape(i_val, [1, 3, 5, 5]) + input_dict = {"x": i_val} + output_node_name = "y" + + output_dict = oxe.execute_onnx(model, input_dict, return_full_exec_context=True) + output_original = output_dict[output_node_name] + + model_lowered = model.transform(BatchNormToAffine()) + output_dict = oxe.execute_onnx( + model_lowered, input_dict, return_full_exec_context=True + ) + output_lowered = output_dict[output_node_name] + + assert (output_original == output_lowered).all() diff --git a/tests/transformation/test_conv_lowering.py b/tests/transformation/test_conv_lowering.py index 90a9e53..604acf7 100644 --- a/tests/transformation/test_conv_lowering.py +++ b/tests/transformation/test_conv_lowering.py @@ -72,40 +72,197 @@ def test_conv_lowering_convmnist(): # input datatype @pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4]) # kernel size -@pytest.mark.parametrize("k", [2, 4]) +@pytest.mark.parametrize("k_h", [2, 3]) +@pytest.mark.parametrize("k_w", [2, 3, 1]) # input dimension -@pytest.mark.parametrize("ifm_dim", [4, 6]) +@pytest.mark.parametrize("ifm_dim_h", [9, 11]) +@pytest.mark.parametrize("ifm_dim_w", [9, 11, 1]) # input channels @pytest.mark.parametrize("ifm_ch", 
[2, 3])
 # stride
-@pytest.mark.parametrize("stride", [1, 2])
+@pytest.mark.parametrize("stride", [[1, 1], [1, 2], [2, 1], [2, 2]])
 # padding
 @pytest.mark.parametrize("padding", [[0, 0, 0, 0], [1, 1, 1, 1]])
-def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding):
+# dilations
+@pytest.mark.parametrize("dilations", [[1, 1], [2, 2], [3, 3]])
+# depthwise or channelwise
+@pytest.mark.parametrize("dw", [True, False])
+def test_dws_reg_conv_lowering(
+    idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride, padding, dilations, dw
+):
+    if k_h > ifm_dim_h:
+        pytest.skip("Kernel height must be smaller than image height")
+    if k_w > ifm_dim_w:
+        pytest.skip("Kernel width must be smaller than image width")
+    # Ensure the right padding parameters are set
+    if ifm_dim_w == 1:
+        dilations[1] = 1
+        padding[1] = 0
+        padding[3] = 0
     wdt = idt
     odt = DataType.INT32
     ofm_ch = ifm_ch
-    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding[0])
+    pad_h = padding[0] + padding[2]
+    pad_w = padding[1] + padding[3]
+    stride_h = stride[0]
+    stride_w = stride[1]
+
+    ofm_dim_h = compute_conv_output_dim(
+        ifm_dim_h,
+        k_h,
+        stride_h,
+        pad_h,
+        dilations[0],
+    )
+    ofm_dim_w = compute_conv_output_dim(
+        ifm_dim_w,
+        k_w,
+        stride_w,
+        pad_w,
+        dilations[1],
+    )
 
     # set up onnx model
     inp = oh.make_tensor_value_info(
-        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim, ifm_dim]
+        "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_h, ifm_dim_w]
     )
     outp = oh.make_tensor_value_info(
-        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim, ofm_dim]
+        "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_h, ofm_dim_w]
     )
 
-    W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k, k])
+    if dw is True:
+        W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, 1, k_h, k_w])
+        group = ifm_ch
+    else:
+        W = oh.make_tensor_value_info(
+            "W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_h, k_w]
+        )
+        group = 1
 
     dw_cnv = oh.make_node(
         "Conv",
         inputs=["inp", "W"],
         outputs=["outp"],
-
kernel_shape=[k, k], + kernel_shape=[k_h, k_w], + pads=padding, + strides=[stride_h, stride_w], + group=group, + dilations=dilations, + ) + graph = oh.make_graph( + nodes=[dw_cnv], + name="dw_cnv_graph", + inputs=[inp], + outputs=[outp], + value_info=[W], + ) + + model = oh.make_model(graph, producer_name="test_dws_reg_cnv-model") + model = ModelWrapper(model) + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", odt) + model.set_tensor_datatype("W", wdt) + + if dw is True: + w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k_h, k_w]) + else: + w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, ifm_ch, k_h, k_w]) + + model.set_initializer("W", w_tensor) + model = model.transform(InferShapes()) + + input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_h, ifm_dim_w]) + input_dict = {"inp": input_tensor} + output_dict = oxe.execute_onnx(model, input_dict) + expected = output_dict["outp"] + + model = model.transform(LowerConvsToMatMul()) + output_dict = oxe.execute_onnx(model, input_dict) + produced = output_dict["outp"] + assert (produced == expected).all() + + if dw is True: + # check if created nodes have attributes that indicate depthwise conv + assert model.get_tensor_sparsity("W") is not None + im2col_node = getCustomOp(model.graph.node[1]) + assert im2col_node.get_nodeattr("depthwise") == 1 + + +# input datatype +@pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4]) +# kernel size +@pytest.mark.parametrize("k_h", [2]) +@pytest.mark.parametrize("k_w", [2]) +# input dimension +@pytest.mark.parametrize("ifm_dim_h", [4]) +@pytest.mark.parametrize("ifm_dim_w", [4]) +# input channels +@pytest.mark.parametrize("ifm_ch", [2]) +# stride +@pytest.mark.parametrize("stride", [1, 2]) +# padding. 
Padding is applied to dimensions H and W as: [H_begin, W_begin, H_end, W_end] +@pytest.mark.parametrize( + "padding", + [ + [0, 0, 0, 0], + [0, 0, 0, 1], + [0, 0, 1, 0], + [0, 0, 1, 1], + [0, 1, 0, 0], + [0, 1, 0, 1], + [0, 1, 1, 0], + [0, 1, 1, 1], + [1, 0, 0, 0], + [1, 0, 0, 1], + [1, 0, 1, 0], + [1, 0, 1, 1], + [1, 1, 0, 0], + [1, 1, 0, 1], + [1, 1, 1, 0], + [1, 1, 1, 1], + ], +) +def test_non_equal_padding( + idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride, padding +): + wdt = idt + odt = DataType.INT32 + ofm_ch = ifm_ch + pad_h = padding[0] + padding[2] + pad_w = padding[1] + padding[3] + + ofm_dim_h = compute_conv_output_dim( + ifm_dim_h, + k_h, + stride, + pad_h, + ) + ofm_dim_w = compute_conv_output_dim( + ifm_dim_w, + k_w, + stride, + pad_w, + ) + + # set up onnx model + inp = oh.make_tensor_value_info( + "inp", TensorProto.FLOAT, [1, ifm_ch, ifm_dim_h, ifm_dim_w] + ) + outp = oh.make_tensor_value_info( + "outp", TensorProto.FLOAT, [1, ofm_ch, ofm_dim_h, ofm_dim_w] + ) + + W = oh.make_tensor_value_info("W", TensorProto.FLOAT, [ofm_ch, ifm_ch, k_h, k_w]) + + dw_cnv = oh.make_node( + "Conv", + inputs=["inp", "W"], + outputs=["outp"], + kernel_shape=[k_h, k_w], pads=padding, strides=[stride, stride], - group=ifm_ch, + group=1, ) graph = oh.make_graph( nodes=[dw_cnv], @@ -120,11 +277,11 @@ def test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding): model.set_tensor_datatype("inp", idt) model.set_tensor_datatype("outp", odt) model.set_tensor_datatype("W", wdt) - w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, 1, k, k]) + w_tensor = gen_finn_dt_tensor(wdt, [ofm_ch, ifm_ch, k_h, k_w]) model.set_initializer("W", w_tensor) model = model.transform(InferShapes()) - input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim, ifm_dim]) + input_tensor = gen_finn_dt_tensor(idt, [1, ifm_ch, ifm_dim_h, ifm_dim_w]) input_dict = {"inp": input_tensor} output_dict = oxe.execute_onnx(model, input_dict) expected = output_dict["outp"] @@ -134,23 +291,20 @@ def 
test_depthwise_conv_lowering(idt, k, ifm_dim, ifm_ch, stride, padding): produced = output_dict["outp"] assert (produced == expected).all() - # check if created nodes have attributes that indicate depthwise conv - assert model.get_tensor_sparsity("W") is not None - im2col_node = getCustomOp(model.graph.node[1]) - assert im2col_node.get_nodeattr("depthwise") == 1 - def test_conv_lowering_conv_1x1(): np.random.seed(0) - in_feature_dim = 7 + in_feature_dim_h = 7 + in_feature_dim_w = 7 in_chn = 3 kernel_size = 1 - out_feature_dim = in_feature_dim + out_feature_dim_h = in_feature_dim_h + out_feature_dim_w = in_feature_dim_w - input_shape = [1, in_chn, in_feature_dim, in_feature_dim] - output_shape = [1, in_chn, out_feature_dim, out_feature_dim] + input_shape = [1, in_chn, in_feature_dim_h, in_feature_dim_w] + output_shape = [1, in_chn, out_feature_dim_h, out_feature_dim_w] conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size] diff --git a/tests/transformation/test_extend_partition.py b/tests/transformation/test_extend_partition.py new file mode 100644 index 0000000..fa7f288 --- /dev/null +++ b/tests/transformation/test_extend_partition.py @@ -0,0 +1,322 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import numpy as np +from onnx import TensorProto +from onnx import helper as oh + +import finn.core.onnx_exec as oxe +from finn.core.datatype import DataType +from finn.core.modelwrapper import ModelWrapper +from finn.transformation.create_generic_partitions import PartitionFromDict +from finn.transformation.extend_partition import ExtendPartition +from finn.util.basic import gen_finn_dt_tensor + + +def create_model(): + MultiThreshold0_node = oh.make_node( + "MultiThreshold", + inputs=["in1_multithreshold0", "in2_multithreshold0"], + outputs=["out_multithreshold0"], + name="MultiThreshold0", + domain="finn.custom_op.general", + out_dtype="UINT4", + ) + + Conv0_node = oh.make_node( + "Conv", + inputs=["out_multithreshold0", "in2_conv0"], + outputs=["out_conv0"], + name="Conv0", + dilations=[1, 1], + group=1, + kernel_shape=[1, 1], + pads=[0, 0, 0, 0], + strides=[1, 1], + ) + + Conv1_node = oh.make_node( + "Conv", + inputs=["out_multithreshold0", "in2_conv1"], + outputs=["out_conv1"], + name="Conv1", + dilations=[1, 1], + group=1, + kernel_shape=[1, 1], + pads=[0, 0, 0, 0], + strides=[1, 1], + ) + + MultiThreshold1_node = oh.make_node( + "MultiThreshold", + 
inputs=["out_conv0", "in2_multithreshold1"], + outputs=["out_multithreshold1"], + name="MultiThreshold1", + domain="finn.custom_op.general", + out_dtype="UINT4", + ) + + MultiThreshold2_node = oh.make_node( + "MultiThreshold", + inputs=["out_conv1", "in2_multithreshold2"], + outputs=["out_multithreshold2"], + name="MultiThreshold2", + domain="finn.custom_op.general", + out_dtype="UINT4", + ) + + Add0_node = oh.make_node( + "Add", + inputs=["out_multithreshold1", "out_multithreshold2"], + outputs=["out_add0"], + name="Add0", + ) + + MultiThreshold3_node = oh.make_node( + "MultiThreshold", + inputs=["out_add0", "in2_multithreshold3"], + outputs=["out_multithreshold3"], + name="MultiThreshold3", + domain="finn.custom_op.general", + out_dtype="UINT4", + ) + + Conv2_node = oh.make_node( + "Conv", + inputs=["out_multithreshold3", "in2_conv2"], + outputs=["out_conv2"], + name="Conv2", + dilations=[1, 1], + group=1, + kernel_shape=[1, 1], + pads=[0, 0, 0, 0], + strides=[1, 1], + ) + + Conv3_node = oh.make_node( + "Conv", + inputs=["out_multithreshold3", "in2_conv3"], + outputs=["out_conv3"], + name="Conv3", + dilations=[1, 1], + group=1, + kernel_shape=[1, 1], + pads=[0, 0, 0, 0], + strides=[1, 1], + ) + + MultiThreshold4_node = oh.make_node( + "MultiThreshold", + inputs=["out_conv2", "in2_multithreshold4"], + outputs=["out_multithreshold4"], + name="MultiThreshold4", + domain="finn.custom_op.general", + out_dtype="UINT4", + ) + + MultiThreshold5_node = oh.make_node( + "MultiThreshold", + inputs=["out_conv3", "in2_multithreshold5"], + outputs=["out_multithreshold5"], + name="MultiThreshold5", + domain="finn.custom_op.general", + out_dtype="UINT4", + ) + + Add1_node = oh.make_node( + "Add", + inputs=["out_multithreshold4", "out_multithreshold5"], + outputs=["out_add1"], + name="Add1", + ) + + # Inputs/outputs (global) + t_type = TensorProto.FLOAT + t_shape = [1, 256, 128, 1] + in1_multithreshold0 = oh.make_tensor_value_info( + "in1_multithreshold0", t_type, t_shape + ) + 
out_add1 = oh.make_tensor_value_info("out_add1", t_type, t_shape) + + # Initializers + in2_multithreshold0 = oh.make_tensor_value_info( + "in2_multithreshold0", t_type, [256, 15] + ) + in2_conv0 = oh.make_tensor_value_info("in2_conv0", t_type, [256, 256, 1, 1]) + in2_conv1 = oh.make_tensor_value_info("in2_conv1", t_type, [256, 256, 1, 1]) + in2_multithreshold1 = oh.make_tensor_value_info( + "in2_multithreshold1", t_type, [256, 15] + ) + in2_multithreshold2 = oh.make_tensor_value_info( + "in2_multithreshold2", t_type, [256, 15] + ) + in2_multithreshold3 = oh.make_tensor_value_info( + "in2_multithreshold3", t_type, [256, 15] + ) + in2_conv2 = oh.make_tensor_value_info("in2_conv2", t_type, [256, 256, 1, 1]) + in2_conv3 = oh.make_tensor_value_info("in2_conv3", t_type, [256, 256, 1, 1]) + in2_multithreshold4 = oh.make_tensor_value_info( + "in2_multithreshold4", t_type, [256, 15] + ) + in2_multithreshold5 = oh.make_tensor_value_info( + "in2_multithreshold5", t_type, [256, 15] + ) + + # Value_infos + out_multithreshold0 = oh.make_tensor_value_info( + "out_multithreshold0", t_type, t_shape + ) + out_conv0 = oh.make_tensor_value_info("out_conv0", t_type, t_shape) + out_conv1 = oh.make_tensor_value_info("out_conv1", t_type, t_shape) + out_multithreshold1 = oh.make_tensor_value_info( + "out_multithreshold1", t_type, t_shape + ) + out_multithreshold2 = oh.make_tensor_value_info( + "out_multithreshold2", t_type, t_shape + ) + out_add0 = oh.make_tensor_value_info("out_add0", t_type, t_shape) + out_multithreshold3 = oh.make_tensor_value_info( + "out_multithreshold3", t_type, t_shape + ) + out_conv2 = oh.make_tensor_value_info("out_conv2", t_type, t_shape) + out_conv3 = oh.make_tensor_value_info("out_conv3", t_type, t_shape) + out_multithreshold4 = oh.make_tensor_value_info( + "out_multithreshold4", t_type, t_shape + ) + out_multithreshold5 = oh.make_tensor_value_info( + "out_multithreshold5", t_type, t_shape + ) + + graph = oh.make_graph( + nodes=[ + MultiThreshold0_node, + 
Conv0_node, + Conv1_node, + MultiThreshold1_node, + MultiThreshold2_node, + Add0_node, + MultiThreshold3_node, + Conv2_node, + Conv3_node, + MultiThreshold4_node, + MultiThreshold5_node, + Add1_node, + ], + name="test_graph", + inputs=[in1_multithreshold0], + outputs=[out_add1], + value_info=[ + in2_multithreshold0, + in2_conv0, + in2_conv1, + in2_multithreshold1, + in2_multithreshold2, + in2_multithreshold3, + in2_conv2, + in2_conv3, + in2_multithreshold4, + in2_multithreshold5, + out_multithreshold0, + out_conv0, + out_conv1, + out_multithreshold1, + out_multithreshold2, + out_add0, + out_multithreshold3, + out_conv2, + out_conv3, + out_multithreshold4, + out_multithreshold5, + ], + ) + + onnx_model = oh.make_model(graph, producer_name="test_model") + model = ModelWrapper(onnx_model) + + mt_weights = np.random.randint(low=-1000, high=1000, size=[6, 256, 15]) + mt_weights = np.sort(mt_weights, 2) + for i in range(0, 6): + model.set_initializer("in2_multithreshold" + str(i), mt_weights[i]) + + conv_weights = np.random.randint(low=-8, high=7, size=[4, 256, 256, 1, 1]).astype( + np.float32 + ) + for i in range(0, 4): + model.set_initializer("in2_conv" + str(i), conv_weights[i]) + model.set_tensor_datatype("in2_conv" + str(i), DataType.INT4) + + return model + + +# Partitioning +@pytest.mark.parametrize("p", [0, 1, 2]) +# Extending +@pytest.mark.parametrize("extend_id", [[0], [1], [0, 1]]) +def test_extend_partition(p, extend_id): + if p == 0: + if extend_id != [0]: + pytest.skip("Only the first partition node can be extended") + if p == 1: + if extend_id != [1]: + pytest.skip("Only the second partition node can be extended") + else: + extend_id = [6] # The 6th node is the index of the GenericPartition + # node, so we set the index to the right value + + model = create_model() + + # Partition the model first + partitionings = [ + {0: range(0, 6)}, + {0: range(6, 12)}, + {0: range(0, 6), 1: range(6, 12)}, + ] + partitioning = partitionings[p] + + model = 
model.transform(PartitionFromDict(partitioning)) + + # Create input data + input0_tensor_name = model.graph.input[0].name + + input_shape = model.get_tensor_shape(input0_tensor_name) + input_dtype = model.get_tensor_datatype(input0_tensor_name) + input_val = gen_finn_dt_tensor(input_dtype, input_shape) + input_dict = {} + input_dict[input0_tensor_name] = input_val + + # Extend the model + model_extended = model.transform(ExtendPartition(extend_id)) + + assert oxe.compare_execution(model, model_extended, input_dict) + + # Check if FINN data_types are retained + for n in model_extended.graph.node: + if n.op_type == "Conv": + assert model_extended.get_tensor_datatype(n.input[1]) == DataType.INT4 diff --git a/tests/transformation/test_general_transformation.py b/tests/transformation/test_general_transformation.py index 825faf5..97536bd 100644 --- a/tests/transformation/test_general_transformation.py +++ b/tests/transformation/test_general_transformation.py @@ -138,12 +138,12 @@ def test_apply_config(): model = model.transform(GiveUniqueNodeNames()) # set up a config in a dict, then dump it to JSON config = {} - config["Defaults"] = {"kernel_size": [3, ["Im2Col"]]} - config["Im2Col_0"] = {"kernel_size": 7} + config["Defaults"] = {"kernel_size": [[3, 3], ["Im2Col"]]} + config["Im2Col_0"] = {"kernel_size": [7, 7]} with open("config.json", "w") as f: json.dump(config, f, indent=4) model = model.transform(ApplyConfig("config.json")) # check model - assert getCustomOp(model.graph.node[2]).get_nodeattr("kernel_size") == 7 - assert getCustomOp(model.graph.node[9]).get_nodeattr("kernel_size") == 3 + assert getCustomOp(model.graph.node[2]).get_nodeattr("kernel_size") == [7, 7] + assert getCustomOp(model.graph.node[9]).get_nodeattr("kernel_size") == [3, 3] os.remove("config.json") diff --git a/tests/transformation/test_generic_partitioning.py b/tests/transformation/test_generic_partitioning.py new file mode 100755 index 0000000..d5def96 --- /dev/null +++ 
b/tests/transformation/test_generic_partitioning.py @@ -0,0 +1,111 @@ +# Copyright (c) 2020 Xilinx, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of Xilinx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +import numpy as np +from onnx import TensorProto, helper + +import finn.core.onnx_exec as oxe +from finn.core.modelwrapper import ModelWrapper +from finn.custom_op.registry import getCustomOp +from finn.transformation.create_generic_partitions import PartitionFromDict +from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.infer_shapes import InferShapes + + +# select example partitioning +@pytest.mark.parametrize("p", [0, 1, 2, 3]) +def test_generic_partitioning(p): + partitionings = [ + {0: range(0, 4)}, + {0: [0], 1: [3]}, + {0: [1, 2]}, + {"first": [0, 1], "last": [2, 3]}, + ] + partitioning = partitionings[p] + + # set up model + shape = [1, 10] + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, shape) + a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, []) + a1 = helper.make_tensor_value_info("a1", TensorProto.FLOAT, []) + a2 = helper.make_tensor_value_info("a2", TensorProto.FLOAT, []) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, shape) + + mul_node = helper.make_node("Mul", ["inp", "a0"], ["mul_out"]) + div_node = helper.make_node("Div", ["mul_out", "a1"], ["div_out"]) + sub_node = helper.make_node("Sub", ["div_out", "a2"], ["sub_out"]) + add_node = helper.make_node("Add", ["sub_out", "mul_out"], ["outp"]) + + graph = helper.make_graph( + nodes=[mul_node, div_node, sub_node, add_node], + name="model-graph", + inputs=[inp], + outputs=[outp], + value_info=[a0, a1, a2], + ) + + model = helper.make_model(graph, producer_name="model") + model = ModelWrapper(model) + # initialize model + a0_value = np.random.uniform(low=0, high=1, size=(1)).astype(np.float32) + model.set_initializer("a0", a0_value) + a1_value = np.random.uniform(low=0.1, high=1, size=(1)).astype(np.float32) + model.set_initializer("a1", a1_value) + a2_value = np.random.uniform(low=0.1, high=1, size=(1)).astype(np.float32) + model.set_initializer("a2", a2_value) + + model = 
model.transform(InferShapes()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + + # apply partitioning + model_parent = model.transform(PartitionFromDict(partitioning)) + + # random input data + inp_values = np.random.random_sample(shape).astype(np.float32) + idict = {model.graph.input[0].name: inp_values} + + # test transformed model + assert oxe.compare_execution(model, model_parent, idict) + + # examine created partitions + num_nodes_expected = len(model.graph.node) + for p_node in model_parent.get_nodes_by_op_type("GenericPartition"): + p_node = getCustomOp(p_node) + p_model_filename = p_node.get_nodeattr("model") + model_child = ModelWrapper(p_model_filename) + num_nodes_expected -= len(model_child.graph.node) - 1 + + # count number of partitions + assert len(model_parent.get_nodes_by_op_type("GenericPartition")) == len( + partitioning + ) + # count number of nodes + assert len(model_parent.graph.node) == num_nodes_expected diff --git a/tests/transformation/test_merge_onnx_models.py b/tests/transformation/test_merge_onnx_models.py index 46dd0ef..aab0802 100644 --- a/tests/transformation/test_merge_onnx_models.py +++ b/tests/transformation/test_merge_onnx_models.py @@ -81,7 +81,7 @@ def test_merge_onnx_models(): model2.set_initializer("a1", a1_value) # set a dummy sparsity annotation to check if it gets correctly transferred # to the merged model - sparsity = {"dw": {"kernel_shape": 0}} + sparsity = {"dw": {"kernel_shape": [0, 0]}} model2.set_tensor_sparsity("a1", sparsity) model2 = model2.transform(InferShapes()) model2 = model2.transform(InferDataTypes()) diff --git a/tests/util/test_data_packing.py b/tests/util/test_data_packing.py index 42086ca..13a791a 100644 --- a/tests/util/test_data_packing.py +++ b/tests/util/test_data_packing.py @@ -29,6 +29,7 @@ import numpy as np from finn.core.datatype import DataType +from finn.util.basic import gen_finn_dt_tensor from finn.util.data_packing import ( 
array2hexstring, finnpy_to_packed_bytearray, @@ -86,6 +87,23 @@ def test_finnpy_to_packed_bytearray(): assert (finnpy_to_packed_bytearray(E, DataType.INT32) == eE).all() +def test_finnpy_to_packed_bytearray_fastmode_binary(): + def test_fast_vs_slow_random(idt, ishape): + iarr = gen_finn_dt_tensor(idt, ishape) + ret_slow = finnpy_to_packed_bytearray( + iarr, idt, reverse_endian=True, reverse_inner=True, fast_mode=False + ) + ret_fast = finnpy_to_packed_bytearray( + iarr, idt, reverse_endian=True, reverse_inner=True, fast_mode=True + ) + assert (ret_fast == ret_slow).all() + + for i in range(5): + test_fast_vs_slow_random(DataType.BIPOLAR, (1, 8)) + test_fast_vs_slow_random(DataType.BINARY, (1, 16)) + test_fast_vs_slow_random(DataType.BIPOLAR, (10, 600)) + + def test_packed_bytearray_to_finnpy(): A = np.asarray([[14], [6]], dtype=np.uint8) eA = [[1, 1, 1, 0], [0, 1, 1, 0]]