diff --git a/ML-Frameworks/pytorch-aarch64/CHANGELOG.md b/ML-Frameworks/pytorch-aarch64/CHANGELOG.md index 025002e..97be37d 100644 --- a/ML-Frameworks/pytorch-aarch64/CHANGELOG.md +++ b/ML-Frameworks/pytorch-aarch64/CHANGELOG.md @@ -7,18 +7,31 @@ where `YY` is the year, and `MM` the month of the increment. ## [unreleased] +### Added + +### Changed + +### Removed + +### Fixed + +## [r25.02] 2025-02-11 +https://github.com/ARM-software/Tool-Solutions/tree/pytorch-aarch64--r25.02 + ### Added - Adds work-in-progress PyTorch PRs: - - 145942 3d05899222da2b93ed3d4c88c382d318e68eeec6 - Enable qlinear_dynamic path for AArch64 through Arm Compute Library directly. - - 146476 8cacbf8a58ba43bb51340ba69204be342b368cf5 - Improve KleidiAI 4 bit kernel performance - - 143666 8e5134e9c22cdb6150e425bee43015998ae55c59 - Extend Vec backend with SVE BF16 + - https://github.com/pytorch/pytorch/pull/145942 - Enable qlinear_dynamic path for AArch64 through Arm Compute Library directly. Gives a ~15% speedup over the approach in Tool Solutions 24.12. + - https://github.com/pytorch/pytorch/pull/146476 - Improve KleidiAI 4 bit kernel performance. Greater than 10% performance improvement when calling INT4 KleidiAI kernels. + - https://github.com/pytorch/pytorch/pull/143666 - Extend Vec backend with SVE BF16. - Adds work-in-progress oneDNN PRs: - - 2502 49ac258a43520562a196ba081a3c259ac3732df2 - cpu: aarch64: ip: Allow bf16 for ACL inner product + - https://github.com/oneapi-src/oneDNN/pull/2502 - cpu: aarch64: ip: Allow bf16 for ACL inner product. Gives speedups of ~170x for BERT_pytorch and ~160x for alexnet using bf16 compile mode. - Minor improvements to build process and logging. +- OpenBLAS build from source at 1b85b6a396c94e78c9ba14aafcdfd5c5da5a8bb2 from develop branch. + This includes https://github.com/OpenMathLib/OpenBLAS/pull/5108 to add SBGEMM support for 256bit SVE. 
### Changed - Updates hashes for: - - PyTorch to 8d4926e30a944320adf434016129cb6788eff79b, from viable/strict + - PyTorch to 8d4926e30a944320adf434016129cb6788eff79b (2.7.0.dev20250115), from viable/strict - ideep to e026f3b0318087fe19e2b062e8edf55bfe7a522c, from ideep_pytorch - oneDNN to 0fd3b73a25d11106e141ddefd19fcacc74f8bbfe, from main - Arm Compute Library to 6acccf1730b48c9a22155998fc4b2e0752472148. from main @@ -31,11 +44,13 @@ where `YY` is the year, and `MM` the month of the increment. ### Removed - Removes patches that are now merged upstream. +- Removes https://github.com/pytorch/pytorch/pull/139387 - Add prepacking for linear weights. Performance gains better realised by ideep reorder caching. ### Fixed - Addition of https://github.com/pytorch/pytorch/pull/145486 fixes illigal instruction on non-SVE targets. ## [r24.12] 2024-12-20 +https://github.com/ARM-software/Tool-Solutions/tree/pytorch-aarch64--r24.12 ### Added - Adds torchao. diff --git a/ML-Frameworks/pytorch-aarch64/build-wheel.sh b/ML-Frameworks/pytorch-aarch64/build-wheel.sh index e349bab..f234db3 100755 --- a/ML-Frameworks/pytorch-aarch64/build-wheel.sh +++ b/ML-Frameworks/pytorch-aarch64/build-wheel.sh @@ -87,7 +87,7 @@ if ! 
docker container inspect $TORCH_BUILD_CONTAINER >/dev/null 2>&1 ; then docker exec -t $TORCH_BUILD_CONTAINER bash -c "yum install -y tbb tbb-devel" # This must be in this if block because it cannot handle being called twice - docker exec -t $TORCH_BUILD_CONTAINER bash -c "bash $UTILS/install_openblas.sh" + docker exec -t $TORCH_BUILD_CONTAINER bash -c "bash $UTILS/build_openblas.sh" echo "Storing torch build container id in $TORCH_BUILD_CONTAINER_ID_FILE for reuse: $TORCH_BUILD_CONTAINER" echo $TORCH_BUILD_CONTAINER > "$TORCH_BUILD_CONTAINER_ID_FILE" diff --git a/ML-Frameworks/pytorch-aarch64/get-source.sh b/ML-Frameworks/pytorch-aarch64/get-source.sh index eed9237..44a40e9 100755 --- a/ML-Frameworks/pytorch-aarch64/get-source.sh +++ b/ML-Frameworks/pytorch-aarch64/get-source.sh @@ -37,12 +37,6 @@ git-shallow-clone https://github.com/pytorch/pytorch.git $PYTORCH_HASH apply-github-patch https://github.com/pytorch/pytorch 139887 eff3c11b1a31f725b50020ce32f6eddba17b5a94 # Use s8s8s8 for qlinear on aarch64 instead of u8s8u8 with mkl-dnn apply-github-patch https://github.com/pytorch/pytorch 136850 6d5aaff8434203f870d76d840158d6989ddd61d0 # Enable XNNPACK for quantized add apply-github-patch https://github.com/pytorch/pytorch 142391 8373846f441381a56e7abd905af84102aa52fc7b # parallelize sort - apply-github-patch https://github.com/pytorch/pytorch 139387 4140ee51b77d11966fc6d584f11eb15a45ec63c7 # Add prepacking for linear weights - apply-github-patch https://github.com/pytorch/pytorch 139387 c71443890e6d5538168d4fb2d760200fd9fef8d4 # Add prepacking for linear weights - apply-github-patch https://github.com/pytorch/pytorch 139387 3759c98aced224d9aa987e80db46a22355001cf5 # Add prepacking for linear weights - apply-github-patch https://github.com/pytorch/pytorch 139387 a7ff8b73fc5a44f8842853fe25a53695ecca92f2 # Add prepacking for linear weights - apply-github-patch https://github.com/pytorch/pytorch 139387 d4512a8ea1748ca15c751c2e87d177ef1c7094da # Add prepacking for linear 
weights - apply-github-patch https://github.com/pytorch/pytorch 139387 d4145ece750e0737b0d1e73677d658817c4b64da # Add prepacking for linear weights apply-github-patch https://github.com/pytorch/pytorch 140159 8d3404ec5972528f606fe605887ad2254a174fbc # cpu: aarch64: enable gemm-bf16f32 apply-github-patch https://github.com/pytorch/pytorch 140159 ab4c191ef0de1e4eced6b4dd7b6e387f57034ad9 # cpu: aarch64: enable gemm-bf16f32 apply-github-patch https://github.com/pytorch/pytorch 140159 879ca72d54559a388db315eed40803d2f1c827b7 # cpu: aarch64: enable gemm-bf16f32 diff --git a/ML-Frameworks/pytorch-aarch64/utils/install_openblas.sh b/ML-Frameworks/pytorch-aarch64/utils/build_openblas.sh similarity index 80% rename from ML-Frameworks/pytorch-aarch64/utils/install_openblas.sh rename to ML-Frameworks/pytorch-aarch64/utils/build_openblas.sh index be82b79..12950d8 100755 --- a/ML-Frameworks/pytorch-aarch64/utils/install_openblas.sh +++ b/ML-Frameworks/pytorch-aarch64/utils/build_openblas.sh @@ -25,16 +25,11 @@ source /utils/helper.sh set -ex +OPENBLAS_HASH="1b85b6a396c94e78c9ba14aafcdfd5c5da5a8bb2" OPENBLAS_CHECKOUT_DIR="OpenBLAS" cd / -git clone https://github.com/OpenMathLib/OpenBLAS.git -b develop --depth 1 -( - cd $OPENBLAS_CHECKOUT_DIR - apply-github-patch https://github.com/OpenMathLib/OpenBLAS/ 5108 4379a6fbe37038082c657bba5be5c67331a0bd0b - apply-github-patch https://github.com/OpenMathLib/OpenBLAS/ 5108 c748e6a33871f0dfa3bf6569c88a676c9a387411 - cd / -) +git-shallow-clone https://github.com/OpenMathLib/OpenBLAS.git $OPENBLAS_HASH OPENBLAS_BUILD_FLAGS=" NUM_THREADS=128