Skip to content

Commit 1b936e4

Browse files
authored
[AArch64] Add FEAT_SME_B16B16 and remove FEAT_B16B16 (#102501)
Implement FEAT_SME_B16B16 to enable ZA-targeting non-widening SME BFloat16 instructions. Remove the now redundant FEAT_B16B16 which has been replaced by FEAT_SVE_B16B16 and FEAT_SME_B16B16 (this commit), see #101480 for the details and reasoning of this change to LLVM. FEAT_SME_B16B16 is documented under the latest Armv9.4 feature documentation: https://developer.arm.com/documentation/109697/0100/Feature-descriptions/The-Armv9-4-architecture-extensio - Changes to Clang AArch64 frontend - Change target guard of SME2 ZA-targeting non-widening BFloat16 intrinsics to 'sme-b16b16' - Changes to LLVM AArch64 backend - llvm/lib/Target/AArch64/AArch64Features.td - Create FeatureSMEB16B16, which implies FeatureSME2 and FeatureSVEB16B16 - Remove FeatureB16B16 - Fix description of FeatureSVEB16B16 - llvm/lib/Target/AArch64/AArch64InstrInfo.td - Create HasSMEB16B16 predicate - llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td - Change predictication of SME2 ZA-targeting non-widening BFloat16 instructions to new HasSMEB16B16 - llvm/lib/Target/AArch64/AArch64.td - Add HasSMEB16B16 to SME2Unsupported (FEAT_SME_B16B16 implies FEAT_SME2) - llvm/lib/AArch64/AsmParser/AArch64AsmParser.cpp - Remove flag 'b16b16' mapping to removed FeatureB16B16 - Add flag 'sme-b16b16' mapping to new FeatureSMEB16B16 - Changes to LLVM unit tests - llvm/unittests/TargetParser/TargetParserTest.cpp - Add new sme-b16b16 flag to existing target parser tests - Add tests for the sme-b16b16 dependencies: - 'sme-b16b16' should enable 'sme2', 'sve-b16b16'. - Remove 'b16b16' from bf16 dependency test - Added MC tests - llvm/test/MC/AArch64/SME2p1 - To ensure that ZA-targeting multi-vector non-widening BFloat16 instructions are enabled by +sme-b16b16, and that this feature is removed by +nosme-b61b6. - Modidified tests - All CodeGen, Semantic, and MC tests that are effected by the removal of 'b16b16', have been modified to supply and/or expect 'sme-b16b16' where appropriate.
1 parent 3512bcc commit 1b936e4

32 files changed

+536
-509
lines changed

clang/include/clang/Basic/arm_sme.td

+3-3
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ multiclass ZAAddSub<string n_suffix> {
325325
def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
326326
}
327327

328-
let SMETargetGuard = "sme2,b16b16" in {
328+
let SMETargetGuard = "sme-b16b16" in {
329329
def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>;
330330
def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>;
331331
}
@@ -506,7 +506,7 @@ let SMETargetGuard = "sme-f16f16" in {
506506
def SVMLS_LANE_VG1x4_F16 : Inst<"svmls_lane_za16[_f16]_vg1x4", "vm4di", "h", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>;
507507
}
508508

509-
let SMETargetGuard = "sme2,b16b16" in {
509+
let SMETargetGuard = "sme-b16b16" in {
510510
def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>;
511511
def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>;
512512
def SVMLS_MULTI_VG1x2_BF16 : Inst<"svmls_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>;
@@ -742,7 +742,7 @@ let SMETargetGuard = "sme2" in {
742742

743743
////////////////////////////////////////////////////////////////////////////////
744744
// SME2p1 - FMOPA, FMOPS (non-widening)
745-
let SMETargetGuard = "sme2,b16b16" in {
745+
let SMETargetGuard = "sme-b16b16" in {
746746
def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
747747
MergeNone, "aarch64_sme_mopa",
748748
[IsStreaming, IsInOutZA],

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2-
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3-
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
4-
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
5-
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
2+
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
4+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f8f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX
66

7-
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null
7+
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -S -Werror -Wall -o /dev/null
88

99
// REQUIRES: aarch64-registered-target
1010

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2-
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3-
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
4-
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
5-
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
2+
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
4+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
66

7-
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall %s -o /dev/null
7+
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -S -Werror -Wall %s -o /dev/null
88

99
// REQUIRES: aarch64-registered-target
1010
#include <arm_sme.h>

clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2-
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
3-
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX
4-
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
5-
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX
2+
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
3+
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX
4+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK
5+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX
66

7-
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s
7+
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s
88

99
// REQUIRES: aarch64-registered-target
1010

clang/test/Driver/print-supported-extensions-aarch64.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
// CHECK-EMPTY:
66
// CHECK-NEXT: Name Architecture Feature(s) Description
77
// CHECK-NEXT: aes FEAT_AES, FEAT_PMULL Enable AES support
8-
// CHECK-NEXT: b16b16 FEAT_B16B16 Enable SME2.1 ZA-targeting non-widening BFloat16 to BFloat16 instructions
98
// CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension
109
// CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension
1110
// CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification
@@ -55,6 +54,7 @@
5554
// CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
5655
// CHECK-NEXT: sm4 FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
5756
// CHECK-NEXT: sme FEAT_SME Enable Scalable Matrix Extension (SME)
57+
// CHECK-NEXT: sme-b16b16 FEAT_SME_B16B16 Enable SME2.1 ZA-targeting non-widening BFloat16 instructions
5858
// CHECK-NEXT: sme-f16f16 FEAT_SME_F16F16 Enable SME non-widening Float16 instructions
5959
// CHECK-NEXT: sme-f64f64 FEAT_SME_F64F64 Enable Scalable Matrix Extension (SME) F64F64 instructions
6060
// CHECK-NEXT: sme-f8f16 FEAT_SME_F8F16 Enable Scalable Matrix Extension (SME) F8F16 instructions
@@ -71,7 +71,7 @@
7171
// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions
7272
// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions
7373
// CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
74-
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2.1 non-widening and SME2.1 Z-targeting non-widening BFloat16 to BFloat16 instructions
74+
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
7575
// CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
7676
// CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions
7777
// CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ void test_features(uint32_t slice, svfloat16x2_t zn2, svfloat16x4_t zn4,
1515
// expected-error@+1 {{'svsub_za16_f16_vg1x4' needs target feature sme-f16f16|sme-f8f16}}
1616
svsub_za16_f16_vg1x4(slice, zn4);
1717

18-
// expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
18+
// expected-error@+1 {{'svadd_za16_bf16_vg1x2' needs target feature sme-b16b16}}
1919
svadd_za16_bf16_vg1x2(slice, bzn2);
20-
// expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
20+
// expected-error@+1 {{'svadd_za16_bf16_vg1x4' needs target feature sme-b16b16}}
2121
svadd_za16_bf16_vg1x4(slice, bzn4);
22-
// expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
22+
// expected-error@+1 {{'svsub_za16_bf16_vg1x2' needs target feature sme-b16b16}}
2323
svsub_za16_bf16_vg1x2(slice, bzn2);
24-
// expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
24+
// expected-error@+1 {{'svsub_za16_bf16_vg1x4' needs target feature sme-b16b16}}
2525
svsub_za16_bf16_vg1x4(slice, bzn4);
2626
}
2727

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fmlas16.c

+12-12
Original file line numberDiff line numberDiff line change
@@ -39,29 +39,29 @@ void test_features_f16f16(uint32_t slice,
3939
// expected-error@+1 {{'svmls_lane_za16_f16_vg1x4' needs target feature sme-f16f16}}
4040
svmls_lane_za16_f16_vg1x4(slice, zn4, zm, 7);
4141

42-
// expected-error@+1 {{'svmla_single_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
42+
// expected-error@+1 {{'svmla_single_za16_bf16_vg1x2' needs target feature sme-b16b16}}
4343
svmla_single_za16_bf16_vg1x2(slice, bzn2, bzm);
44-
// expected-error@+1 {{'svmla_single_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
44+
// expected-error@+1 {{'svmla_single_za16_bf16_vg1x4' needs target feature sme-b16b16}}
4545
svmla_single_za16_bf16_vg1x4(slice, bzn4, bzm);
46-
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
46+
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x2' needs target feature sme-b16b16}}
4747
svmls_single_za16_bf16_vg1x2(slice, bzn2, bzm);
48-
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
48+
// expected-error@+1 {{'svmls_single_za16_bf16_vg1x4' needs target feature sme-b16b16}}
4949
svmls_single_za16_bf16_vg1x4(slice, bzn4, bzm);
50-
// expected-error@+1 {{'svmla_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
50+
// expected-error@+1 {{'svmla_za16_bf16_vg1x2' needs target feature sme-b16b16}}
5151
svmla_za16_bf16_vg1x2(slice, bzn2, bzm2);
52-
// expected-error@+1 {{'svmla_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
52+
// expected-error@+1 {{'svmla_za16_bf16_vg1x4' needs target feature sme-b16b16}}
5353
svmla_za16_bf16_vg1x4(slice, bzn4, bzm4);
54-
// expected-error@+1 {{'svmls_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
54+
// expected-error@+1 {{'svmls_za16_bf16_vg1x2' needs target feature sme-b16b16}}
5555
svmls_za16_bf16_vg1x2(slice, bzn2, bzm2);
56-
// expected-error@+1 {{'svmls_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
56+
// expected-error@+1 {{'svmls_za16_bf16_vg1x4' needs target feature sme-b16b16}}
5757
svmls_za16_bf16_vg1x4(slice, bzn4, bzm4);
58-
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
58+
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x2' needs target feature sme-b16b16}}
5959
svmla_lane_za16_bf16_vg1x2(slice, bzn2, bzm, 7);
60-
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
60+
// expected-error@+1 {{'svmla_lane_za16_bf16_vg1x4' needs target feature sme-b16b16}}
6161
svmla_lane_za16_bf16_vg1x4(slice, bzn4, bzm, 7);
62-
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x2' needs target feature sme2,b16b16}}
62+
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x2' needs target feature sme-b16b16}}
6363
svmls_lane_za16_bf16_vg1x2(slice, bzn2, bzm, 7);
64-
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x4' needs target feature sme2,b16b16}}
64+
// expected-error@+1 {{'svmls_lane_za16_bf16_vg1x4' needs target feature sme-b16b16}}
6565
svmls_lane_za16_bf16_vg1x4(slice, bzn4, bzm, 7);
6666
}
6767

clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ void test_features(svbool_t pn, svbool_t pm,
88
svfloat16_t zn, svfloat16_t zm,
99
svbfloat16_t znb, svbfloat16_t zmb)
1010
__arm_streaming __arm_inout("za") {
11-
// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme2,b16b16}}
11+
// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme-b16b16}}
1212
svmopa_za16_bf16_m(0, pn, pm, znb, zmb);
13-
// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme2,b16b16}}
13+
// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme-b16b16}}
1414
svmops_za16_bf16_m(0, pn, pm, znb, zmb);
1515
// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme-f16f16}}
1616
svmopa_za16_f16_m(0, pn, pm, zn, zm);

llvm/lib/Target/AArch64/AArch64.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def SME2p1Unsupported : AArch64Unsupported;
7777

7878
def SME2Unsupported : AArch64Unsupported {
7979
let F = !listconcat([HasSME2, HasSVE2orSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
80-
HasSMEF8F16, HasSMEF8F32, HasSMEF16F16orSMEF8F16],
80+
HasSMEF8F16, HasSMEF8F32, HasSMEF16F16orSMEF8F16, HasSMEB16B16],
8181
SME2p1Unsupported.F);
8282
}
8383

llvm/lib/Target/AArch64/AArch64Features.td

+5-4
Original file line numberDiff line numberDiff line change
@@ -435,11 +435,12 @@ def FeatureMEC : Extension<"mec", "MEC", "FEAT_MEC",
435435
def FeatureSVE2p1: ExtensionWithMArch<"sve2p1", "SVE2p1", "FEAT_SVE2p1",
436436
"Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;
437437

438-
def FeatureB16B16 : ExtensionWithMArch<"b16b16", "B16B16", "FEAT_B16B16",
439-
"Enable SME2.1 ZA-targeting non-widening BFloat16 to BFloat16 instructions", [FeatureBF16]>;
440-
441438
def FeatureSVEB16B16: ExtensionWithMArch<"sve-b16b16", "SVEB16B16", "FEAT_SVE_B16B16",
442-
"Enable SVE2.1 non-widening and SME2.1 Z-targeting non-widening BFloat16 to BFloat16 instructions">;
439+
"Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions">;
440+
441+
def FeatureSMEB16B16 : ExtensionWithMArch<"sme-b16b16", "SMEB16B16", "FEAT_SME_B16B16",
442+
"Enable SME2.1 ZA-targeting non-widening BFloat16 instructions",
443+
[FeatureSME2, FeatureSVEB16B16]>;
443444

444445
def FeatureSMEF16F16 : ExtensionWithMArch<"sme-f16f16", "SMEF16F16", "FEAT_SME_F16F16",
445446
"Enable SME non-widening Float16 instructions", [FeatureSME2]>;

0 commit comments

Comments
 (0)