Skip to content

Commit d6b0448

Browse files
[AArch64] Lower __builtin_bswap16 to rev16 if return value is 16-bit
Fixes #77222.
1 parent cedb828 commit d6b0448

File tree

3 files changed

+41
-5
lines changed

3 files changed

+41
-5
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22137,6 +22137,22 @@ static SDValue performExtendCombine(SDNode *N,
2213722137
N->getOperand(0)->getOpcode() == ISD::SETCC)
2213822138
return performSignExtendSetCCCombine(N, DCI, DAG);
2213922139

22140+
// If we see (any_extend (bswap ...)) with bswap returning an i16, we know
22141+
// that the top half of the result register must be unused, due to the
22142+
// any_extend. This means that we can replace this pattern with (rev16
22143+
// (any_extend ...)). This saves a machine instruction compared to (lsr (rev
22144+
// ...)), which is what this pattern would otherwise be lowered to.
22145+
if (N->getOpcode() == ISD::ANY_EXTEND &&
22146+
N->getOperand(0).getOpcode() == ISD::BSWAP &&
22147+
N->getOperand(0).getValueType().isScalarInteger() &&
22148+
N->getOperand(0).getValueType().getFixedSizeInBits() == 16) {
22149+
SDNode *BswapNode = N->getOperand(0).getNode();
22150+
SDValue NewAnyExtend = DAG.getNode(ISD::ANY_EXTEND, SDLoc(BswapNode),
22151+
EVT(MVT::i32), BswapNode->getOperand(0));
22152+
return DAG.getNode(AArch64ISD::REV16, SDLoc(N), N->getValueType(0),
22153+
NewAnyExtend);
22154+
}
22155+
2214022156
return SDValue();
2214122157
}
2214222158

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -758,6 +758,8 @@ def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
758758
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
759759
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
760760

761+
def AArch64rev16_scalar : SDNode<"AArch64ISD::REV16", SDTIntUnaryOp>;
762+
761763
def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
762764
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
763765
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
@@ -2840,6 +2842,8 @@ def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
28402842
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
28412843
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
28422844

2845+
def : Pat<(AArch64rev16_scalar GPR32:$Rn), (REV16Wr GPR32:$Rn)>;
2846+
28432847
def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
28442848
(and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
28452849
(REV16Xr GPR64:$Rn)>;

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,32 @@
33
; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
; ====== Scalar Tests =====
6-
define i16 @bswap_i16(i16 %a){
7-
; CHECK-LABEL: bswap_i16:
6+
define i16 @bswap_i16_to_i16(i16 %a){
7+
; CHECK-SD-LABEL: bswap_i16_to_i16:
8+
; CHECK-SD: // %bb.0:
9+
; CHECK-SD-NEXT: rev16 w0, w0
10+
; CHECK-SD-NEXT: ret
11+
;
12+
; CHECK-GI-LABEL: bswap_i16_to_i16:
13+
; CHECK-GI: // %bb.0:
14+
; CHECK-GI-NEXT: rev w8, w0
15+
; CHECK-GI-NEXT: lsr w0, w8, #16
16+
; CHECK-GI-NEXT: ret
17+
%3 = call i16 @llvm.bswap.i16(i16 %a)
18+
ret i16 %3
19+
}
20+
declare i16 @llvm.bswap.i16(i16)
21+
22+
define i32 @bswap_i16_to_i32(i16 %a){
23+
; CHECK-LABEL: bswap_i16_to_i32:
824
; CHECK: // %bb.0:
925
; CHECK-NEXT: rev w8, w0
1026
; CHECK-NEXT: lsr w0, w8, #16
1127
; CHECK-NEXT: ret
12-
%3 = call i16 @llvm.bswap.i16(i16 %a)
13-
ret i16 %3
28+
%3 = call i16 @llvm.bswap.i16(i16 %a)
29+
%4 = zext i16 %3 to i32
30+
ret i32 %4
1431
}
15-
declare i16 @llvm.bswap.i16(i16)
1632

1733
define i32 @bswap_i32(i32 %a){
1834
; CHECK-LABEL: bswap_i32:

0 commit comments

Comments
 (0)