From 69898c4102b699a1f48121573ec61881fe933afe Mon Sep 17 00:00:00 2001 From: guochen2 Date: Fri, 14 Feb 2025 02:26:10 -0500 Subject: [PATCH] Revert "[AMDGPU][True16][CodeGen] true16 codegen pattern for fma (#122950)" This reverts commit 2a7487cc2e0fb8bd91784e2d9636a65baa6d90ed. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 2 - llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 74 ++-- llvm/lib/Target/AMDGPU/SIInstructions.td | 8 - .../Target/AMDGPU/SIShrinkInstructions.cpp | 17 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll | 68 ++-- .../CodeGen/AMDGPU/fix-sgpr-copies-f16.mir | 3 +- llvm/test/CodeGen/AMDGPU/fma.f16.ll | 328 +++++------------- .../CodeGen/AMDGPU/shrink-mad-fma-fake16.mir | 242 ------------- .../CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir | 258 -------------- llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir | 115 +++++- 10 files changed, 244 insertions(+), 871 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir delete mode 100644 llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 9cc74a7acd8ae..d8f3f9c54abc1 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -198,8 +198,6 @@ static unsigned macToMad(unsigned Opc) { return AMDGPU::V_FMA_F32_e64; case AMDGPU::V_FMAC_F16_e64: return AMDGPU::V_FMA_F16_gfx9_e64; - case AMDGPU::V_FMAC_F16_t16_e64: - return AMDGPU::V_FMA_F16_gfx9_t16_e64; case AMDGPU::V_FMAC_F16_fake16_e64: return AMDGPU::V_FMA_F16_gfx9_fake16_e64; case AMDGPU::V_FMAC_LEGACY_F32_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0a01ee1dc3a71..baacb5d3d5455 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3544,7 +3544,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64) { // Don't fold if we are using source or output modifiers. The new VOP2 // instructions don't have them. @@ -3565,7 +3564,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64; MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); @@ -3599,19 +3597,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned NewOpc = IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 - : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts() - ? AMDGPU::V_FMAMK_F16_t16 - : AMDGPU::V_FMAMK_F16_fake16 + : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16 : AMDGPU::V_FMAMK_F16) : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); if (pseudoToMCOpcode(NewOpc) == -1) return false; - // V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16 - // takes VGPR_32_Lo128 operands, so the rewrite would also require - // restricting their register classes. For now just bail out. - if (NewOpc == AMDGPU::V_FMAMK_F16_t16 || - NewOpc == AMDGPU::V_FMAMK_F16_fake16) + // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite + // would also require restricting their register classes. For now + // just bail out. + if (NewOpc == AMDGPU::V_FMAMK_F16_fake16) return false; const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1); @@ -3626,7 +3621,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Src0->setIsKill(RegSrc->isKill()); if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); @@ -3681,26 +3676,23 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned NewOpc = IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 - : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts() - ? AMDGPU::V_FMAAK_F16_t16 - : AMDGPU::V_FMAAK_F16_fake16 + : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16 : AMDGPU::V_FMAAK_F16) : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); if (pseudoToMCOpcode(NewOpc) == -1) return false; - // V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16 - // takes VGPR_32_Lo128 operands, so the rewrite would also require - // restricting their register classes. For now just bail out. - if (NewOpc == AMDGPU::V_FMAAK_F16_t16 || - NewOpc == AMDGPU::V_FMAAK_F16_fake16) + // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite + // would also require restricting their register classes. For now + // just bail out. + if (NewOpc == AMDGPU::V_FMAAK_F16_fake16) return false; // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); @@ -3887,11 +3879,8 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) { return AMDGPU::V_FMA_LEGACY_F32_e64; case AMDGPU::V_FMAC_F16_e32: case AMDGPU::V_FMAC_F16_e64: - case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: - return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts() - ? AMDGPU::V_FMA_F16_gfx9_t16_e64 - : AMDGPU::V_FMA_F16_gfx9_fake16_e64 + return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64 : AMDGPU::V_FMA_F16_gfx9_e64; case AMDGPU::V_FMAC_F32_e32: case AMDGPU::V_FMAC_F32_e64: @@ -3957,22 +3946,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return MIB; } - assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 && - Opc != AMDGPU::V_FMAC_F16_fake16_e32 && - "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be " - "present " - "pre-RA"); + assert( + Opc != AMDGPU::V_FMAC_F16_fake16_e32 && + "V_FMAC_F16_fake16_e32 is not supported and not expected to be present " + "pre-RA"); // Handle MAC/FMAC. bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64; bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; @@ -3987,7 +3973,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return nullptr; case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_FMAC_F16_e64: - case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_LEGACY_F32_e64: @@ -4073,11 +4058,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, int64_t Imm; if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { unsigned NewOpc = - IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() - ? ST.useRealTrue16Insts() - ? AMDGPU::V_FMAAK_F16_t16 - : AMDGPU::V_FMAAK_F16_fake16 - : AMDGPU::V_FMAAK_F16) + IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16 + : AMDGPU::V_FMAAK_F16) : AMDGPU::V_FMAAK_F32) : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); if (pseudoToMCOpcode(NewOpc) != -1) { @@ -4094,14 +4076,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return MIB; } } - unsigned NewOpc = IsFMA - ? (IsF16 ? (ST.hasTrue16BitInsts() - ? ST.useRealTrue16Insts() - ? AMDGPU::V_FMAMK_F16_t16 - : AMDGPU::V_FMAMK_F16_fake16 - : AMDGPU::V_FMAMK_F16) - : AMDGPU::V_FMAMK_F32) - : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); + unsigned NewOpc = + IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16 + : AMDGPU::V_FMAMK_F16) + : AMDGPU::V_FMAMK_F32) + : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) @@ -4547,7 +4526,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI, case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F16_e64: - case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: case AMDGPU::V_FMAC_F32_e64: case AMDGPU::V_FMAC_F64_e64: @@ -5604,9 +5582,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64; case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64; case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64; - case AMDGPU::S_FMAC_F16: - return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64 - : AMDGPU::V_FMAC_F16_fake16_e64; + case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64; case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32; case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32; case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 3faf0795157dc..6e08aff24ec23 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3287,14 +3287,6 @@ def : GCNPat < (V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2) >; -let True16Predicate = UseRealTrue16Insts in -def : GCNPat < - (fma (f16 (VOP3NoMods f16:$src0)), - (f16 (VOP3NoMods f16:$src1)), - (f16 (VOP3NoMods f16:$src2))), - (V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, - SRCMODS.NONE, $src2) ->; let True16Predicate = UseFakeTrue16Insts in def : GCNPat < (fma (f16 (VOP3NoMods f16:$src0)), diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index f03cde455f295..979812e07fc3f 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -455,13 +455,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: - NewOpcode = AMDGPU::V_FMAAK_F16; - break; - case AMDGPU::V_FMA_F16_gfx9_t16_e64: - NewOpcode = AMDGPU::V_FMAAK_F16_t16; - break; case AMDGPU::V_FMA_F16_gfx9_fake16_e64: - NewOpcode = AMDGPU::V_FMAAK_F16_fake16; + NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16 + : AMDGPU::V_FMAAK_F16; break; } } @@ -489,13 +485,9 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: - NewOpcode = AMDGPU::V_FMAMK_F16; - break; - case AMDGPU::V_FMA_F16_gfx9_t16_e64: - NewOpcode = AMDGPU::V_FMAMK_F16_t16; - break; case AMDGPU::V_FMA_F16_gfx9_fake16_e64: - NewOpcode = AMDGPU::V_FMAMK_F16_fake16; + NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16 + : AMDGPU::V_FMAMK_F16; break; } } @@ -967,7 +959,6 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { MI.getOpcode() == AMDGPU::V_MAD_F16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 || - MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) { shrinkMadFma(MI); continue; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll index 0b09cabf25a16..99e6c5d06a0e1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll @@ -3,8 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s define float @v_fma_f32(float %x, float %y, float %z) { ; GFX6-LABEL: v_fma_f32: @@ -108,18 +107,11 @@ define half @v_fma_f16(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: v_fma_f16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2 -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_fma_f16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: v_fma_f16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %fma = call half @llvm.fma.f16(half %x, half %y, half %z) ret half %fma } @@ -153,17 +145,11 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: v_fma_f16_fneg_lhs: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v1.l, v2.l -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_fma_f16_fneg_lhs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, v2 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: v_fma_f16_fneg_lhs: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg half %x %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z) ret half %fma @@ -198,17 +184,11 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: v_fma_f16_fneg_rhs: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, v2.l -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_fma_f16_fneg_rhs: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: v_fma_f16_fneg_rhs: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.y = fneg half %y %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z) ret half %fma @@ -243,17 +223,11 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: v_fma_f16_fneg_add: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: v_fma_f16_fneg_add: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2 -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: v_fma_f16_fneg_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] %neg.z = fneg half %z %fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z) ret half %fma diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir index 23e4b80b61f69..ac7944f25fe37 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# FIXME-TRUE16. reenable after fix-sgpr-copies is fixed for true16 flow -# XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,FAKE16 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll index a33fd03e0ce03..52a23690dcf53 100644 --- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll @@ -3,10 +3,8 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-TRUE16 -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-FAKE16 -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-TRUE16 -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-FAKE16 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL @@ -26,34 +24,11 @@ define half @test_fma(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: test_fma: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v0.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: test_fma: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-TRUE16-LABEL: test_fma: -; GFX11-GISEL-TRUE16: ; %bb.0: -; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2 -; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-FAKE16-LABEL: test_fma: -; GFX11-GISEL-FAKE16: ; %bb.0: -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_fma: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fma: ; GFX12: ; %bb.0: @@ -82,31 +57,11 @@ define half @test_fmac(half %x, half %y, half %z) { ; GFX10-NEXT: v_fmac_f16_e32 v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: test_fmac: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v1.l, v1.h -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: test_fmac: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_fmac_f16_e32 v0, v1, v2 -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-TRUE16-LABEL: test_fmac: -; GFX11-GISEL-TRUE16: ; %bb.0: -; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v1.l, v2.l -; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-FAKE16-LABEL: test_fmac: -; GFX11-GISEL-FAKE16: ; %bb.0: -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_fmac_f16_e32 v0, v1, v2 -; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_fmac: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fmac_f16_e32 v0, v1, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fmac: ; GFX12: ; %bb.0: @@ -143,31 +98,11 @@ define half @test_fmaak(half %x, half %y, half %z) { ; GFX10-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: test_fmaak: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x4200 -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: test_fmaak: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-TRUE16-LABEL: test_fmaak: -; GFX11-GISEL-TRUE16: ; %bb.0: -; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v1.l, 0x4200 -; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-FAKE16-LABEL: test_fmaak: -; GFX11-GISEL-FAKE16: ; %bb.0: -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 -; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_fmaak: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fmaak: ; GFX12: ; %bb.0: @@ -204,33 +139,11 @@ define half @test_fmamk(half %x, half %y, half %z) { ; GFX10-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: test_fmamk: -; GFX11-SDAG-TRUE16: ; %bb.0: -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: test_fmamk: -; GFX11-SDAG-FAKE16: ; %bb.0: -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-TRUE16-LABEL: test_fmamk: -; GFX11-GISEL-TRUE16: ; %bb.0: -; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 0x4200, v0.l -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2 -; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-FAKE16-LABEL: test_fmamk: -; GFX11-GISEL-FAKE16: ; %bb.0: -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 -; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: test_fmamk: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fmamk: ; GFX12: ; %bb.0: @@ -295,61 +208,33 @@ define i32 @test_D139469_f16(half %arg) { ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: test_D139469_f16: -; GFX11-SDAG-TRUE16: ; %bb.0: ; %bb -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x211e -; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x291e, v0.l -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 0x291e, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_min_f16_e32 v0.l, v1.l, v0.h -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: test_D139469_f16: -; GFX11-SDAG-FAKE16: ; %bb.0: ; %bb -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0x211e -; GFX11-SDAG-FAKE16-NEXT: v_mul_f16_e32 v2, 0x291e, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 -; GFX11-SDAG-FAKE16-NEXT: v_min_f16_e32 v0, v2, v1 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-TRUE16-LABEL: test_D139469_f16: -; GFX11-GISEL-TRUE16: ; %bb.0: ; %bb -; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x211e -; GFX11-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x291e, v0.l -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 0x291e, v0.l -; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1.l -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v0.h -; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-FAKE16-LABEL: test_D139469_f16: -; GFX11-GISEL-FAKE16: ; %bb.0: ; %bb -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0x211e -; GFX11-GISEL-FAKE16-NEXT: v_mul_f16_e32 v2, 0x291e, v0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-FAKE16-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 -; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v1 -; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: test_D139469_f16: +; GFX11-SDAG: ; %bb.0: ; %bb +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x211e +; GFX11-SDAG-NEXT: v_mul_f16_e32 v2, 0x291e, v0 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 +; GFX11-SDAG-NEXT: v_min_f16_e32 v0, v2, v1 +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_D139469_f16: +; GFX11-GISEL: ; %bb.0: ; %bb +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e +; GFX11-GISEL-NEXT: v_mul_f16_e32 v2, 0x291e, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 +; GFX11-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v1 +; GFX11-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-LABEL: test_D139469_f16: ; GFX12-SDAG: ; %bb.0: ; %bb @@ -462,83 +347,44 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) { ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-TRUE16-LABEL: test_D139469_v2f16: -; GFX11-SDAG-TRUE16: ; %bb.0: ; %bb -; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x211e -; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] -; GFX11-SDAG-TRUE16-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0] -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_pk_min_f16 v0, v1, v0 -; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX11-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0.l -; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1.l -; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo -; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SDAG-FAKE16-LABEL: test_D139469_v2f16: -; GFX11-SDAG-FAKE16: ; %bb.0: ; %bb -; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x211e -; GFX11-SDAG-FAKE16-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] -; GFX11-SDAG-FAKE16-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0] -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_pk_min_f16 v0, v1, v0 -; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX11-SDAG-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 -; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-SDAG-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1 -; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo -; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-TRUE16-LABEL: test_D139469_v2f16: -; GFX11-GISEL-TRUE16: ; %bb.0: ; %bb -; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v1, 0x211e211e -; GFX11-GISEL-TRUE16-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-TRUE16-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1 -; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2.l -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v0.l -; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s1, 0, v1.l -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s2, 0, v3.l -; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s0, s1, s2 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-FAKE16-LABEL: test_D139469_v2f16: -; GFX11-GISEL-FAKE16: ; %bb.0: ; %bb -; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0x211e211e -; GFX11-GISEL-FAKE16-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-FAKE16-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1 -; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v0 -; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s1, 0, v1 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s2, 0, v3 -; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s0, s1, s2 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-LABEL: test_D139469_v2f16: +; GFX11-SDAG: ; %bb.0: ; %bb +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x211e +; GFX11-SDAG-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] +; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0] +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v1, v0 +; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: test_D139469_v2f16: +; GFX11-GISEL: ; %bb.0: ; %bb +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e +; GFX11-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1 +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v0 +; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s1, 0, v1 +; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s2, 0, v3 +; GFX11-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-GISEL-NEXT: s_or_b32 s0, s1, s2 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-LABEL: test_D139469_v2f16: ; GFX12-SDAG: ; %bb.0: ; %bb diff --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir deleted file mode 100644 index d551ad88f56b7..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir +++ /dev/null @@ -1,242 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 - ---- -name: mad_cvv_f32 -body: | - bb.0: - ; GFX11-LABEL: name: mad_cvv_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vcv_f32 -body: | - bb.0: - ; GFX11-LABEL: name: mad_vcv_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vvc_f32 -body: | - bb.0: - ; GFX11-LABEL: name: mad_vvc_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vsc_f32 -body: | - bb.0: - ; GFX11-LABEL: name: mad_vsc_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_cvv_f32 -body: | - bb.0: - ; GFX11-LABEL: name: fma_cvv_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vcv_f32 -body: | - bb.0: - ; GFX11-LABEL: name: fma_vcv_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vvc_f32 -body: | - bb.0: - ; GFX11-LABEL: name: fma_vvc_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vsc_f32 -body: | - bb.0: - ; GFX11-LABEL: name: fma_vsc_f32 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_cvv_f16 -body: | - bb.0: - ; GFX11-LABEL: name: mad_cvv_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vcv_f16 -body: | - bb.0: - ; GFX11-LABEL: name: mad_vcv_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vvc_f16 -body: | - bb.0: - ; GFX11-LABEL: name: mad_vvc_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vsc_f16 -body: | - bb.0: - ; GFX11-LABEL: name: mad_vsc_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_cvv_f16 -body: | - bb.0: - ; GFX11-LABEL: name: fma_cvv_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vcv_f16 -body: | - bb.0: - ; GFX11-LABEL: name: fma_vcv_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vvc_f16 -body: | - bb.0: - ; GFX11-LABEL: name: fma_vvc_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vsc_f16 -body: | - bb.0: - ; GFX11-LABEL: name: fma_vsc_f16 - ; GFX11: $vgpr0 = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX11-NEXT: SI_RETURN implicit $vgpr2 - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... diff --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir deleted file mode 100644 index 89ef5df9beb8e..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir +++ /dev/null @@ -1,258 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX10 - ---- -name: mad_cvv_f32 -body: | - bb.0: - ; GFX10-LABEL: name: mad_cvv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vcv_f32 -body: | - bb.0: - ; GFX10-LABEL: name: mad_vcv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vvc_f32 -body: | - bb.0: - ; GFX10-LABEL: name: mad_vvc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vsc_f32 -body: | - bb.0: - ; GFX10-LABEL: name: mad_vsc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_cvv_f32 -body: | - bb.0: - ; GFX10-LABEL: name: fma_cvv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vcv_f32 -body: | - bb.0: - ; GFX10-LABEL: name: fma_vcv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vvc_f32 -body: | - bb.0: - ; GFX10-LABEL: name: fma_vvc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vsc_f32 -body: | - bb.0: - ; GFX10-LABEL: name: fma_vsc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_cvv_f16 -body: | - bb.0: - ; GFX10-LABEL: name: mad_cvv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vcv_f16 -body: | - bb.0: - ; GFX10-LABEL: name: mad_vcv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vvc_f16 -body: | - bb.0: - ; GFX10-LABEL: name: mad_vvc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: mad_vsc_f16 -body: | - bb.0: - ; GFX10-LABEL: name: mad_vsc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_cvv_f16 -body: | - bb.0: - ; GFX10-LABEL: name: fma_cvv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vcv_f16 -body: | - bb.0: - ; GFX10-LABEL: name: fma_vcv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vvc_f16 -body: | - bb.0: - ; GFX10-LABEL: name: fma_vvc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... - ---- -name: fma_vsc_f16 -body: | - bb.0: - ; GFX10-LABEL: name: fma_vsc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; - $vgpr0 = IMPLICIT_DEF - $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec - SI_RETURN implicit $vgpr2 -... diff --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir index c9138dda7d1a7..26feb8120c751 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir @@ -1,10 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX10 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 --- name: mad_cvv_f32 body: | bb.0: + ; GFX10-LABEL: name: mad_cvv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_cvv_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -20,6 +27,12 @@ body: | name: mad_vcv_f32 body: | bb.0: + ; GFX10-LABEL: name: mad_vcv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_vcv_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -35,6 +48,12 @@ body: | name: mad_vvc_f32 body: | bb.0: + ; GFX10-LABEL: name: mad_vvc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_vvc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -50,6 +69,12 @@ body: | name: mad_vsc_f32 body: | bb.0: + ; GFX10-LABEL: name: mad_vsc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_vsc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF @@ -65,6 +90,12 @@ body: | name: fma_cvv_f32 body: | bb.0: + ; GFX10-LABEL: name: fma_cvv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_cvv_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -80,6 +111,12 @@ body: | name: fma_vcv_f32 body: | bb.0: + ; GFX10-LABEL: name: fma_vcv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_vcv_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -95,6 +132,12 @@ body: | name: fma_vvc_f32 body: | bb.0: + ; GFX10-LABEL: name: fma_vvc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_vvc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -110,6 +153,12 @@ body: | name: fma_vsc_f32 body: | bb.0: + ; GFX10-LABEL: name: fma_vsc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_vsc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF @@ -125,6 +174,12 @@ body: | name: mad_cvv_f16 body: | bb.0: + ; GFX10-LABEL: name: mad_cvv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_cvv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -140,6 +195,12 @@ body: | name: mad_vcv_f16 body: | bb.0: + ; GFX10-LABEL: name: mad_vcv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_vcv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -155,6 +216,12 @@ body: | name: mad_vvc_f16 body: | bb.0: + ; GFX10-LABEL: name: mad_vvc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_vvc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -170,6 +237,12 @@ body: | name: mad_vsc_f16 body: | bb.0: + ; GFX10-LABEL: name: mad_vsc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: mad_vsc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF @@ -185,14 +258,20 @@ body: | name: fma_cvv_f16 body: | bb.0: + ; GFX10-LABEL: name: fma_cvv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_cvv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2_lo16 = V_FMAMK_F16_t16 $vgpr0_lo16, 18688, $vgpr1_lo16, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, 18688, 0, $vgpr0_lo16, 0, $vgpr1_lo16, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... @@ -200,14 +279,20 @@ body: | name: fma_vcv_f16 body: | bb.0: + ; GFX10-LABEL: name: fma_vcv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_vcv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2_lo16 = V_FMAMK_F16_t16 $vgpr0_lo16, 18688, $vgpr1_lo16, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, $vgpr0_lo16, 0, 18688, 0, $vgpr1_lo16, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... @@ -215,14 +300,20 @@ body: | name: fma_vvc_f16 body: | bb.0: + ; GFX10-LABEL: name: fma_vvc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_vvc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2_lo16 = V_FMAAK_F16_t16 $vgpr0_lo16, $vgpr1_lo16, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... @@ -230,13 +321,19 @@ body: | name: fma_vsc_f16 body: | bb.0: + ; GFX10-LABEL: name: fma_vsc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; ; GFX11-LABEL: name: fma_vsc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2_lo16 = V_FMAAK_F16_t16 $vgpr0_hi16, $vgpr1_hi16, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF - $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, $vgpr0_hi16, 0, $vgpr1_hi16, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ...