Skip to content

Commit c3c1922

Browse files
SC llvm teamSC llvm team
SC llvm team
authored and
SC llvm team
committed
Merged main:71be020dda2c into amd-gfx:a4d6f75a4aba
Local branch amd-gfx a4d6f75 Merged main:5b7982f2b223 into amd-gfx:ee2d08d63842 Remote branch main 71be020 [SelectionDAG][PowerPC] Memset reuse vector element for tail store
2 parents a4d6f75 + 71be020 commit c3c1922

File tree

24 files changed

+510
-190
lines changed

24 files changed

+510
-190
lines changed

libunwind/cmake/config-ix.cmake

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ include(CheckCSourceCompiles)
1111
# --unwindlib=none is supported, and use that if possible.
1212
llvm_check_compiler_linker_flag(C "--unwindlib=none" CXX_SUPPORTS_UNWINDLIB_EQ_NONE_FLAG)
1313

14-
check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB)
14+
if (HAIKU)
15+
check_library_exists(root fopen "" LIBUNWIND_HAS_ROOT_LIB)
16+
else()
17+
check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB)
18+
endif()
1519

1620
if (NOT LIBUNWIND_USE_COMPILER_RT)
1721
if (ANDROID)
@@ -45,6 +49,9 @@ if (CXX_SUPPORTS_NOSTDLIBXX_FLAG OR C_SUPPORTS_NODEFAULTLIBS_FLAG)
4549
if (LIBUNWIND_HAS_C_LIB)
4650
list(APPEND CMAKE_REQUIRED_LIBRARIES c)
4751
endif ()
52+
if (LIBUNWIND_HAS_ROOT_LIB)
53+
list(APPEND CMAKE_REQUIRED_LIBRARIES root)
54+
endif ()
4855
if (LIBUNWIND_USE_COMPILER_RT)
4956
include(HandleCompilerRT)
5057
find_compiler_rt_library(builtins LIBUNWIND_BUILTINS_LIBRARY
@@ -111,3 +118,7 @@ else()
111118
check_library_exists(dl dladdr "" LIBUNWIND_HAS_DL_LIB)
112119
check_library_exists(pthread pthread_once "" LIBUNWIND_HAS_PTHREAD_LIB)
113120
endif()
121+
122+
if(HAIKU)
123+
check_library_exists(bsd dl_iterate_phdr "" LIBUNWIND_HAS_BSD_LIB)
124+
endif()

libunwind/include/__libunwind_config.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636
# if defined(__linux__)
3737
# define _LIBUNWIND_TARGET_LINUX 1
3838
# endif
39+
# if defined(__HAIKU__)
40+
# define _LIBUNWIND_TARGET_HAIKU 1
41+
# endif
3942
# if defined(__i386__)
4043
# define _LIBUNWIND_TARGET_I386
4144
# define _LIBUNWIND_CONTEXT_SIZE 8

libunwind/src/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@ if (APPLE)
102102
endif ()
103103
endif ()
104104

105+
if (HAIKU)
106+
add_library_flags_if(LIBUNWIND_HAS_ROOT_LIB root)
107+
108+
add_library_flags_if(LIBUNWIND_HAS_BSD_LIB bsd)
109+
add_compile_flags_if(LIBUNWIND_HAS_BSD_LIB -D_LIBUNWIND_USE_HAIKU_BSD_LIB=1)
110+
111+
add_compile_flags("-D_DEFAULT_SOURCE")
112+
add_compile_flags("-DPT_GNU_EH_FRAME=PT_EH_FRAME")
113+
endif ()
114+
105115
string(REPLACE ";" " " LIBUNWIND_COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}")
106116
string(REPLACE ";" " " LIBUNWIND_CXX_FLAGS "${LIBUNWIND_CXX_FLAGS}")
107117
string(REPLACE ";" " " LIBUNWIND_C_FLAGS "${LIBUNWIND_C_FLAGS}")

libunwind/src/config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@
4646
#elif defined(_AIX)
4747
// The traceback table at the end of each function is used for unwinding.
4848
#define _LIBUNWIND_SUPPORT_TBTAB_UNWIND 1
49+
#elif defined(__HAIKU__)
50+
#if defined(_LIBUNWIND_USE_HAIKU_BSD_LIB)
51+
#define _LIBUNWIND_USE_DL_ITERATE_PHDR 1
52+
#endif
53+
#define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1
54+
#define _LIBUNWIND_SUPPORT_DWARF_INDEX 1
4955
#else
5056
// Assume an ELF system with a dl_iterate_phdr function.
5157
#define _LIBUNWIND_USE_DL_ITERATE_PHDR 1

libunwind/test/configs/llvm-libunwind-merged.cfg.in

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ if @LIBUNWIND_ENABLE_CET@:
1414
if '@CMAKE_SYSTEM_NAME@' == 'Linux':
1515
link_flags.append('-Wl,--export-dynamic')
1616

17+
if '@CMAKE_DL_LIBS@':
18+
link_flags.append('-l@CMAKE_DL_LIBS@')
19+
1720
# Stack unwinding tests need unwinding tables and these are not generated by default on all targets.
1821
compile_flags.append('-funwind-tables')
1922

@@ -25,7 +28,7 @@ config.substitutions.append(('%{compile_flags}',
2528
'-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags))
2629
))
2730
config.substitutions.append(('%{link_flags}',
28-
'-L %{{lib}} -Wl,-rpath,%{{lib}} -lc++ -ldl {}'.format(' '.join(link_flags))
31+
'-L %{{lib}} -Wl,-rpath,%{{lib}} -lc++ {}'.format(' '.join(link_flags))
2932
))
3033
config.substitutions.append(('%{exec}',
3134
'%{executor} --execdir %T -- '

libunwind/test/configs/llvm-libunwind-shared.cfg.in

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ if @LIBUNWIND_ENABLE_CET@:
1313
if '@CMAKE_SYSTEM_NAME@' == 'Linux':
1414
link_flags.append('-Wl,--export-dynamic')
1515

16+
if '@CMAKE_DL_LIBS@':
17+
link_flags.append('-l@CMAKE_DL_LIBS@')
18+
1619
# Stack unwinding tests need unwinding tables and these are not generated by default on all targets.
1720
compile_flags.append('-funwind-tables')
1821

@@ -24,7 +27,7 @@ config.substitutions.append(('%{compile_flags}',
2427
'-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags))
2528
))
2629
config.substitutions.append(('%{link_flags}',
27-
'-L %{{lib}} -Wl,-rpath,%{{lib}} -lunwind -ldl {}'.format(' '.join(link_flags))
30+
'-L %{{lib}} -Wl,-rpath,%{{lib}} -lunwind {}'.format(' '.join(link_flags))
2831
))
2932
config.substitutions.append(('%{exec}',
3033
'%{executor} --execdir %T -- '

libunwind/test/configs/llvm-libunwind-static.cfg.in

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ if @LIBUNWIND_ENABLE_CET@:
1616
if '@CMAKE_SYSTEM_NAME@' == 'Linux':
1717
link_flags.append('-Wl,--export-dynamic')
1818

19+
if '@CMAKE_DL_LIBS@':
20+
link_flags.append('-l@CMAKE_DL_LIBS@')
21+
1922
# Stack unwinding tests need unwinding tables and these are not generated by default on all targets.
2023
compile_flags.append('-funwind-tables')
2124

@@ -27,7 +30,7 @@ config.substitutions.append(('%{compile_flags}',
2730
'-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags))
2831
))
2932
config.substitutions.append(('%{link_flags}',
30-
'%{{lib}}/libunwind.a -ldl {}'.format(' '.join(link_flags))
33+
'%{{lib}}/libunwind.a {}'.format(' '.join(link_flags))
3134
))
3235
config.substitutions.append(('%{exec}',
3336
'%{executor} --execdir %T -- '

llvm/include/llvm/Analysis/CFGPrinter.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ std::string CompleteNodeLabelString(
153153
if (OutStr[0] == '%') {
154154
OutStr.erase(OutStr.begin());
155155
}
156+
// Place | after BB name to separate it into header
157+
OutStr.insert(OutStr.find_first_of('\n') + 1, "\\|");
156158

157159
unsigned ColNum = 0;
158160
unsigned LastSpace = 0;
@@ -178,8 +180,6 @@ std::string CompleteNodeLabelString(
178180
if (OutStr[i] == ' ')
179181
LastSpace = i;
180182
}
181-
// Replace \l after BB name with | to separate it into header
182-
OutStr.replace(OutStr.find_first_of('\\') + 1, 1, "|");
183183
return OutStr;
184184
}
185185

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,15 @@ class TargetLoweringBase {
845845
return false;
846846
}
847847

848+
/// Return true if the target shall perform extract vector element and store
849+
/// given that the vector is known to be splat of constant.
850+
/// \p Index[out] gives the index of the vector element to be extracted when
851+
/// this is true.
852+
virtual bool shallExtractConstSplatVectorElementToStore(
853+
Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
854+
return false;
855+
}
856+
848857
/// Return true if inserting a scalar into a variable element of an undef
849858
/// vector is more efficiently handled by splatting the scalar instead.
850859
virtual bool shouldSplatInsEltVarIndex(EVT) const {

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 473678
19+
#define LLVM_MAIN_REVISION 473683
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7757,13 +7757,28 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
77577757
}
77587758

77597759
// If this store is smaller than the largest store see whether we can get
7760-
// the smaller value for free with a truncate.
7760+
// the smaller value for free with a truncate or extract vector element and
7761+
// then store.
77617762
SDValue Value = MemSetValue;
77627763
if (VT.bitsLT(LargestVT)) {
7764+
unsigned Index;
7765+
unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits();
7766+
EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts);
77637767
if (!LargestVT.isVector() && !VT.isVector() &&
77647768
TLI.isTruncateFree(LargestVT, VT))
77657769
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
7766-
else
7770+
else if (LargestVT.isVector() && !VT.isVector() &&
7771+
TLI.shallExtractConstSplatVectorElementToStore(
7772+
LargestVT.getTypeForEVT(*DAG.getContext()),
7773+
VT.getSizeInBits(), Index) &&
7774+
TLI.isTypeLegal(SVT) &&
7775+
LargestVT.getSizeInBits() == SVT.getSizeInBits()) {
7776+
// Target which can combine store(extractelement VectorTy, Idx) can get
7777+
// the smaller value for free.
7778+
SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue);
7779+
Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue,
7780+
DAG.getVectorIdxConstant(Index, dl));
7781+
} else
77677782
Value = getMemsetValue(Src, VT, DAG, dl);
77687783
}
77697784
assert(Value.getValueType() == VT && "Value with wrong type.");

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,6 +1214,8 @@ class UpdateDPPPat<ValueType vt> : GCNPat <
12141214

12151215
def : UpdateDPPPat<i32>;
12161216
def : UpdateDPPPat<f32>;
1217+
def : UpdateDPPPat<v2i16>;
1218+
def : UpdateDPPPat<v2f16>;
12171219

12181220
} // End OtherPredicates = [isGFX8Plus]
12191221

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,27 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
16351635
return VT.isScalarInteger();
16361636
}
16371637

1638+
bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1639+
Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1640+
if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1641+
return false;
1642+
1643+
if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1644+
if (VTy->getScalarType()->isIntegerTy()) {
1645+
// ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1646+
if (ElemSizeInBits == 32) {
1647+
Index = Subtarget.isLittleEndian() ? 2 : 1;
1648+
return true;
1649+
}
1650+
if (ElemSizeInBits == 64) {
1651+
Index = Subtarget.isLittleEndian() ? 1 : 0;
1652+
return true;
1653+
}
1654+
}
1655+
}
1656+
return false;
1657+
}
1658+
16381659
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
16391660
switch ((PPCISD::NodeType)Opcode) {
16401661
case PPCISD::FIRST_NUMBER: break;
@@ -17086,10 +17107,20 @@ EVT PPCTargetLowering::getOptimalMemOpType(
1708617107
if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
1708717108
// We should use Altivec/VSX loads and stores when available. For unaligned
1708817109
// addresses, unaligned VSX loads are only fast starting with the P8.
17089-
if (Subtarget.hasAltivec() && Op.size() >= 16 &&
17090-
(Op.isAligned(Align(16)) ||
17091-
((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
17092-
return MVT::v4i32;
17110+
if (Subtarget.hasAltivec() && Op.size() >= 16) {
17111+
if (Op.isMemset() && Subtarget.hasVSX()) {
17112+
uint64_t TailSize = Op.size() % 16;
17113+
// For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
17114+
// element if vector element type matches tail store. For tail size
17115+
// 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
17116+
if (TailSize > 2 && TailSize <= 4) {
17117+
return MVT::v8i16;
17118+
}
17119+
return MVT::v4i32;
17120+
}
17121+
if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17122+
return MVT::v4i32;
17123+
}
1709317124
}
1709417125

1709517126
if (Subtarget.isPPC64()) {

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,11 @@ namespace llvm {
791791
return true;
792792
}
793793

794+
bool
795+
shallExtractConstSplatVectorElementToStore(Type *VectorTy,
796+
unsigned ElemSizeInBits,
797+
unsigned &Index) const override;
798+
794799
bool isCtlzFast() const override {
795800
return true;
796801
}

llvm/lib/Target/PowerPC/PPCInstrP10.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2031,8 +2031,15 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
20312031
(v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>;
20322032
def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 ForceXForm:$src)))),
20332033
(v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>;
2034+
def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src),
2035+
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
20342036
}
20352037

2038+
let Predicates = [IsISA3_1, IsBigEndian] in {
2039+
def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src),
2040+
(XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
2041+
}
2042+
20362043
// FIXME: The swap is overkill when the shift amount is a constant.
20372044
// We should just fix the constant in the DAG.
20382045
let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {

llvm/lib/Target/RISCV/RISCVInstrInfoZb.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,9 @@ def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
739739
(ADD_UW GPR:$rs1, GPR:$rs2)>;
740740
def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>;
741741

742+
def : Pat<(i64 (or_is_add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
743+
(ADD_UW GPR:$rs1, GPR:$rs2)>;
744+
742745
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)),
743746
(SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
744747
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),

llvm/test/Analysis/DotMachineCFG/AMDGPU/functions.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ body: |
1212

1313
# MCFG: digraph "Machine CFG for 'func2' function"
1414
# MCFG-NEXT: label="Machine CFG for 'func2' function"
15-
# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:| $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0\l $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0\l $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0\l S_ENDPGM 0\l}"];
15+
# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:\l| $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0\l $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0\l $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0\l S_ENDPGM 0\l}"];
1616
---
1717
name: func2
1818
body: |

llvm/test/Analysis/DotMachineCFG/AMDGPU/irreducible.mir

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,22 @@
55

66
# MCFG: digraph "Machine CFG for 'irreducible' function"
77
# MCFG-NEXT: label="Machine CFG for 'irreducible' function"
8-
# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:| successors: %bb.1(0x40000000), %bb.2(0x40000000)\l liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9,\l... $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16\l %0:sreg_32 = IMPLICIT_DEF\l %1:vgpr_32 = COPY $vgpr0\l %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec\l S_CMP_EQ_U32 %0:sreg_32, 0, implicit-def $scc\l S_CBRANCH_SCC1 %bb.1, implicit $scc\l S_BRANCH %bb.2\l}"];
8+
# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:\l| successors: %bb.1(0x40000000), %bb.2(0x40000000)\l liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9,\l... $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16\l %0:sreg_32 = IMPLICIT_DEF\l %1:vgpr_32 = COPY $vgpr0\l %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec\l S_CMP_EQ_U32 %0:sreg_32, 0, implicit-def $scc\l S_CBRANCH_SCC1 %bb.1, implicit $scc\l S_BRANCH %bb.2\l}"];
99
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
1010
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
11-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.1:|\l successors: %bb.3(0x80000000)\l\l %3:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.5\l %5:vgpr_32 = V_ADD_U32_e64 %3:vgpr_32, 1, 0, implicit $exec\l S_BRANCH %bb.3\l}"];
11+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.1:\l|\l successors: %bb.3(0x80000000)\l\l %3:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.5\l %5:vgpr_32 = V_ADD_U32_e64 %3:vgpr_32, 1, 0, implicit $exec\l S_BRANCH %bb.3\l}"];
1212
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
13-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.2:|\l successors: %bb.3(0x80000000)\l\l %6:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.4\l %7:vgpr_32 = V_ADD_U32_e64 %6:vgpr_32, 2, 0, implicit $exec\l}"];
13+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.2:\l|\l successors: %bb.3(0x80000000)\l\l %6:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.4\l %7:vgpr_32 = V_ADD_U32_e64 %6:vgpr_32, 2, 0, implicit $exec\l}"];
1414
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
15-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.3:|\l successors: %bb.4(0x80000000)\l\l %4:vgpr_32 = PHI %5:vgpr_32, %bb.1, %7:vgpr_32, %bb.2\l}"];
15+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.3:\l|\l successors: %bb.4(0x80000000)\l\l %4:vgpr_32 = PHI %5:vgpr_32, %bb.1, %7:vgpr_32, %bb.2\l}"];
1616
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
17-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.4:|\l successors: %bb.2(0x40000000), %bb.5(0x40000000)\l\l %8:vgpr_32 = V_AND_B32_e32 3, %1:vgpr_32, implicit $exec\l %9:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 2, implicit $exec\l %10:sreg_64 = SI_IF killed %9:sreg_64, %bb.2, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
17+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.4:\l|\l successors: %bb.2(0x40000000), %bb.5(0x40000000)\l\l %8:vgpr_32 = V_AND_B32_e32 3, %1:vgpr_32, implicit $exec\l %9:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 2, implicit $exec\l %10:sreg_64 = SI_IF killed %9:sreg_64, %bb.2, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
1818
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
1919
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
20-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.5:|\l successors: %bb.1(0x40000000), %bb.6(0x40000000)\l\l %11:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 1, implicit $exec\l %12:sreg_64 = SI_IF killed %11:sreg_64, %bb.1, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
20+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.5:\l|\l successors: %bb.1(0x40000000), %bb.6(0x40000000)\l\l %11:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 1, implicit $exec\l %12:sreg_64 = SI_IF killed %11:sreg_64, %bb.1, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
2121
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
2222
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
23-
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.6:|\l\l S_ENDPGM 0\l}"];
23+
# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.6:\l|\l\l S_ENDPGM 0\l}"];
2424

2525
# MCFG-ONLY: digraph "Machine CFG for 'irreducible' function"
2626
# MCFG-ONLY-NEXT: label="Machine CFG for 'irreducible' function"

0 commit comments

Comments
 (0)