Merged main:71be020dda2c into amd-gfx:a4d6f75a4aba

SC llvm team · SC llvm team · commit c3c19225c0f4 · 2023-09-06T01:56:00.000-04:00
Local branch amd-gfx a4d6f75 Merged main:5b7982f2b223 into amd-gfx:ee2d08d63842 Remote branch main 71be020 [SelectionDAG][PowerPC] Memset reuse vector element for tail store
diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake
@@ -11,7 +11,11 @@ include(CheckCSourceCompiles)
 # --unwindlib=none is supported, and use that if possible.
 llvm_check_compiler_linker_flag(C "--unwindlib=none" CXX_SUPPORTS_UNWINDLIB_EQ_NONE_FLAG)
 
-check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB)
+if (HAIKU)
+  check_library_exists(root fopen "" LIBUNWIND_HAS_ROOT_LIB)
+else()
+  check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB)
+endif()
 
 if (NOT LIBUNWIND_USE_COMPILER_RT)
   if (ANDROID)
@@ -45,6 +49,9 @@ if (CXX_SUPPORTS_NOSTDLIBXX_FLAG OR C_SUPPORTS_NODEFAULTLIBS_FLAG)
   if (LIBUNWIND_HAS_C_LIB)
     list(APPEND CMAKE_REQUIRED_LIBRARIES c)
   endif ()
+  if (LIBUNWIND_HAS_ROOT_LIB)
+    list(APPEND CMAKE_REQUIRED_LIBRARIES root)
+  endif ()
   if (LIBUNWIND_USE_COMPILER_RT)
     include(HandleCompilerRT)
     find_compiler_rt_library(builtins LIBUNWIND_BUILTINS_LIBRARY
@@ -111,3 +118,7 @@ else()
   check_library_exists(dl dladdr "" LIBUNWIND_HAS_DL_LIB)
   check_library_exists(pthread pthread_once "" LIBUNWIND_HAS_PTHREAD_LIB)
 endif()
+
+if(HAIKU)
+  check_library_exists(bsd dl_iterate_phdr "" LIBUNWIND_HAS_BSD_LIB)
+endif()
diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h
@@ -36,6 +36,9 @@
 # if defined(__linux__)
 #  define _LIBUNWIND_TARGET_LINUX 1
 # endif
+# if defined(__HAIKU__)
+#  define _LIBUNWIND_TARGET_HAIKU 1
+# endif
 # if defined(__i386__)
 #  define _LIBUNWIND_TARGET_I386
 #  define _LIBUNWIND_CONTEXT_SIZE 8
diff --git a/libunwind/src/CMakeLists.txt b/libunwind/src/CMakeLists.txt
@@ -102,6 +102,16 @@ if (APPLE)
   endif ()
 endif ()
 
+if (HAIKU)
+  add_library_flags_if(LIBUNWIND_HAS_ROOT_LIB root)
+
+  add_library_flags_if(LIBUNWIND_HAS_BSD_LIB bsd)
+  add_compile_flags_if(LIBUNWIND_HAS_BSD_LIB -D_LIBUNWIND_USE_HAIKU_BSD_LIB=1)
+
+  add_compile_flags("-D_DEFAULT_SOURCE")
+  add_compile_flags("-DPT_GNU_EH_FRAME=PT_EH_FRAME")
+endif ()
+
 string(REPLACE ";" " " LIBUNWIND_COMPILE_FLAGS "${LIBUNWIND_COMPILE_FLAGS}")
 string(REPLACE ";" " " LIBUNWIND_CXX_FLAGS "${LIBUNWIND_CXX_FLAGS}")
 string(REPLACE ";" " " LIBUNWIND_C_FLAGS "${LIBUNWIND_C_FLAGS}")
diff --git a/libunwind/src/config.h b/libunwind/src/config.h
@@ -46,6 +46,12 @@
 #elif defined(_AIX)
 // The traceback table at the end of each function is used for unwinding.
 #define _LIBUNWIND_SUPPORT_TBTAB_UNWIND 1
+#elif defined(__HAIKU__)
+  #if defined(_LIBUNWIND_USE_HAIKU_BSD_LIB)
+    #define _LIBUNWIND_USE_DL_ITERATE_PHDR 1
+  #endif
+  #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1
+  #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1
 #else
   // Assume an ELF system with a dl_iterate_phdr function.
   #define _LIBUNWIND_USE_DL_ITERATE_PHDR 1
diff --git a/libunwind/test/configs/llvm-libunwind-merged.cfg.in b/libunwind/test/configs/llvm-libunwind-merged.cfg.in
@@ -14,6 +14,9 @@ if @LIBUNWIND_ENABLE_CET@:
 if '@CMAKE_SYSTEM_NAME@' == 'Linux':
     link_flags.append('-Wl,--export-dynamic')
 
+if '@CMAKE_DL_LIBS@':
+    link_flags.append('-l@CMAKE_DL_LIBS@')
+
 # Stack unwinding tests need unwinding tables and these are not generated by default on all targets.
 compile_flags.append('-funwind-tables')
 
@@ -25,7 +28,7 @@ config.substitutions.append(('%{compile_flags}',
     '-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags))
 ))
 config.substitutions.append(('%{link_flags}',
-    '-L %{{lib}} -Wl,-rpath,%{{lib}} -lc++ -ldl {}'.format(' '.join(link_flags))
+    '-L %{{lib}} -Wl,-rpath,%{{lib}} -lc++ {}'.format(' '.join(link_flags))
 ))
 config.substitutions.append(('%{exec}',
     '%{executor} --execdir %T -- '
diff --git a/libunwind/test/configs/llvm-libunwind-shared.cfg.in b/libunwind/test/configs/llvm-libunwind-shared.cfg.in
@@ -13,6 +13,9 @@ if @LIBUNWIND_ENABLE_CET@:
 if '@CMAKE_SYSTEM_NAME@' == 'Linux':
     link_flags.append('-Wl,--export-dynamic')
 
+if '@CMAKE_DL_LIBS@':
+    link_flags.append('-l@CMAKE_DL_LIBS@')
+
 # Stack unwinding tests need unwinding tables and these are not generated by default on all targets.
 compile_flags.append('-funwind-tables')
 
@@ -24,7 +27,7 @@ config.substitutions.append(('%{compile_flags}',
     '-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags))
 ))
 config.substitutions.append(('%{link_flags}',
-    '-L %{{lib}} -Wl,-rpath,%{{lib}} -lunwind -ldl {}'.format(' '.join(link_flags))
+    '-L %{{lib}} -Wl,-rpath,%{{lib}} -lunwind {}'.format(' '.join(link_flags))
 ))
 config.substitutions.append(('%{exec}',
     '%{executor} --execdir %T -- '
diff --git a/libunwind/test/configs/llvm-libunwind-static.cfg.in b/libunwind/test/configs/llvm-libunwind-static.cfg.in
@@ -16,6 +16,9 @@ if @LIBUNWIND_ENABLE_CET@:
 if '@CMAKE_SYSTEM_NAME@' == 'Linux':
     link_flags.append('-Wl,--export-dynamic')
 
+if '@CMAKE_DL_LIBS@':
+    link_flags.append('-l@CMAKE_DL_LIBS@')
+
 # Stack unwinding tests need unwinding tables and these are not generated by default on all targets.
 compile_flags.append('-funwind-tables')
 
@@ -27,7 +30,7 @@ config.substitutions.append(('%{compile_flags}',
     '-nostdinc++ -I %{{include}} {}'.format(' '.join(compile_flags))
 ))
 config.substitutions.append(('%{link_flags}',
-    '%{{lib}}/libunwind.a -ldl {}'.format(' '.join(link_flags))
+    '%{{lib}}/libunwind.a {}'.format(' '.join(link_flags))
 ))
 config.substitutions.append(('%{exec}',
     '%{executor} --execdir %T -- '
diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -153,6 +153,8 @@ std::string CompleteNodeLabelString(
   if (OutStr[0] == '%') {
     OutStr.erase(OutStr.begin());
   }
+  // Place | after BB name to separate it into header
+  OutStr.insert(OutStr.find_first_of('\n') + 1, "\\|");
 
   unsigned ColNum = 0;
   unsigned LastSpace = 0;
@@ -178,8 +180,6 @@ std::string CompleteNodeLabelString(
     if (OutStr[i] == ' ')
       LastSpace = i;
   }
-  // Replace \l after BB name with | to separate it into header
-  OutStr.replace(OutStr.find_first_of('\\') + 1, 1, "|");
   return OutStr;
 }
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -845,6 +845,15 @@ class TargetLoweringBase {
     return false;
   }
 
+  /// Return true if the target shall perform extract vector element and store
+  /// given that the vector is known to be splat of constant.
+  /// \p Index[out] gives the index of the vector element to be extracted when
+  /// this is true.
+  virtual bool shallExtractConstSplatVectorElementToStore(
+      Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
+    return false;
+  }
+
   /// Return true if inserting a scalar into a variable element of an undef
   /// vector is more efficiently handled by splatting the scalar instead.
   virtual bool shouldSplatInsEltVarIndex(EVT) const {
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 473678
+#define LLVM_MAIN_REVISION 473683
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7757,13 +7757,28 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
     }
 
     // If this store is smaller than the largest store see whether we can get
-    // the smaller value for free with a truncate.
+    // the smaller value for free with a truncate or extract vector element and
+    // then store.
     SDValue Value = MemSetValue;
     if (VT.bitsLT(LargestVT)) {
+      unsigned Index;
+      unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits();
+      EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts);
       if (!LargestVT.isVector() && !VT.isVector() &&
           TLI.isTruncateFree(LargestVT, VT))
         Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
-      else
+      else if (LargestVT.isVector() && !VT.isVector() &&
+               TLI.shallExtractConstSplatVectorElementToStore(
+                   LargestVT.getTypeForEVT(*DAG.getContext()),
+                   VT.getSizeInBits(), Index) &&
+               TLI.isTypeLegal(SVT) &&
+               LargestVT.getSizeInBits() == SVT.getSizeInBits()) {
+        // Target which can combine store(extractelement VectorTy, Idx) can get
+        // the smaller value for free.
+        SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue);
+        Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue,
+                            DAG.getVectorIdxConstant(Index, dl));
+      } else
         Value = getMemsetValue(Src, VT, DAG, dl);
     }
     assert(Value.getValueType() == VT && "Value with wrong type.");
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1214,6 +1214,8 @@ class UpdateDPPPat<ValueType vt> : GCNPat <
 
 def : UpdateDPPPat<i32>;
 def : UpdateDPPPat<f32>;
+def : UpdateDPPPat<v2i16>;
+def : UpdateDPPPat<v2f16>;
 
 } // End OtherPredicates = [isGFX8Plus]
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1635,6 +1635,27 @@ bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
   return VT.isScalarInteger();
 }
 
+bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
+    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
+  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
+    return false;
+
+  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
+    if (VTy->getScalarType()->isIntegerTy()) {
+      // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
+      if (ElemSizeInBits == 32) {
+        Index = Subtarget.isLittleEndian() ? 2 : 1;
+        return true;
+      }
+      if (ElemSizeInBits == 64) {
+        Index = Subtarget.isLittleEndian() ? 1 : 0;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER:    break;
@@ -17086,10 +17107,20 @@ EVT PPCTargetLowering::getOptimalMemOpType(
   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
     // We should use Altivec/VSX loads and stores when available. For unaligned
     // addresses, unaligned VSX loads are only fast starting with the P8.
-    if (Subtarget.hasAltivec() && Op.size() >= 16 &&
-        (Op.isAligned(Align(16)) ||
-         ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
-      return MVT::v4i32;
+    if (Subtarget.hasAltivec() && Op.size() >= 16) {
+      if (Op.isMemset() && Subtarget.hasVSX()) {
+        uint64_t TailSize = Op.size() % 16;
+        // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
+        // element if vector element type matches tail store. For tail size
+        // 3/4, the tail store is i32, v4i32 cannot be used, need a legal one.
+        if (TailSize > 2 && TailSize <= 4) {
+          return MVT::v8i16;
+        }
+        return MVT::v4i32;
+      }
+      if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
+        return MVT::v4i32;
+    }
   }
 
   if (Subtarget.isPPC64()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -791,6 +791,11 @@ namespace llvm {
       return true;
     }
 
+    bool
+    shallExtractConstSplatVectorElementToStore(Type *VectorTy,
+                                               unsigned ElemSizeInBits,
+                                               unsigned &Index) const override;
+
     bool isCtlzFast() const override {
       return true;
     }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2031,8 +2031,15 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, IsLittleEndian] in {
             (v8i16 (COPY_TO_REGCLASS (LXVRHX ForceXForm:$src), VSRC))>;
   def : Pat<(v16i8 (scalar_to_vector (i32 (extloadi8 ForceXForm:$src)))),
             (v16i8 (COPY_TO_REGCLASS (LXVRBX ForceXForm:$src), VSRC))>;
+  def : Pat<(store (i64 (extractelt v2i64:$A, 1)), ForceXForm:$src),
+            (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
  }
 
+let Predicates = [IsISA3_1, IsBigEndian] in {
+  def : Pat<(store (i64 (extractelt v2i64:$A, 0)), ForceXForm:$src),
+            (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), ForceXForm:$src)>;
+}
+
 // FIXME: The swap is overkill when the shift amount is a constant.
 // We should just fix the constant in the DAG.
 let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -739,6 +739,9 @@ def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
           (ADD_UW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>;
 
+def : Pat<(i64 (or_is_add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
+          (ADD_UW GPR:$rs1, GPR:$rs2)>;
+
 def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)),
           (SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
 def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)),
diff --git a/llvm/test/Analysis/DotMachineCFG/AMDGPU/functions.mir b/llvm/test/Analysis/DotMachineCFG/AMDGPU/functions.mir
@@ -12,7 +12,7 @@ body: |
 
 # MCFG: digraph "Machine CFG for 'func2' function"
 # MCFG-NEXT: label="Machine CFG for 'func2' function"
-# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:| $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0\l $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0\l $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0\l S_ENDPGM 0\l}"];
+# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:\l| $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0\l $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0\l $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0\l S_ENDPGM 0\l}"];
 ---
 name: func2
 body: |
diff --git a/llvm/test/Analysis/DotMachineCFG/AMDGPU/irreducible.mir b/llvm/test/Analysis/DotMachineCFG/AMDGPU/irreducible.mir
@@ -5,22 +5,22 @@
 
 # MCFG: digraph "Machine CFG for 'irreducible' function"
 # MCFG-NEXT: label="Machine CFG for 'irreducible' function"
-# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:| successors: %bb.1(0x40000000), %bb.2(0x40000000)\l  liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9,\l... $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16\l  %0:sreg_32 = IMPLICIT_DEF\l  %1:vgpr_32 = COPY $vgpr0\l  %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec\l  S_CMP_EQ_U32 %0:sreg_32, 0, implicit-def $scc\l  S_CBRANCH_SCC1 %bb.1, implicit $scc\l  S_BRANCH %bb.2\l}"];
+# MCFG: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.0:\l| successors: %bb.1(0x40000000), %bb.2(0x40000000)\l  liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9,\l... $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16\l  %0:sreg_32 = IMPLICIT_DEF\l  %1:vgpr_32 = COPY $vgpr0\l  %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec\l  S_CMP_EQ_U32 %0:sreg_32, 0, implicit-def $scc\l  S_CBRANCH_SCC1 %bb.1, implicit $scc\l  S_BRANCH %bb.2\l}"];
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
-# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.1:|\l successors: %bb.3(0x80000000)\l\l  %3:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.5\l  %5:vgpr_32 = V_ADD_U32_e64 %3:vgpr_32, 1, 0, implicit $exec\l  S_BRANCH %bb.3\l}"];
+# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.1:\l|\l successors: %bb.3(0x80000000)\l\l  %3:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.5\l  %5:vgpr_32 = V_ADD_U32_e64 %3:vgpr_32, 1, 0, implicit $exec\l  S_BRANCH %bb.3\l}"];
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
-# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.2:|\l successors: %bb.3(0x80000000)\l\l  %6:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.4\l  %7:vgpr_32 = V_ADD_U32_e64 %6:vgpr_32, 2, 0, implicit $exec\l}"];
+# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.2:\l|\l successors: %bb.3(0x80000000)\l\l  %6:vgpr_32 = PHI %2:vgpr_32, %bb.0, %4:vgpr_32, %bb.4\l  %7:vgpr_32 = V_ADD_U32_e64 %6:vgpr_32, 2, 0, implicit $exec\l}"];
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
-# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.3:|\l successors: %bb.4(0x80000000)\l\l  %4:vgpr_32 = PHI %5:vgpr_32, %bb.1, %7:vgpr_32, %bb.2\l}"];
+# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.3:\l|\l successors: %bb.4(0x80000000)\l\l  %4:vgpr_32 = PHI %5:vgpr_32, %bb.1, %7:vgpr_32, %bb.2\l}"];
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
-# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.4:|\l successors: %bb.2(0x40000000), %bb.5(0x40000000)\l\l  %8:vgpr_32 = V_AND_B32_e32 3, %1:vgpr_32, implicit $exec\l  %9:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 2, implicit $exec\l  %10:sreg_64 = SI_IF killed %9:sreg_64, %bb.2, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
+# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.4:\l|\l successors: %bb.2(0x40000000), %bb.5(0x40000000)\l\l  %8:vgpr_32 = V_AND_B32_e32 3, %1:vgpr_32, implicit $exec\l  %9:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 2, implicit $exec\l  %10:sreg_64 = SI_IF killed %9:sreg_64, %bb.2, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
-# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.5:|\l successors: %bb.1(0x40000000), %bb.6(0x40000000)\l\l  %11:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 1, implicit $exec\l  %12:sreg_64 = SI_IF killed %11:sreg_64, %bb.1, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
+# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.5:\l|\l successors: %bb.1(0x40000000), %bb.6(0x40000000)\l\l  %11:sreg_64 = V_CMP_EQ_U32_e64 %8:vgpr_32, 1, implicit $exec\l  %12:sreg_64 = SI_IF killed %11:sreg_64, %bb.1, implicit-def dead $exec,\l... implicit-def dead $scc, implicit $exec\l}"];
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
 # MCFG-NEXT: Node{{[0-9A-Za-z]*}} -> Node{{[0-9A-Za-z]*}};
-# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.6:|\l\l S_ENDPGM 0\l}"];
+# MCFG-NEXT: Node{{[0-9A-Za-z]*}} [shape=record,label="{bb.6:\l|\l\l S_ENDPGM 0\l}"];
 
 # MCFG-ONLY: digraph "Machine CFG for 'irreducible' function"
 # MCFG-ONLY-NEXT: label="Machine CFG for 'irreducible' function"
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
diff --git a/llvm/test/CodeGen/PowerPC/memset-tail.ll b/llvm/test/CodeGen/PowerPC/memset-tail.ll
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
diff --git a/llvm/test/Other/cfg-multiline-header.ll b/llvm/test/Other/cfg-multiline-header.ll
diff --git a/llvm/test/Other/cfg_deopt_unreach.ll b/llvm/test/Other/cfg_deopt_unreach.ll
diff --git a/polly/test/ScopDetect/dot-scops-npm.ll b/polly/test/ScopDetect/dot-scops-npm.ll

Original file line number	Diff line number	Diff line change
`@@ -153,6 +153,8 @@ std::string CompleteNodeLabelString(`
`153`	`153`	`if (OutStr[0] == '%') {`
`154`	`154`	`OutStr.erase(OutStr.begin());`
`155`	`155`	`}`
	`156`	`+ // Place | after BB name to separate it into header`
	`157`	`+ OutStr.insert(OutStr.find_first_of('\n') + 1, "\\|");`
`156`	`158`
`157`	`159`	`unsigned ColNum = 0;`
`158`	`160`	`unsigned LastSpace = 0;`
`@@ -178,8 +180,6 @@ std::string CompleteNodeLabelString(`
`178`	`180`	`if (OutStr[i] == ' ')`
`179`	`181`	`LastSpace = i;`
`180`	`182`	`}`
`181`		`- // Replace \l after BB name with | to separate it into header`
`182`		`- OutStr.replace(OutStr.find_first_of('\\') + 1, 1, "|");`
`183`	`183`	`return OutStr;`
`184`	`184`	`}`
`185`	`185`
Original file line number	Diff line number	Diff line change
`@@ -791,6 +791,11 @@ namespace llvm {`
`791`	`791`	`return true;`
`792`	`792`	`}`
`793`	`793`
	`794`	`+ bool`
	`795`	`+ shallExtractConstSplatVectorElementToStore(Type *VectorTy,`
	`796`	`+ unsigned ElemSizeInBits,`
	`797`	`+ unsigned &Index) const override;`
	`798`	`+`
`794`	`799`	`bool isCtlzFast() const override {`
`795`	`800`	`return true;`
`796`	`801`	`}`