Skip to content

Commit 08f1863

Browse files
SC llvm team authored and SC llvm team committed
Merged main:6f618a7b8249e7baa3b2d18f8bbec3c5b6f6d24e into amd-gfx:e5edfda5900b
Local branch amd-gfx e5edfda Merged main:65b7cbbd8735b90933369364153b982d498f649a into amd-gfx:2f9a9b483f9f Remote branch main 6f618a7 Update my email
2 parents e5edfda + 6f618a7 commit 08f1863

18 files changed

+636
-259
lines changed

.mailmap

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
<i@maskray.me> <maskray@google.com>
3131
<JCTremoulet@gmail.com> <jotrem@microsoft.com>
3232
<min@myhsu.dev> <minyihh@uci.edu>
33-
<qiucofan@cn.ibm.com> <qiucf@cn.ibm.com>
33+
<qcf@ecnelises.com> <qiucofan@cn.ibm.com> <qiucf@cn.ibm.com>
3434
<rnk@google.com> <reid@kleckner.net>
3535
<thakis@chromium.org> <nicolasweber@gmx.de>
3636
Jianjian GUAN <jacquesguan@me.com>

libcxx/test/support/atomic_helpers.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,15 @@
3030
# define TEST_ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE
3131
# define TEST_ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE
3232
# define TEST_ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE
33-
#elif TEST_COMPILER_MSVC
33+
#elif defined(TEST_COMPILER_MSVC)
3434
// This is lifted from STL/stl/inc/atomic on github for the purposes of
3535
// keeping the tests compiling for MSVC's STL. It's not a perfect solution
3636
// but at least the tests will keep running.
3737
//
3838
// Note MSVC's STL never produces a type that is sometimes lock free, but not always lock free.
3939
template <class T, size_t Size = sizeof(T)>
40-
constexpr bool msvc_is_lock_free_macro_value() {
41-
return (Size <= 8 && (Size & Size - 1) == 0) ? 2 : 0;
40+
constexpr int msvc_is_lock_free_macro_value() {
41+
return (Size <= 8 && (Size & (Size - 1)) == 0) ? 2 : 0;
4242
}
4343
# define TEST_ATOMIC_CHAR_LOCK_FREE ::msvc_is_lock_free_macro_value<char>()
4444
# define TEST_ATOMIC_SHORT_LOCK_FREE ::msvc_is_lock_free_macro_value<short>()

llvm/CREDITS.TXT

-4
Original file line numberDiff line numberDiff line change
@@ -432,10 +432,6 @@ W: http://vladimir_prus.blogspot.com
432432
E: ghost@cs.msu.su
433433
D: Made inst_iterator behave like a proper iterator, LowerConstantExprs pass
434434

435-
N: QIU Chaofan
436-
E: qiucofan@cn.ibm.com
437-
D: PowerPC Backend Developer
438-
439435
N: Kalle Raiskila
440436
E: kalle.rasikila@nokia.com
441437
D: Some bugfixes to CellSPU

llvm/include/llvm/Config/llvm-config.h.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 509536
19+
#define LLVM_MAIN_REVISION 509543
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/IR/VPIntrinsics.def

+7-5
Original file line numberDiff line numberDiff line change
@@ -722,27 +722,29 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fminimum, VP_REDUCE_FMINIMUM,
722722
#error \
723723
"The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
724724
#endif
725-
#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, INTRIN) \
725+
#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, SDOPC, SEQ_SDOPC, INTRIN) \
726726
BEGIN_REGISTER_VP_INTRINSIC(VPID, 2, 3) \
727727
BEGIN_REGISTER_VP_SDNODE(VPSD, 1, VPID, 2, 3) \
728728
VP_PROPERTY_REDUCTION(0, 1) \
729+
VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) \
729730
END_REGISTER_VP_SDNODE(VPSD) \
730731
BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, 1, VPID, 2, 3) \
731732
HELPER_MAP_VPID_TO_VPSD(VPID, SEQ_VPSD) \
733+
VP_PROPERTY_FUNCTIONAL_SDOPC(SEQ_SDOPC) \
732734
VP_PROPERTY_REDUCTION(0, 1) \
733735
END_REGISTER_VP_SDNODE(SEQ_VPSD) \
734736
VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \
735737
END_REGISTER_VP_INTRINSIC(VPID)
736738

737739
// llvm.vp.reduce.fadd(start,x,mask,vlen)
738740
HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD,
739-
VP_REDUCE_SEQ_FADD,
740-
vector_reduce_fadd)
741+
VP_REDUCE_SEQ_FADD, VECREDUCE_FADD,
742+
VECREDUCE_SEQ_FADD, vector_reduce_fadd)
741743

742744
// llvm.vp.reduce.fmul(start,x,mask,vlen)
743745
HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL,
744-
VP_REDUCE_SEQ_FMUL,
745-
vector_reduce_fmul)
746+
VP_REDUCE_SEQ_FMUL, VECREDUCE_FMUL,
747+
VECREDUCE_SEQ_FMUL, vector_reduce_fmul)
746748

747749
#undef HELPER_REGISTER_REDUCTION_SEQ_VP
748750

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+16-4
Original file line numberDiff line numberDiff line change
@@ -7311,8 +7311,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
73117311
// Generate a vp.reduce_op if it is custom/legal for the target. This avoids
73127312
// needing to pad the source vector, because the inactive lanes can simply be
73137313
// disabled and not contribute to the result.
7314-
// TODO: VECREDUCE_FADD, VECREDUCE_FMUL aren't currently mapped correctly,
7315-
// and thus don't take this path.
73167314
if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
73177315
VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
73187316
SDValue Start = NeutralElem;
@@ -7351,6 +7349,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
73517349
SDValue VecOp = N->getOperand(1);
73527350
SDValue Op = GetWidenedVector(VecOp);
73537351

7352+
EVT VT = N->getValueType(0);
73547353
EVT OrigVT = VecOp.getValueType();
73557354
EVT WideVT = Op.getValueType();
73567355
EVT ElemVT = OrigVT.getVectorElementType();
@@ -7364,6 +7363,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
73647363
unsigned OrigElts = OrigVT.getVectorMinNumElements();
73657364
unsigned WideElts = WideVT.getVectorMinNumElements();
73667365

7366+
// Generate a vp.reduce_op if it is custom/legal for the target. This avoids
7367+
// needing to pad the source vector, because the inactive lanes can simply be
7368+
// disabled and not contribute to the result.
7369+
if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
7370+
VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
7371+
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
7372+
WideVT.getVectorElementCount());
7373+
SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
7374+
SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
7375+
OrigVT.getVectorElementCount());
7376+
return DAG.getNode(*VPOpcode, dl, VT, {AccOp, Op, Mask, EVL}, Flags);
7377+
}
7378+
73677379
if (WideVT.isScalableVector()) {
73687380
unsigned GCD = std::gcd(OrigElts, WideElts);
73697381
EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
@@ -7372,14 +7384,14 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
73727384
for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
73737385
Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
73747386
DAG.getVectorIdxConstant(Idx, dl));
7375-
return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
7387+
return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
73767388
}
73777389

73787390
for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
73797391
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
73807392
DAG.getVectorIdxConstant(Idx, dl));
73817393

7382-
return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags);
7394+
return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
73837395
}
73847396

73857397
SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {

llvm/lib/IR/StructuralHash.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ namespace {
2424
// by the MergeFunctions pass.
2525

2626
class StructuralHashImpl {
27-
uint64_t Hash;
27+
uint64_t Hash = 4;
2828

2929
void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
3030

@@ -43,7 +43,7 @@ class StructuralHashImpl {
4343
}
4444

4545
public:
46-
StructuralHashImpl() : Hash(4) {}
46+
StructuralHashImpl() = default;
4747

4848
void updateOperand(Value *Operand) {
4949
hashType(Operand->getType());

llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -2517,9 +2517,7 @@ static void updateRegisterMapForDbgValueListAfterMove(
25172517
if (RegIt == RegisterMap.end())
25182518
return;
25192519
auto &InstrVec = RegIt->getSecond();
2520-
for (unsigned I = 0; I < InstrVec.size(); I++)
2521-
if (InstrVec[I] == InstrToReplace)
2522-
InstrVec[I] = DbgValueListInstr;
2520+
llvm::replace(InstrVec, InstrToReplace, DbgValueListInstr);
25232521
});
25242522
}
25252523

llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -2002,10 +2002,7 @@ SmallVector<uint32_t, 8> HvxSelector::getPerfectCompletions(ShuffleMask SM,
20022002
if ((unsigned)llvm::popcount(P) < Count) {
20032003
// Reset all occurrences of P, if there are more occurrences of P
20042004
// than there are bits in P.
2005-
for (unsigned &Q : Worklist) {
2006-
if (Q == P)
2007-
Q = 0;
2008-
}
2005+
llvm::replace(Worklist, P, 0U);
20092006
}
20102007
}
20112008

llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
7878
assert(InOp.isReg() && "ProxyReg input should be a register.");
7979
assert(OutOp.isReg() && "ProxyReg output should be a register.");
8080
RemoveList.push_back(&MI);
81-
RAUWBatch.try_emplace(OutOp.getReg(), InOp.getReg());
81+
Register replacement = InOp.getReg();
82+
// Check if the replacement itself has been replaced.
83+
if (auto it = RAUWBatch.find(replacement); it != RAUWBatch.end())
84+
replacement = it->second;
85+
RAUWBatch.try_emplace(OutOp.getReg(), replacement);
8286
break;
8387
}
8488
}

llvm/lib/Target/X86/X86ISelLowering.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -35781,9 +35781,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
3578135781
// Zero out any registers that are already used.
3578235782
for (const auto &MO : MI.operands()) {
3578335783
if (MO.isReg() && MO.isUse())
35784-
for (unsigned &Reg : AvailableRegs)
35785-
if (Reg == MO.getReg())
35786-
Reg = 0;
35784+
llvm::replace(AvailableRegs, static_cast<unsigned>(MO.getReg()), 0U);
3578735785
}
3578835786

3578935787
// Choose the first remaining non-zero available register.

llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll

-25
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# RUN: llc %s --run-pass=nvptx-proxyreg-erasure -march=nvptx64 -o - | FileCheck %s
2+
3+
--- |
4+
; ModuleID = 'third-party/llvm-project/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll'
5+
source_filename = "third-party/llvm-project/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-mir.ll"
6+
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
7+
8+
declare <4 x i32> @callee_vec_i32()
9+
10+
define <4 x i32> @check_vec_i32() {
11+
%ret = call <4 x i32> @callee_vec_i32()
12+
ret <4 x i32> %ret
13+
}
14+
15+
...
16+
---
17+
name: check_vec_i32
18+
alignment: 1
19+
exposesReturnsTwice: false
20+
legalized: false
21+
regBankSelected: false
22+
selected: false
23+
failedISel: false
24+
tracksRegLiveness: true
25+
hasWinCFI: false
26+
callsEHReturn: false
27+
callsUnwindInit: false
28+
hasEHCatchret: false
29+
hasEHScopes: false
30+
hasEHFunclets: false
31+
isOutlined: false
32+
debugInstrRef: false
33+
failsVerification: false
34+
tracksDebugUserValues: false
35+
registers:
36+
- { id: 0, class: int32regs, preferred-register: '' }
37+
- { id: 1, class: int32regs, preferred-register: '' }
38+
- { id: 2, class: int32regs, preferred-register: '' }
39+
- { id: 3, class: int32regs, preferred-register: '' }
40+
- { id: 4, class: int32regs, preferred-register: '' }
41+
- { id: 5, class: int32regs, preferred-register: '' }
42+
- { id: 6, class: int32regs, preferred-register: '' }
43+
- { id: 7, class: int32regs, preferred-register: '' }
44+
- { id: 8, class: int32regs, preferred-register: '' }
45+
- { id: 9, class: int32regs, preferred-register: '' }
46+
- { id: 10, class: int32regs, preferred-register: '' }
47+
- { id: 11, class: int32regs, preferred-register: '' }
48+
liveins: []
49+
frameInfo:
50+
isFrameAddressTaken: false
51+
isReturnAddressTaken: false
52+
hasStackMap: false
53+
hasPatchPoint: false
54+
stackSize: 0
55+
offsetAdjustment: 0
56+
maxAlignment: 1
57+
adjustsStack: false
58+
hasCalls: true
59+
stackProtector: ''
60+
functionContext: ''
61+
maxCallFrameSize: 4294967295
62+
cvBytesOfCalleeSavedRegisters: 0
63+
hasOpaqueSPAdjustment: false
64+
hasVAStart: false
65+
hasMustTailInVarArgFunc: false
66+
hasTailCall: false
67+
isCalleeSavedInfoValid: false
68+
localFrameSize: 0
69+
savePoint: ''
70+
restorePoint: ''
71+
fixedStack: []
72+
stack: []
73+
entry_values: []
74+
callSites: []
75+
debugValueSubstitutions: []
76+
constants: []
77+
machineFunctionInfo: {}
78+
body: |
79+
bb.0:
80+
%0:int32regs, %1:int32regs, %2:int32regs, %3:int32regs = LoadParamMemV4I32 0
81+
; CHECK-NOT: ProxyReg
82+
%4:int32regs = ProxyRegI32 killed %0
83+
%5:int32regs = ProxyRegI32 killed %1
84+
%6:int32regs = ProxyRegI32 killed %2
85+
%7:int32regs = ProxyRegI32 killed %3
86+
; CHECK: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3
87+
StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0
88+
89+
%8:int32regs = LoadParamMemI32 0
90+
; CHECK-NOT: ProxyReg
91+
%9:int32regs = ProxyRegI32 killed %8
92+
%10:int32regs = ProxyRegI32 killed %9
93+
%11:int32regs = ProxyRegI32 killed %10
94+
; CHECK: StoreRetvalI32 killed %8
95+
StoreRetvalI32 killed %11, 0
96+
Return
97+
98+
...

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll

+2-7
Original file line numberDiff line numberDiff line change
@@ -791,12 +791,7 @@ define float @reduce_fadd_16xi32_prefix5(ptr %p) {
791791
; CHECK-NEXT: vle32.v v8, (a0)
792792
; CHECK-NEXT: lui a0, 524288
793793
; CHECK-NEXT: vmv.s.x v10, a0
794-
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
795-
; CHECK-NEXT: vslideup.vi v8, v10, 5
796-
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
797-
; CHECK-NEXT: vslideup.vi v8, v10, 6
798-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
799-
; CHECK-NEXT: vslideup.vi v8, v10, 7
794+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
800795
; CHECK-NEXT: vfredusum.vs v8, v8, v10
801796
; CHECK-NEXT: vfmv.f.s fa0, v8
802797
; CHECK-NEXT: ret
@@ -880,7 +875,7 @@ define float @reduce_fadd_4xi32_non_associative(ptr %p) {
880875
; CHECK-NEXT: vfmv.f.s fa5, v9
881876
; CHECK-NEXT: lui a0, 524288
882877
; CHECK-NEXT: vmv.s.x v9, a0
883-
; CHECK-NEXT: vslideup.vi v8, v9, 3
878+
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
884879
; CHECK-NEXT: vfredusum.vs v8, v8, v9
885880
; CHECK-NEXT: vfmv.f.s fa4, v8
886881
; CHECK-NEXT: fadd.s fa0, fa4, fa5

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll

-12
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,6 @@ define half @vreduce_fadd_v7f16(ptr %x, half %s) {
9898
; CHECK: # %bb.0:
9999
; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma
100100
; CHECK-NEXT: vle16.v v8, (a0)
101-
; CHECK-NEXT: lui a0, 1048568
102-
; CHECK-NEXT: vmv.s.x v9, a0
103-
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
104-
; CHECK-NEXT: vslideup.vi v8, v9, 7
105101
; CHECK-NEXT: vfmv.s.f v9, fa0
106102
; CHECK-NEXT: vfredusum.vs v8, v8, v9
107103
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -470,10 +466,6 @@ define float @vreduce_fadd_v7f32(ptr %x, float %s) {
470466
; CHECK: # %bb.0:
471467
; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
472468
; CHECK-NEXT: vle32.v v8, (a0)
473-
; CHECK-NEXT: lui a0, 524288
474-
; CHECK-NEXT: vmv.s.x v10, a0
475-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
476-
; CHECK-NEXT: vslideup.vi v8, v10, 7
477469
; CHECK-NEXT: vfmv.s.f v10, fa0
478470
; CHECK-NEXT: vfredusum.vs v8, v8, v10
479471
; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -488,10 +480,6 @@ define float @vreduce_ord_fadd_v7f32(ptr %x, float %s) {
488480
; CHECK: # %bb.0:
489481
; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
490482
; CHECK-NEXT: vle32.v v8, (a0)
491-
; CHECK-NEXT: lui a0, 524288
492-
; CHECK-NEXT: vmv.s.x v10, a0
493-
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
494-
; CHECK-NEXT: vslideup.vi v8, v10, 7
495483
; CHECK-NEXT: vfmv.s.f v10, fa0
496484
; CHECK-NEXT: vfredosum.vs v8, v8, v10
497485
; CHECK-NEXT: vfmv.f.s fa0, v8

0 commit comments

Comments
 (0)