Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Avoid calling GC write barrier for byrefs [#89064 + more HasGCByRef logic] #89116

Closed
wants to merge 15 commits into from
Closed
11 changes: 3 additions & 8 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -821,8 +821,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -852,7 +850,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
unsigned slots = layout->GetSlotCount();

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
for (unsigned i = 0; i < slots; ++i)
{
Expand All @@ -866,12 +864,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = layout->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
emit->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
INS_FLAGS_DONT_CARE, INS_OPTS_LDST_POST_INC);
Expand All @@ -881,11 +877,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}

++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
14 changes: 4 additions & 10 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3582,8 +3582,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -3627,7 +3625,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
emitter* emit = GetEmitter();

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
unsigned i = 0;
// Check if two or more remaining slots and use a ldp/stp sequence
Expand Down Expand Up @@ -3656,15 +3654,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use ldp/stp
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
Expand All @@ -3684,11 +3680,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
14 changes: 4 additions & 10 deletions src/coreclr/jit/codegenloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2746,8 +2746,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -2794,7 +2792,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
unsigned i = 0;
// Check if two or more remaining slots and use two load/store sequence
Expand Down Expand Up @@ -2839,15 +2837,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two load/store
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two load/store
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
Expand Down Expand Up @@ -2883,11 +2879,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
14 changes: 4 additions & 10 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2382,8 +2382,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
sourceIsLocal = true;
}

bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

#ifdef DEBUG
assert(!dstAddr->isContained());

Expand Down Expand Up @@ -2430,7 +2428,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
unsigned i = 0;
// Check if two or more remaining slots and use two ld/sd sequence
Expand Down Expand Up @@ -2475,15 +2473,13 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Check if the next slot's type is also TYP_GC_NONE and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCPtr(i + 1))
// Check if the next slot's type is also non-ref and use two ld/sd
if ((i + 1 < slots) && !layout->IsGCRef(i + 1))
{
if ((i + 2) == slots)
{
Expand Down Expand Up @@ -2519,11 +2515,9 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
// In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
}
++i;
}
assert(gcPtrCount == 0);
}

if (cpObjNode->IsVolatile())
Expand Down
28 changes: 11 additions & 17 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4091,7 +4091,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
GenTree* dstAddr = cpObjNode->Addr();
GenTree* source = cpObjNode->Data();
var_types srcAddrType = TYP_BYREF;
bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR);

// If the GenTree node has data about GC pointers, this means we're dealing
// with CpObj, so this requires special logic.
Expand Down Expand Up @@ -4137,10 +4136,11 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType);
gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());

unsigned slots = cpObjNode->GetLayout()->GetSlotCount();
ClassLayout* layout = cpObjNode->GetLayout();
unsigned slots = layout->GetSlotCount();

// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
if (dstAddr->gtSkipReloadOrCopy()->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
Expand All @@ -4164,32 +4164,29 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
ClassLayout* layout = cpObjNode->GetLayout();
unsigned gcPtrCount = layout->GetGCPtrCount();

unsigned i = 0;
while (i < slots)
{
if (!layout->IsGCPtr(i))
if (!layout->IsGCRef(i))
{
// Let's see if we can use rep movsp instead of a sequence of movsp instructions
// to save cycles and code size.
unsigned nonGcSlotCount = 0;
unsigned nonRefSlotCount = 0;

do
{
nonGcSlotCount++;
nonRefSlotCount++;
i++;
} while ((i < slots) && !layout->IsGCPtr(i));
} while ((i < slots) && !layout->IsGCRef(i));

// If we have a very small contiguous non-gc region, it's better just to
// emit a sequence of movsp instructions
if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
while (nonGcSlotCount > 0)
while (nonRefSlotCount > 0)
{
instGen(INS_movsp);
nonGcSlotCount--;
nonRefSlotCount--;
}
}
else
Expand All @@ -4198,19 +4195,16 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
// rep movsp (alias for movsd/movsq for x86/x64)
assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);

GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonRefSlotCount);
instGen(INS_r_movsp);
}
}
else
{
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
i++;
}
}

assert(gcPtrCount == 0);
}

// Clear the gcInfo for RSI and RDI.
Expand Down
20 changes: 10 additions & 10 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,37 +422,37 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
// we can use REP MOVSD/Q instead of a sequence of MOVSD/Q instructions. According to the
// Intel Manual, the sweet spot for small structs is between 4 to 12 slots of size where
// the entire operation takes 20 cycles and encodes in 5 bytes (loading RCX and REP MOVSD/Q).
unsigned nonGCSlots = 0;
unsigned nonRefSlots = 0;

if (dstAddr->OperIs(GT_LCL_ADDR))
if (dstAddr->OperIs(GT_LCL_ADDR) || layout->HasGCByRef())
{
// If the destination is on the stack then no write barriers are needed.
nonGCSlots = layout->GetSlotCount();
nonRefSlots = layout->GetSlotCount();
}
else
{
// Otherwise a write barrier is needed for every GC pointer in the layout
// so we need to check if there's a long enough sequence of non-GC slots.
// Otherwise a write barrier is needed for every TYP_REF pointer in the layout
// so we need to check if there's a long enough sequence of non-TYP_REF slots.
unsigned slots = layout->GetSlotCount();
for (unsigned i = 0; i < slots; i++)
{
if (layout->IsGCPtr(i))
if (layout->IsGCRef(i))
{
nonGCSlots = 0;
nonRefSlots = 0;
}
else
{
nonGCSlots++;
nonRefSlots++;

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
break;
}
}
}
}

if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
if (nonRefSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjRepInstr;
}
Expand Down