Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

JIT: Optimize struct parameter register accesses in the backend #110819

Merged
merged 40 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
794536f
Foo
jakobbotsch Dec 17, 2024
db15f94
Remove optimization for now
jakobbotsch Dec 17, 2024
8e8c913
Add an lvTracked check, remove dead code
jakobbotsch Dec 17, 2024
e504b4b
Support insertions for now
jakobbotsch Dec 17, 2024
bf196bd
Run jit-format
jakobbotsch Dec 17, 2024
559baf0
Avoid homing Swift parameters if they are !lvOnFrame
jakobbotsch Dec 18, 2024
6d1b57e
Rename
jakobbotsch Dec 18, 2024
058e8dc
JIT: Optimize struct parameter register accesses in the backend
jakobbotsch Dec 18, 2024
5f626ab
Parameters in OSR functions cannot be mapped
jakobbotsch Dec 18, 2024
1283951
Fix build on platforms with implicit byrefs, run jit-format
jakobbotsch Dec 18, 2024
0792ab5
Spill to both parameter and new mappings
jakobbotsch Dec 19, 2024
3279c71
Fix arm32 build
jakobbotsch Dec 19, 2024
2b8cf74
Induce mappings before lowering runs
jakobbotsch Dec 19, 2024
22b755d
Avoid double lowering, always back to nop
jakobbotsch Dec 19, 2024
43f6ba3
Add check for DNER when reusing local
jakobbotsch Dec 19, 2024
1d3aba6
Add function header
jakobbotsch Dec 19, 2024
6f21d49
Ensure we check both the parameter and mapped target when determining…
jakobbotsch Dec 19, 2024
4f89b1a
Change a lcl description
jakobbotsch Dec 19, 2024
e47919d
Skip promoted locals for optimization
jakobbotsch Dec 19, 2024
cfff900
Unify Swift parameter register homing
jakobbotsch Dec 20, 2024
f0e6d2d
Run jit-format
jakobbotsch Dec 20, 2024
4f06d3c
Skip optimization for arm32 prespilled registers
jakobbotsch Dec 20, 2024
1c9bd2c
Merge branch 'main' of github.com:dotnet/runtime into physical-promot…
jakobbotsch Dec 26, 2024
3903dae
Clean up after merge
jakobbotsch Dec 26, 2024
2d683e0
Run jit-format
jakobbotsch Dec 26, 2024
0db299f
Attach info to reused temps
jakobbotsch Dec 26, 2024
9a23ca3
Run jit-format
jakobbotsch Dec 26, 2024
8a6edaf
Add quick early-out that avoids IR walk of first BB
jakobbotsch Dec 26, 2024
9b40281
Avoid introducing new locals when it needs a callee save
jakobbotsch Dec 27, 2024
fc8fa5e
Run jit-format
jakobbotsch Dec 27, 2024
2f4b176
Fix local var dump output length for frame locs
jakobbotsch Dec 27, 2024
bb7eb9f
JIT: Unify Swift parameter register homing with normal homing
jakobbotsch Dec 20, 2024
1ceb64c
Fix store size for Swift CC
jakobbotsch Dec 26, 2024
84101a1
Run jit-format
jakobbotsch Dec 27, 2024
aa7e6a5
Allow homing prespilled registers on arm32
jakobbotsch Dec 28, 2024
62289d7
Skip induced parameter register locals for profiler hook on arm32
jakobbotsch Jan 10, 2025
eeaaaf4
Merge branch 'main' into physical-promotion-parameters-abi
jakobbotsch Feb 8, 2025
151cddd
Do placed arg LSRA opt for PUTARG_SPLIT
jakobbotsch Feb 9, 2025
95d2b04
Run jit-format
jakobbotsch Feb 9, 2025
a5289d6
Add liveness info to JITDUMP
jakobbotsch Feb 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,8 @@ class CodeGen final : public CodeGenInterface
void genPopFltRegs(regMaskTP regMask);
regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat);

regMaskTP genPrespilledUnmappedRegs();

regMaskTP genJmpCallArgMask();

void genFreeLclFrame(unsigned frameSize,
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,30 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC
}
}

//-----------------------------------------------------------------------------------
// genPrespilledUnmappedRegs: Get a mask of the registers that are prespilled
// and also not mapped to any locals.
//
// Returns:
// Mask of those registers. These registers can be used safely in prolog as
// they won't be needed after prespilling.
//
regMaskTP CodeGen::genPrespilledUnmappedRegs()
{
regMaskTP regs = regSet.rsMaskPreSpillRegs(false);

if (compiler->m_paramRegLocalMappings != nullptr)
{
for (int i = 0; i < compiler->m_paramRegLocalMappings->Height(); i++)
{
const ParameterRegisterLocalMapping& mapping = compiler->m_paramRegLocalMappings->BottomRef(i);
regs &= ~mapping.RegisterSegment->GetRegisterMask();
}
}

return regs;
}

//-----------------------------------------------------------------------------------
// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
//
Expand Down
31 changes: 28 additions & 3 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3411,6 +3411,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
// top of the underlying registers.
RegGraph graph(compiler);

// Add everything to the graph, or spill directly to stack when needed.
// Note that some registers may be homed in multiple (stack) places.
// Particularly if there is a mapping to a local that does not share its
// (stack) home with the parameter local, in which case we will home it
// both into the parameter local's stack home (if it is used), but also to
// the mapping target.
for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++)
{
LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum);
Expand All @@ -3426,11 +3432,26 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
const ParameterRegisterLocalMapping* mapping =
compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister());

bool spillToBaseLocal = true;
if (mapping != nullptr)
{
genSpillOrAddRegisterParam(mapping->LclNum, mapping->Offset, lclNum, segment, &graph);

// If home is shared with base local, then skip spilling to the
// base local.
if (lclDsc->lvPromoted)
{
spillToBaseLocal = false;
}
}
else

#ifdef TARGET_ARM
// For arm32 the spills to the base local happen as part of
// prespilling sometimes, so skip it in that case.
spillToBaseLocal &= (regSet.rsMaskPreSpillRegs(false) & segment.GetRegisterMask()) == 0;
#endif

if (spillToBaseLocal)
{
genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph);
}
Expand Down Expand Up @@ -3915,7 +3936,7 @@ void CodeGen::genCheckUseBlockInit()
// must force spill R4/R5/R6 so that we can use them during
// zero-initialization process.
//
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~genPrespilledUnmappedRegs()) - 1;
if (forceSpillRegCount > 0)
regSet.rsSetRegsModified(RBM_R4);
if (forceSpillRegCount > 1)
Expand Down Expand Up @@ -5347,7 +5368,7 @@ void CodeGen::genFnProlog()
// These registers will be available to use for the initReg. We just remove
// all of these registers from the rsCalleeRegArgMaskLiveIn.
//
intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
intRegState.rsCalleeRegArgMaskLiveIn &= ~genPrespilledUnmappedRegs();
#endif

/* Choose the register to use for zero initialization */
Expand Down Expand Up @@ -5751,6 +5772,10 @@ void CodeGen::genFnProlog()
#else
genEnregisterOSRArgsAndLocals();
#endif
// OSR functions take no parameters in registers. Ensure no mappings
// are present.
// assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty());
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't recall why I commented this, will uncomment and make sure it doesn't trigger.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually let me do that in a follow-up. The problem is that LSRA is expecting these mappings to exist even for OSR functions, since it gets used to pick an initial preferred register. That doesn't really make sense for OSR functions, but changing that will come with diffs, so I don't think it should be done here.


compiler->lvaUpdateArgsWithInitialReg();
}
else
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10412,7 +10412,7 @@ JITDBGAPI void __cdecl dVN(ValueNum vn)
cVN(JitTls::GetCompiler(), vn);
}

JITDBGAPI void __cdecl dRegMask(regMaskTP mask)
JITDBGAPI void __cdecl dRegMask(const regMaskTP& mask)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function was not working for me for ARM64 because regMaskTP is a struct, and struct args are not supported in VS's debugger eval.

{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== dRegMask %u\n", sequenceNumber++);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4202,7 +4202,7 @@ class Compiler

#ifdef DEBUG
void lvaDumpRegLocation(unsigned lclNum);
void lvaDumpFrameLocation(unsigned lclNum);
void lvaDumpFrameLocation(unsigned lclNum, int minLength);
void lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth = 6);
void lvaTableDump(FrameLayoutState curState = NO_FRAME_LAYOUT); // NO_FRAME_LAYOUT means use the current frame
// layout state defined by lvaDoneFrameLayout
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4260,8 +4260,9 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename)
bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum)
{
LclVarDsc* varDsc = lvaGetDesc(varNum);
bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) ||
(varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);
bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) ||
(varNum == lvaGSSecurityCookie) || (varNum == lvaInlinedPInvokeFrameVar) ||
(varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);

#ifdef TARGET_ARM64
result = result || (varNum == lvaFfrRegister);
Expand Down
20 changes: 15 additions & 5 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4939,8 +4939,8 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
// that was set by past phases.
if (!isRecompute)
{
varDsc->lvSingleDef = varDsc->lvIsParam;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam;
varDsc->lvSingleDef = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;

varDsc->lvAllDefsAreNoGc = (varDsc->lvImplicitlyReferenced == false);
}
Expand Down Expand Up @@ -5033,6 +5033,11 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
}
}
else if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0))
{
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
}

// If we have JMP, all arguments must have a location
// even if we don't use them inside the method
Expand Down Expand Up @@ -7370,7 +7375,7 @@ void Compiler::lvaDumpRegLocation(unsigned lclNum)
* in its home location.
*/

void Compiler::lvaDumpFrameLocation(unsigned lclNum)
void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength)
{
int offset;
regNumber baseReg;
Expand All @@ -7383,7 +7388,12 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum)
baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
#endif

printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
int printed =
printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
if ((printed >= 0) && (printed < minLength))
{
printf("%*s", minLength - printed, "");
}
}

/*****************************************************************************
Expand Down Expand Up @@ -7474,7 +7484,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
// location. Otherwise, it's always on the stack.
if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
{
lvaDumpFrameLocation(lclNum);
lvaDumpFrameLocation(lclNum, (int)strlen("zero-ref "));
}
}
}
Expand Down
Loading
Loading