Skip to content

Commit

Permalink
Fix perf issues discovered in "For software performance, can you always trust inlining" blog (#61408)
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo authored Dec 7, 2021
1 parent 9872424 commit 0ddc132
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 20 deletions.
4 changes: 1 addition & 3 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,12 +894,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
const bool makeInlineObservations = (compInlineResult != nullptr);
const bool isInlining = compIsForInlining();
const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
unsigned retBlocks = 0;
int prefixFlags = 0;
bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan();
const bool resolveTokens = preciseScan && (isPreJit || isTier1);
const bool resolveTokens = preciseScan;

// Track offsets where IL instructions begin in DEBUG builds. Used to
// validate debug info generated by the JIT.
Expand Down
63 changes: 46 additions & 17 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2620,29 +2620,58 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
unsigned level = 0;
unsigned lvl2 = 0;

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
if (multiOp->OperIs(GT_HWINTRINSIC) && (multiOp->GetOperandCount() == 1) &&
multiOp->AsHWIntrinsic()->OperIsMemoryLoadOrStore())
#if defined(FEATURE_HW_INTRINSICS)
if (multiOp->OperIs(GT_HWINTRINSIC))
{
costEx = IND_COST_EX;
costSz = 2;
GenTreeHWIntrinsic* hwTree = multiOp->AsHWIntrinsic();
#if defined(TARGET_XARCH)
if ((hwTree->GetOperandCount() == 1) && hwTree->OperIsMemoryLoadOrStore())
{
costEx = IND_COST_EX;
costSz = 2;

GenTree* addr = multiOp->Op(1)->gtEffectiveVal();
level = gtSetEvalOrder(addr);
GenTree* addr = hwTree->Op(1)->gtEffectiveVal();
level = gtSetEvalOrder(addr);

// See if we can form a complex addressing mode.
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, multiOp->TypeGet()))
{
// Nothing to do, costs have been set.
// See if we can form a complex addressing mode.
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, hwTree->TypeGet()))
{
// Nothing to do, costs have been set.
}
else
{
costEx += addr->GetCostEx();
costSz += addr->GetCostSz();
}

hwTree->SetCosts(costEx, costSz);
return level;
}
else
#endif
switch (hwTree->GetHWIntrinsicId())
{
costEx += addr->GetCostEx();
costSz += addr->GetCostSz();
#if defined(TARGET_XARCH)
case NI_Vector128_Create:
case NI_Vector256_Create:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector128_Create:
#endif
{
if ((hwTree->GetOperandCount() == 1) && hwTree->Op(1)->OperIsConst())
{
// Vector.Create(cns) is cheap but not that cheap to be (1,1)
costEx = IND_COST_EX;
costSz = 2;
level = gtSetEvalOrder(hwTree->Op(1));
hwTree->SetCosts(costEx, costSz);
return level;
}
break;
}
default:
break;
}

multiOp->SetCosts(costEx, costSz);
return level;
}
#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)

Expand Down

0 comments on commit 0ddc132

Please sign in to comment.