Skip to content

Commit af4dc96

Browse files
sushgokhkugan-nv
andcommitted
[OpenMP] Fix stack corruption due to argument mismatch
While lowering (#pragma omp target update from), clang's generated .omp_task_entry. is setting up 9 arguments while calling __tgt_target_data_update_nowait_mapper. At the same time, in __tgt_target_data_update_nowait_mapper, call to targetData<TaskAsyncInfoWrapperTy>() is converted to a sibcall assuming it has the argument count listed in the signature. AARCH64 asm sequence for this is as follows (removed unrelated insns): .omp_task_entry..108: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8. // stack canary str xzr, [sp] bl __tgt_target_data_update_nowait_mapper __tgt_target_data_update_nowait_mapper: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8 // stack canary // Sibcall argument setup adrp x8, :got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb ldr x8, [x8, :got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb] stp x9, x8, x29, #16 adrp x8, .L.str.8 add x8, x8, :lo12:.L.str.8 str x8, x29, #32. <==. This is the insn that erases $fp ldp x29, x30, sp, #16 // 16-byte Folded Reload add sp, sp, #32 // Sibcall b ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry. sets up only single space on stack and this results in ovewriting $fp and subsequent stack corruption. This issue can be credited to discrepancy of __tgt_target_data_update_nowait_mapper signature in openmp/libomptarget/include/omptarget.h taking 13 arguments while clang/lib/CodeGen/CGOpenMPRuntime.cpp and llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments. This patch modifies __tgt_target_data_update_nowait_mapper signature to match .omp_task_entry usage(and other 2 files mentioned above). Co-authored-by: Kugan Vivekanandarajah <kvivekananda@nvidia.com>
1 parent 4145ad2 commit af4dc96

File tree

2 files changed

+44
-14
lines changed

2 files changed

+44
-14
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10343,6 +10343,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
1034310343
MapNamesArray,
1034410344
InputInfo.MappersArray.emitRawPointer(CGF)};
1034510345

10346+
// Nowait calls have header declarations that take 13 arguments. Hence, the
10347+
// divergence from the OffloadingArgs definition.
10348+
llvm::Value *NowaitOffloadingArgs[] = {
10349+
RTLoc,
10350+
DeviceID,
10351+
PointerNum,
10352+
InputInfo.BasePointersArray.emitRawPointer(CGF),
10353+
InputInfo.PointersArray.emitRawPointer(CGF),
10354+
InputInfo.SizesArray.emitRawPointer(CGF),
10355+
MapTypesArray,
10356+
MapNamesArray,
10357+
InputInfo.MappersArray.emitRawPointer(CGF),
10358+
llvm::Constant::getNullValue(CGF.Int32Ty),
10359+
llvm::Constant::getNullValue(CGF.VoidPtrTy),
10360+
llvm::Constant::getNullValue(CGF.Int32Ty),
10361+
llvm::Constant::getNullValue(CGF.VoidPtrTy)};
10362+
1034610363
// Select the right runtime function call for each standalone
1034710364
// directive.
1034810365
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
@@ -10430,9 +10447,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
1043010447
llvm_unreachable("Unexpected standalone target data directive.");
1043110448
break;
1043210449
}
10433-
CGF.EmitRuntimeCall(
10434-
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10435-
OffloadingArgs);
10450+
if (HasNowait)
10451+
CGF.EmitRuntimeCall(
10452+
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10453+
NowaitOffloadingArgs);
10454+
else
10455+
CGF.EmitRuntimeCall(
10456+
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10457+
OffloadingArgs);
1043610458
};
1043710459

1043810460
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,

llvm/include/llvm/Frontend/OpenMP/OMPKinds.def

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -438,19 +438,22 @@ __OMP_RTL(__tgt_target_kernel_nowait, false, Int32, IdentPtr, Int64, Int32,
438438
Int32, VoidPtr, KernelArgsPtr, Int32, VoidPtr, Int32, VoidPtr)
439439
__OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
440440
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
441-
__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
442-
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
441+
__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64,
442+
Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
443+
VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
443444
__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, IdentPtr, Int64, Int32,
444445
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, AsyncInfoPtr)
445446
__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr)
446447
__OMP_RTL(__tgt_target_data_end_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr,
447448
VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
448-
__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
449-
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
449+
__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64,
450+
Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
451+
VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
450452
__OMP_RTL(__tgt_target_data_update_mapper, false, Void, IdentPtr, Int64, Int32,
451453
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
452-
__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64, Int32,
453-
VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr)
454+
__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64,
455+
Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr,
456+
VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr)
454457
__OMP_RTL(__tgt_mapper_num_components, false, Int64, VoidPtr)
455458
__OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr,
456459
Int64, Int64, VoidPtr)
@@ -1026,10 +1029,12 @@ __OMP_RTL_ATTRS(__tgt_target_kernel_nowait, ForkAttrs, SExt,
10261029
SExt))
10271030
__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(),
10281031
ParamAttrs(AttributeSet(), AttributeSet(), SExt))
1029-
__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, AttributeSet(),
1032+
__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs,
1033+
AttributeSet(),
10301034
ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
10311035
AttributeSet(), AttributeSet(), AttributeSet(),
1032-
AttributeSet(), AttributeSet()))
1036+
AttributeSet(), AttributeSet(), SExt, AttributeSet(),
1037+
SExt, AttributeSet()))
10331038
__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper_issue, AttributeSet(),
10341039
AttributeSet(),
10351040
ParamAttrs(AttributeSet(), AttributeSet(), SExt))
@@ -1038,13 +1043,16 @@ __OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(),
10381043
__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, AttributeSet(),
10391044
ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
10401045
AttributeSet(), AttributeSet(), AttributeSet(),
1041-
AttributeSet(), AttributeSet()))
1046+
AttributeSet(), AttributeSet(), SExt, AttributeSet(),
1047+
SExt, AttributeSet()))
10421048
__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(),
10431049
ParamAttrs(AttributeSet(), AttributeSet(), SExt))
1044-
__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, AttributeSet(),
1050+
__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs,
1051+
AttributeSet(),
10451052
ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(),
10461053
AttributeSet(), AttributeSet(), AttributeSet(),
1047-
AttributeSet(), AttributeSet()))
1054+
AttributeSet(), AttributeSet(), SExt, AttributeSet(),
1055+
SExt, AttributeSet()))
10481056
__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(),
10491057
ParamAttrs())
10501058
__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(),

0 commit comments

Comments
 (0)