Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable host device for macros for SYCL/DPC++ #2969

Merged
merged 2 commits into from
Oct 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions Src/Base/AMReX_GpuLaunch.H
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ namespace Gpu {

#ifdef AMREX_USE_GPU

#ifndef AMREX_USE_DPCPP

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
{ using amrex_i_inttype = typename std::remove_const<decltype(n)>::type; \
if ((where_to_run == RunOn::Device) && (Gpu::inLaunchRegion())) \
Expand Down Expand Up @@ -366,6 +368,111 @@ namespace Gpu {
block3; \
}

#else
// xxxxx DPCPP todo: the host branch of the HOST_DEVICE macros below is disabled; they call amrex::Abort unless the device path is taken

// DPC++ variant of the 1D host/device parallel-for macro.
//
//   where_to_run : expression compared against RunOn::Device
//   n            : iteration count; the index type is decltype(n) with const removed
//   i            : name of the loop index visible inside `block`
//   block        : braced body, used as the body of a by-value-capturing lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the body is handed to amrex::ParallelFor. Otherwise the macro calls
// amrex::Abort, because the host branch is disabled in this configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. `c ? RunOn::Host : RunOn::Device`) are
// compared as a whole instead of being split by the `==`.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = typename std::remove_const<decltype(n)>::type; \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}

// DPC++ variant of the 3D host/device parallel-for macro.
//
//   where_to_run : expression compared against RunOn::Device
//   box          : first argument forwarded to amrex::ParallelFor
//   i, j, k      : names of the int indices visible inside `block`
//   block        : braced body, used as the body of a by-value-capturing lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the body is handed to amrex::ParallelFor. Otherwise the macro calls
// amrex::Abort, because the host branch is disabled in this configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// DPC++ variant of the 4D (box plus component) host/device parallel-for macro.
//
//   where_to_run : expression compared against RunOn::Device
//   box, nc      : first two arguments forwarded to amrex::ParallelFor
//   i, j, k, n   : names of the int indices visible inside `block`
//   block        : braced body, used as the body of a by-value-capturing lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the body is handed to amrex::ParallelFor. Otherwise the macro calls
// amrex::Abort, because the host branch is disabled in this configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// DPC++ variant of the 1D host/device for macro.
//
//   where_to_run : expression compared against RunOn::Device
//   n            : iteration count; the index type is decltype(n) with const removed
//   i            : name of the loop index visible inside `block`
//   block        : braced body, used as the body of a by-value-capturing lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the body is handed to amrex::ParallelFor. Otherwise the macro calls
// amrex::Abort, because the host branch is disabled in this configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_HOST_DEVICE_FOR_1D_FLAG(where_to_run,n,i,block) \
    { using amrex_i_inttype = typename std::remove_const<decltype(n)>::type; \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (amrex_i_inttype i) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }}

// DPC++ variant of the 3D host/device for macro.
//
//   where_to_run : expression compared against RunOn::Device
//   box          : first argument forwarded to amrex::ParallelFor
//   i, j, k      : names of the int indices visible inside `block`
//   block        : braced body, used as the body of a by-value-capturing lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the body is handed to amrex::ParallelFor. Otherwise the macro calls
// amrex::Abort, because the host branch is disabled in this configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_HOST_DEVICE_FOR_3D_FLAG(where_to_run,box,i,j,k,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// DPC++ variant of the 4D (box plus component) host/device for macro.
//
//   where_to_run : expression compared against RunOn::Device
//   box, nc      : first two arguments forwarded to amrex::ParallelFor
//   i, j, k, n   : names of the int indices visible inside `block`
//   block        : braced body, used as the body of a by-value-capturing lambda
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the body is handed to amrex::ParallelFor. Otherwise the macro calls
// amrex::Abort, because the host branch is disabled in this configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_HOST_DEVICE_FOR_4D_FLAG(where_to_run,box,nc,i,j,k,n,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        amrex::ParallelFor(box, nc, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept \
            block \
        ); \
    } \
    else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// DPC++ variant of the host/device launch macro for a single box.
//
//   where_to_run : expression compared against RunOn::Device
//   box, tbox, block : forwarded unchanged to AMREX_LAUNCH_DEVICE_LAMBDA
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the arguments are passed on to AMREX_LAUNCH_DEVICE_LAMBDA. Otherwise the
// macro calls amrex::Abort, because the host branch is disabled in this
// configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(where_to_run,box,tbox,block) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(box,tbox,block); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// DPC++ variant of the host/device launch macro for one (box, tile box, body) triple.
//
//   where_to_run : expression compared against RunOn::Device
//   bx1, tbx1, block1 : forwarded unchanged to AMREX_LAUNCH_DEVICE_LAMBDA
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the arguments are passed on to AMREX_LAUNCH_DEVICE_LAMBDA. Otherwise the
// macro calls amrex::Abort, because the host branch is disabled in this
// configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_FLAG(where_to_run,bx1,tbx1,block1) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// DPC++ variant of the host/device launch macro for two (box, tile box, body) triples.
//
//   where_to_run : expression compared against RunOn::Device
//   bx1, tbx1, block1, bx2, tbx2, block2 : forwarded unchanged to
//                                          AMREX_LAUNCH_DEVICE_LAMBDA
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the arguments are passed on to AMREX_LAUNCH_DEVICE_LAMBDA. Otherwise the
// macro calls amrex::Abort, because the host branch is disabled in this
// configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

// DPC++ variant of the host/device launch macro for three (box, tile box, body) triples.
//
//   where_to_run : expression compared against RunOn::Device
//   bx1, tbx1, block1, bx2, tbx2, block2, bx3, tbx3, block3 :
//       forwarded unchanged to AMREX_LAUNCH_DEVICE_LAMBDA
//
// When where_to_run equals RunOn::Device and Gpu::inLaunchRegion() is true,
// the arguments are passed on to AMREX_LAUNCH_DEVICE_LAMBDA. Otherwise the
// macro calls amrex::Abort, because the host branch is disabled in this
// configuration.
//
// `where_to_run` is parenthesized so that arguments containing
// lower-precedence operators (e.g. a conditional expression) are compared
// as a whole instead of being split by the `==`.
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(where_to_run,bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3) \
    if (((where_to_run) == RunOn::Device) && (Gpu::inLaunchRegion())) \
    { \
        AMREX_LAUNCH_DEVICE_LAMBDA(bx1,tbx1,block1,bx2,tbx2,block2,bx3,tbx3,block3); \
    } else { \
        amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile"); \
    }

#endif

#else

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D_FLAG(where_to_run,n,i,block) \
Expand Down
52 changes: 52 additions & 0 deletions Src/Base/AMReX_GpuLaunchFunctsG.H
Original file line number Diff line number Diff line change
Expand Up @@ -1629,8 +1629,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info,n,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
AMREX_PRAGMA_SIMD
for (T i = 0; i < n; ++i) f(i);
#endif
}
}

Expand All @@ -1641,8 +1645,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info,n,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
AMREX_PRAGMA_SIMD
for (T i = 0; i < n; ++i) f(i);
#endif
}
}

Expand All @@ -1667,7 +1675,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info, box,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,std::forward<L>(f));
#endif
}
}

Expand All @@ -1678,7 +1690,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexc
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info, box,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,std::forward<L>(f));
#endif
}
}

Expand All @@ -1689,7 +1705,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&&
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info, box,ncomp,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,ncomp,std::forward<L>(f));
#endif
}
}

Expand All @@ -1700,7 +1720,11 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&&
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info, box,ncomp,std::forward<L>(f));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box,ncomp,std::forward<L>(f));
#endif
}
}

Expand All @@ -1712,8 +1736,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info,box1,box2,std::forward<L1>(f1),std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1725,8 +1753,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info,box1,box2,std::forward<L1>(f1),std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1740,9 +1772,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
ParallelFor<MT>(info,box1,box2,box3,
std::forward<L1>(f1),std::forward<L2>(f2),std::forward<L3>(f3));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,std::forward<L2>(f2));
LoopConcurrentOnCpu(box3,std::forward<L3>(f3));
#endif
}
}

Expand All @@ -1757,8 +1793,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<AMREX_GPU_MAX_THREADS>(info,box1,ncomp1,std::forward<L1>(f1),box2,ncomp2,std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1773,8 +1813,12 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
if (Gpu::inLaunchRegion()) {
ParallelFor<MT>(info,box1,ncomp1,std::forward<L1>(f1),box2,ncomp2,std::forward<L2>(f2));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
#endif
}
}

Expand All @@ -1794,9 +1838,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
box2,ncomp2,std::forward<L2>(f2),
box3,ncomp3,std::forward<L3>(f3));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
LoopConcurrentOnCpu(box3,ncomp3,std::forward<L3>(f3));
#endif
}
}

Expand All @@ -1816,9 +1864,13 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info,
box2,ncomp2,std::forward<L2>(f2),
box3,ncomp3,std::forward<L3>(f3));
} else {
#ifdef AMREX_USE_DPCPP
amrex::Abort("amrex:: HOST_DEVICE disabled for Intel. It takes too long to compile");
#else
LoopConcurrentOnCpu(box1,ncomp1,std::forward<L1>(f1));
LoopConcurrentOnCpu(box2,ncomp2,std::forward<L2>(f2));
LoopConcurrentOnCpu(box3,ncomp3,std::forward<L3>(f3));
#endif
}
}

Expand Down
Loading