From ed02536b4bbde1b455d577c23c0f37f8d15423c4 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 25 May 2021 13:20:14 -0400 Subject: [PATCH 1/2] [SYCL] Fix upper bound in GenericCall Previous upper bound considered only the offset, allowing a memcpy for the final chunk to walk off the end of the byte array. Signed-off-by: John Pennycook --- sycl/include/CL/sycl/detail/spirv.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index cc89053783485..69e07b6540d52 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -48,7 +48,7 @@ template void GenericCall(const Functor &ApplyToBytes) { if (sizeof(T) >= sizeof(ShuffleChunkT)) { #pragma unroll - for (size_t Offset = 0; Offset < sizeof(T); + for (size_t Offset = 0; Offset + sizeof(ShuffleChunkT) <= sizeof(T); Offset += sizeof(ShuffleChunkT)) { ApplyToBytes(Offset, sizeof(ShuffleChunkT)); } From 54eace07a654b41d51fd359777e43d0d909f44ad Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Tue, 25 May 2021 13:25:56 -0400 Subject: [PATCH 2/2] [SYCL] Replace detail::memcpy with std::memcpy sycl::detail::memcpy is implemented as a loop, resulting in different optimizations than std::memcpy. Signed-off-by: John Pennycook --- sycl/include/CL/sycl/detail/spirv.hpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index 69e07b6540d52..ec1a5cb184fa5 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -153,9 +153,9 @@ EnableIfGenericBroadcast GroupBroadcast(T x, IdT local_id) { char *ResultBytes = reinterpret_cast(&Result); auto BroadcastBytes = [=](size_t Offset, size_t Size) { uint64_t BroadcastX, BroadcastResult; - detail::memcpy(&BroadcastX, XBytes + Offset, Size); + std::memcpy(&BroadcastX, XBytes + Offset, Size); BroadcastResult = GroupBroadcast(BroadcastX, local_id); - detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size); + std::memcpy(ResultBytes + Offset, &BroadcastResult, Size); }; GenericCall(BroadcastBytes); return Result; @@ -206,9 +206,9 @@ EnableIfGenericBroadcast GroupBroadcast(T x, id local_id) { char *ResultBytes = reinterpret_cast(&Result); auto BroadcastBytes = [=](size_t Offset, size_t Size) { uint64_t BroadcastX, BroadcastResult; - detail::memcpy(&BroadcastX, XBytes + Offset, Size); + std::memcpy(&BroadcastX, XBytes + Offset, Size); BroadcastResult = GroupBroadcast(BroadcastX, local_id); - detail::memcpy(ResultBytes + Offset, &BroadcastResult, Size); + std::memcpy(ResultBytes + Offset, &BroadcastResult, Size); }; GenericCall(BroadcastBytes); return Result; @@ -682,9 +682,9 @@ EnableIfGenericShuffle SubgroupShuffle(T x, id<1> local_id) { char *ResultBytes = reinterpret_cast(&Result); auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + std::memcpy(&ShuffleX, XBytes + Offset, Size); ShuffleResult = SubgroupShuffle(ShuffleX, local_id); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + std::memcpy(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result; @@ -697,9 +697,9 @@ EnableIfGenericShuffle SubgroupShuffleXor(T x, id<1> local_id) { char *ResultBytes = reinterpret_cast(&Result); auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + std::memcpy(&ShuffleX, XBytes + Offset, Size); ShuffleResult = SubgroupShuffleXor(ShuffleX, local_id); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + std::memcpy(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result; @@ -712,9 +712,9 @@ EnableIfGenericShuffle SubgroupShuffleDown(T x, id<1> local_id) { char *ResultBytes = reinterpret_cast(&Result); auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + std::memcpy(&ShuffleX, XBytes + Offset, Size); ShuffleResult = SubgroupShuffleDown(ShuffleX, local_id); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + std::memcpy(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result; @@ -727,9 +727,9 @@ EnableIfGenericShuffle SubgroupShuffleUp(T x, id<1> local_id) { char *ResultBytes = reinterpret_cast(&Result); auto ShuffleBytes = [=](size_t Offset, size_t Size) { ShuffleChunkT ShuffleX, ShuffleResult; - detail::memcpy(&ShuffleX, XBytes + Offset, Size); + std::memcpy(&ShuffleX, XBytes + Offset, Size); ShuffleResult = SubgroupShuffleUp(ShuffleX, local_id); - detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size); + std::memcpy(ResultBytes + Offset, &ShuffleResult, Size); }; GenericCall(ShuffleBytes); return Result;