@@ -55,7 +55,7 @@ template <typename T, typename Functor>
55
55
void GenericCall (const Functor &ApplyToBytes) {
56
56
if (sizeof (T) >= sizeof (ShuffleChunkT)) {
57
57
#pragma unroll
58
- for (size_t Offset = 0 ; Offset < sizeof (T);
58
+ for (size_t Offset = 0 ; Offset + sizeof (ShuffleChunkT) <= sizeof (T);
59
59
Offset += sizeof (ShuffleChunkT)) {
60
60
ApplyToBytes (Offset, sizeof (ShuffleChunkT));
61
61
}
@@ -160,9 +160,9 @@ EnableIfGenericBroadcast<T, IdT> GroupBroadcast(T x, IdT local_id) {
160
160
char *ResultBytes = reinterpret_cast <char *>(&Result);
161
161
auto BroadcastBytes = [=](size_t Offset, size_t Size) {
162
162
uint64_t BroadcastX, BroadcastResult;
163
- detail ::memcpy (&BroadcastX, XBytes + Offset, Size);
163
+ std ::memcpy (&BroadcastX, XBytes + Offset, Size);
164
164
BroadcastResult = GroupBroadcast<Group>(BroadcastX, local_id);
165
- detail ::memcpy (ResultBytes + Offset, &BroadcastResult, Size);
165
+ std ::memcpy (ResultBytes + Offset, &BroadcastResult, Size);
166
166
};
167
167
GenericCall<T>(BroadcastBytes);
168
168
return Result;
@@ -213,9 +213,9 @@ EnableIfGenericBroadcast<T> GroupBroadcast(T x, id<Dimensions> local_id) {
213
213
char *ResultBytes = reinterpret_cast <char *>(&Result);
214
214
auto BroadcastBytes = [=](size_t Offset, size_t Size) {
215
215
uint64_t BroadcastX, BroadcastResult;
216
- detail ::memcpy (&BroadcastX, XBytes + Offset, Size);
216
+ std ::memcpy (&BroadcastX, XBytes + Offset, Size);
217
217
BroadcastResult = GroupBroadcast<Group>(BroadcastX, local_id);
218
- detail ::memcpy (ResultBytes + Offset, &BroadcastResult, Size);
218
+ std ::memcpy (ResultBytes + Offset, &BroadcastResult, Size);
219
219
};
220
220
GenericCall<T>(BroadcastBytes);
221
221
return Result;
@@ -697,9 +697,9 @@ EnableIfGenericShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
697
697
char *ResultBytes = reinterpret_cast <char *>(&Result);
698
698
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
699
699
ShuffleChunkT ShuffleX, ShuffleResult;
700
- detail ::memcpy (&ShuffleX, XBytes + Offset, Size);
700
+ std ::memcpy (&ShuffleX, XBytes + Offset, Size);
701
701
ShuffleResult = SubgroupShuffle (ShuffleX, local_id);
702
- detail ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
702
+ std ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
703
703
};
704
704
GenericCall<T>(ShuffleBytes);
705
705
return Result;
@@ -712,9 +712,9 @@ EnableIfGenericShuffle<T> SubgroupShuffleXor(T x, id<1> local_id) {
712
712
char *ResultBytes = reinterpret_cast <char *>(&Result);
713
713
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
714
714
ShuffleChunkT ShuffleX, ShuffleResult;
715
- detail ::memcpy (&ShuffleX, XBytes + Offset, Size);
715
+ std ::memcpy (&ShuffleX, XBytes + Offset, Size);
716
716
ShuffleResult = SubgroupShuffleXor (ShuffleX, local_id);
717
- detail ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
717
+ std ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
718
718
};
719
719
GenericCall<T>(ShuffleBytes);
720
720
return Result;
@@ -727,9 +727,9 @@ EnableIfGenericShuffle<T> SubgroupShuffleDown(T x, id<1> local_id) {
727
727
char *ResultBytes = reinterpret_cast <char *>(&Result);
728
728
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
729
729
ShuffleChunkT ShuffleX, ShuffleResult;
730
- detail ::memcpy (&ShuffleX, XBytes + Offset, Size);
730
+ std ::memcpy (&ShuffleX, XBytes + Offset, Size);
731
731
ShuffleResult = SubgroupShuffleDown (ShuffleX, local_id);
732
- detail ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
732
+ std ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
733
733
};
734
734
GenericCall<T>(ShuffleBytes);
735
735
return Result;
@@ -742,9 +742,9 @@ EnableIfGenericShuffle<T> SubgroupShuffleUp(T x, id<1> local_id) {
742
742
char *ResultBytes = reinterpret_cast <char *>(&Result);
743
743
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
744
744
ShuffleChunkT ShuffleX, ShuffleResult;
745
- detail ::memcpy (&ShuffleX, XBytes + Offset, Size);
745
+ std ::memcpy (&ShuffleX, XBytes + Offset, Size);
746
746
ShuffleResult = SubgroupShuffleUp (ShuffleX, local_id);
747
- detail ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
747
+ std ::memcpy (ResultBytes + Offset, &ShuffleResult, Size);
748
748
};
749
749
GenericCall<T>(ShuffleBytes);
750
750
return Result;
0 commit comments