Skip to content

Commit 1669350

Browse files
dkolsen-pgi, brycelelbach
authored and committed
Changes necessary to support Feta.
Reviewed-by: Bryce Adelstein Lelbach aka wash <brycelelbach@gmail.com> Bug 2839527
1 parent 3cfcc0c commit 1669350

30 files changed

+364
-278
lines changed

dependencies/cub

Submodule cub updated from 629f01e to 35e4f69

examples/dot_products_with_zip.cu

+18-18
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
#include <thrust/random.h>
77

88

9-
// This example shows how thrust::zip_iterator can be used to create a
10-
// 'virtual' array of structures. In this case the structure is a 3d
11-
// vector type (Float3) whose (x,y,z) components will be stored in
9+
// This example shows how thrust::zip_iterator can be used to create a
10+
// 'virtual' array of structures. In this case the structure is a 3d
11+
// vector type (Float3) whose (x,y,z) components will be stored in
1212
// three separate float arrays. The zip_iterator "zips" these arrays
1313
// into a single virtual Float3 array.
1414

@@ -54,17 +54,17 @@ int main(void)
5454
// We'll store the components of the 3d vectors in separate arrays. One set of
5555
// arrays will store the 'A' vectors and another set will store the 'B' vectors.
5656

57-
// This 'structure of arrays' (SoA) approach is usually more efficient than the
57+
// This 'structure of arrays' (SoA) approach is usually more efficient than the
5858
// 'array of structures' (AoS) approach. The primary reason is that structures,
5959
// like Float3, don't always obey the memory coalescing rules, so they are not
6060
// efficiently transferred to and from memory. Another reason to prefer SoA to
6161
// AoS is that we don't always want to process all members of the structure. For
62-
// example, if we only need to look at first element of the structure then it
62+
// example, if we only need to look at first element of the structure then it
6363
// is wasteful to load the entire structure from memory. With the SoA approach,
6464
// we can choose which elements of the structure we wish to read.
6565

6666
thrust::device_vector<float> A0 = random_vector(N); // x components of the 'A' vectors
67-
thrust::device_vector<float> A1 = random_vector(N); // y components of the 'A' vectors
67+
thrust::device_vector<float> A1 = random_vector(N); // y components of the 'A' vectors
6868
thrust::device_vector<float> A2 = random_vector(N); // z components of the 'A' vectors
6969

7070
thrust::device_vector<float> B0 = random_vector(N); // x components of the 'B' vectors
@@ -78,7 +78,7 @@ int main(void)
7878
// We'll now illustrate two ways to use zip_iterator to compute the dot
7979
// products. The first method is verbose but shows how the parts fit together.
8080
// The second method hides these details and is more concise.
81-
81+
8282

8383
// METHOD #1
8484
// Defining a zip_iterator type can be a little cumbersome ...
@@ -87,24 +87,24 @@ int main(void)
8787
typedef thrust::zip_iterator<FloatIteratorTuple> Float3Iterator;
8888

8989
// Now we'll create some zip_iterators for A and B
90-
Float3Iterator A_first = thrust::make_zip_iterator(make_tuple(A0.begin(), A1.begin(), A2.begin()));
91-
Float3Iterator A_last = thrust::make_zip_iterator(make_tuple(A0.end(), A1.end(), A2.end()));
92-
Float3Iterator B_first = thrust::make_zip_iterator(make_tuple(B0.begin(), B1.begin(), B2.begin()));
93-
90+
Float3Iterator A_first = thrust::make_zip_iterator(thrust::make_tuple(A0.begin(), A1.begin(), A2.begin()));
91+
Float3Iterator A_last = thrust::make_zip_iterator(thrust::make_tuple(A0.end(), A1.end(), A2.end()));
92+
Float3Iterator B_first = thrust::make_zip_iterator(thrust::make_tuple(B0.begin(), B1.begin(), B2.begin()));
93+
9494
// Finally, we pass the zip_iterators into transform() as if they
9595
// were 'normal' iterators for a device_vector<Float3>.
9696
thrust::transform(A_first, A_last, B_first, result.begin(), DotProduct());
9797

9898

9999
// METHOD #2
100-
// Alternatively, we can avoid creating variables for X_first, X_last,
100+
// Alternatively, we can avoid creating variables for X_first, X_last,
101101
// and Y_first and invoke transform() directly.
102-
thrust::transform( thrust::make_zip_iterator(make_tuple(A0.begin(), A1.begin(), A2.begin())),
103-
thrust::make_zip_iterator(make_tuple(A0.end(), A1.end(), A2.end())),
104-
thrust::make_zip_iterator(make_tuple(B0.begin(), B1.begin(), B2.begin())),
102+
thrust::transform( thrust::make_zip_iterator(thrust::make_tuple(A0.begin(), A1.begin(), A2.begin())),
103+
thrust::make_zip_iterator(thrust::make_tuple(A0.end(), A1.end(), A2.end())),
104+
thrust::make_zip_iterator(thrust::make_tuple(B0.begin(), B1.begin(), B2.begin())),
105105
result.begin(),
106106
DotProduct() );
107-
107+
108108

109109

110110
// Finally, we'll print a few results
@@ -126,8 +126,8 @@ int main(void)
126126
std::cout << "(" << thrust::get<0>(b) << "," << thrust::get<1>(b) << "," << thrust::get<2>(b) << ")";
127127
std::cout << " = ";
128128
std::cout << dot << std::endl;
129-
}
129+
}
130130

131131
return 0;
132132
}
133-
133+

thrust/detail/allocator/temporary_allocator.inl

+11-6
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@
2020
#include <thrust/system/detail/bad_alloc.h>
2121
#include <cassert>
2222

23-
#if defined(__CUDA_ARCH__) && THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
23+
#if (defined(__NVCOMPILER_CUDA__) || defined(__CUDA_ARCH__)) && \
24+
THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
2425
#include <thrust/system/cuda/detail/terminate.h>
2526
#endif
2627

@@ -45,11 +46,15 @@ __host__ __device__
4546
// note that we pass cnt to deallocate, not a value derived from result.second
4647
deallocate(result.first, cnt);
4748

48-
#if !defined(__CUDA_ARCH__)
49-
throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");
50-
#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
51-
thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed");
52-
#endif
49+
if (THRUST_IS_HOST_CODE) {
50+
#if THRUST_INCLUDE_HOST_CODE
51+
throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed");
52+
#endif
53+
} else {
54+
#if THRUST_INCLUDE_DEVICE_CODE
55+
thrust::system::cuda::detail::terminate_with_message("temporary_buffer::allocate: get_temporary_buffer failed");
56+
#endif
57+
}
5358
} // end if
5459

5560
return result.first;

thrust/detail/config/cpp_compatibility.h

+19-2
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,13 @@
4949
// FIXME: Combine THRUST_INLINE_CONSTANT and
5050
// THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT into one macro when NVCC properly
5151
// supports `constexpr` globals in host and device code.
52-
#ifdef __CUDA_ARCH__
52+
#if defined(__CUDA_ARCH__) || defined(__NVCOMPILER_CUDA__)
5353
// FIXME: Add this when NVCC supports inline variables.
5454
//# if THRUST_CPP_DIALECT >= 2017
5555
//# define THRUST_INLINE_CONSTANT inline constexpr
5656
//# define THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT inline constexpr
5757
# if THRUST_CPP_DIALECT >= 2011
58-
# define THRUST_INLINE_CONSTANT static constexpr
58+
# define THRUST_INLINE_CONSTANT static const __device__
5959
# define THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT static constexpr
6060
# else
6161
# define THRUST_INLINE_CONSTANT static const __device__
@@ -75,3 +75,20 @@
7575
# endif
7676
#endif
7777

78+
#if defined(__NVCOMPILER_CUDA__)
79+
# define THRUST_IS_DEVICE_CODE __builtin_is_device_code()
80+
# define THRUST_IS_HOST_CODE (!__builtin_is_device_code())
81+
# define THRUST_INCLUDE_DEVICE_CODE 1
82+
# define THRUST_INCLUDE_HOST_CODE 1
83+
#elif defined(__CUDA_ARCH__)
84+
# define THRUST_IS_DEVICE_CODE 1
85+
# define THRUST_IS_HOST_CODE 0
86+
# define THRUST_INCLUDE_DEVICE_CODE 1
87+
# define THRUST_INCLUDE_HOST_CODE 0
88+
#else
89+
# define THRUST_IS_DEVICE_CODE 0
90+
# define THRUST_IS_HOST_CODE 1
91+
# define THRUST_INCLUDE_DEVICE_CODE 0
92+
# define THRUST_INCLUDE_HOST_CODE 1
93+
#endif
94+

thrust/detail/config/exec_check_disable.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@
2222

2323
#include <thrust/detail/config.h>
2424

25-
#if defined(__CUDACC__) && !(defined(__CUDA__) && defined(__clang__))
25+
// #pragma nv_exec_check_disable is only recognized by NVCC. Having a macro
26+
// expand to a #pragma (rather than _Pragma) only works with NVCC's compilation
27+
// model, not with other compilers.
28+
#if defined(__CUDACC__) && !defined(__NVCOMPILER_CUDA__) && \
29+
!(defined(__CUDA__) && defined(__clang__))
2630

2731
#define __thrust_exec_check_disable__ #pragma nv_exec_check_disable
2832

thrust/detail/contiguous_storage.inl

+12-8
Original file line numberDiff line numberDiff line change
@@ -430,15 +430,19 @@ __host__ __device__
430430
void contiguous_storage<T,Alloc>
431431
::swap_allocators(false_type, Alloc &other)
432432
{
433-
#ifdef __CUDA_ARCH__
434-
// allocators must be equal when swapping containers with allocators that propagate on swap
435-
assert(!is_allocator_not_equal(other));
436-
#else
437-
if (is_allocator_not_equal(other))
438-
{
439-
throw allocator_mismatch_on_swap();
433+
if (THRUST_IS_DEVICE_CODE) {
434+
#if THRUST_INCLUDE_DEVICE_CODE
435+
// allocators must be equal when swapping containers with allocators that propagate on swap
436+
assert(!is_allocator_not_equal(other));
437+
#endif
438+
} else {
439+
#if THRUST_INCLUDE_HOST_CODE
440+
if (is_allocator_not_equal(other))
441+
{
442+
throw allocator_mismatch_on_swap();
443+
}
444+
#endif
440445
}
441-
#endif
442446
thrust::swap(m_allocator, other);
443447
} // end contiguous_storage::swap_allocators()
444448

thrust/detail/functional/actor.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ template<typename Eval>
5252
typedef Eval eval_type;
5353

5454
__host__ __device__
55-
actor(void);
55+
THRUST_CONSTEXPR actor();
5656

5757
__host__ __device__
5858
actor(const Eval &base);

thrust/detail/functional/actor.inl

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ namespace functional
3838

3939
template<typename Eval>
4040
__host__ __device__
41-
actor<Eval>
42-
::actor(void)
41+
THRUST_CONSTEXPR actor<Eval>
42+
::actor()
4343
: eval_type()
4444
{}
4545

thrust/detail/functional/argument.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ template<unsigned int i>
5959
};
6060

6161
__host__ __device__
62-
argument(void){}
62+
THRUST_CONSTEXPR argument(){}
6363

6464
template<typename Env>
6565
__host__ __device__

thrust/detail/integer_math.h

+20-15
Original file line numberDiff line numberDiff line change
@@ -32,22 +32,27 @@ template <typename Integer>
3232
__host__ __device__ __thrust_forceinline__
3333
Integer clz(Integer x)
3434
{
35-
#if __CUDA_ARCH__
36-
return ::__clz(x);
37-
#else
38-
int num_bits = 8 * sizeof(Integer);
39-
int num_bits_minus_one = num_bits - 1;
40-
41-
for (int i = num_bits_minus_one; i >= 0; --i)
42-
{
43-
if ((Integer(1) << i) & x)
44-
{
45-
return num_bits_minus_one - i;
46-
}
35+
Integer result;
36+
if (THRUST_IS_DEVICE_CODE) {
37+
#if THRUST_INCLUDE_DEVICE_CODE
38+
result = ::__clz(x);
39+
#endif
40+
} else {
41+
#if THRUST_INCLUDE_HOST_CODE
42+
int num_bits = 8 * sizeof(Integer);
43+
int num_bits_minus_one = num_bits - 1;
44+
result = num_bits;
45+
for (int i = num_bits_minus_one; i >= 0; --i)
46+
{
47+
if ((Integer(1) << i) & x)
48+
{
49+
result = num_bits_minus_one - i;
50+
break;
51+
}
52+
}
53+
#endif
4754
}
48-
49-
return num_bits;
50-
#endif
55+
return result;
5156
}
5257

5358
template <typename Integer>

thrust/detail/seq.h

+2-6
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ struct seq_t : thrust::system::detail::sequential::execution_policy<seq_t>,
3131
thrust::system::detail::sequential::execution_policy>
3232
{
3333
__host__ __device__
34-
seq_t() : thrust::system::detail::sequential::execution_policy<seq_t>() {}
34+
THRUST_CONSTEXPR seq_t() : thrust::system::detail::sequential::execution_policy<seq_t>() {}
3535

3636
// allow any execution_policy to convert to seq_t
3737
template<typename DerivedPolicy>
@@ -45,11 +45,7 @@ struct seq_t : thrust::system::detail::sequential::execution_policy<seq_t>,
4545
} // end detail
4646

4747

48-
#ifdef __CUDA_ARCH__
49-
static const __device__ detail::seq_t seq;
50-
#else
51-
static const detail::seq_t seq;
52-
#endif
48+
THRUST_INLINE_CONSTANT detail::seq_t seq;
5349

5450

5551
} // end thrust

thrust/execution_policy.h

+1-5
Original file line numberDiff line numberDiff line change
@@ -344,11 +344,7 @@ static const detail::host_t host;
344344
* \see host_execution_policy
345345
* \see thrust::device
346346
*/
347-
#ifdef __CUDA_ARCH__
348-
static const __device__ detail::device_t device;
349-
#else
350-
static const detail::device_t device;
351-
#endif
347+
THRUST_INLINE_CONSTANT detail::device_t device;
352348

353349

354350
// define seq for the purpose of Doxygenating it

0 commit comments

Comments
 (0)