@@ -797,39 +797,17 @@ class __SYCL_EXPORT handler {
797
797
// Get the kernel name to check condition 3.
798
798
std::string KName = typeid (NameT *).name ();
799
799
800
- // Force instantiation of the kernel before we use the kernel info to make
801
- // sure __builtin_sycl_unique_stable_name doesn't cause problems.
802
- // The instantiations of kernel_parallel_for must match the logic caused by
803
- // kernel_parallel_for_wrapper.
804
- using WrapperTy =
805
- decltype (getRangeRoundedKernelLambda<TransformedArgType, Dims>(
806
- KernelFunc, NumWorkItems));
807
- using NameWT = typename detail::get_kernel_wrapper_name_t <NameT>::name;
808
-
809
- (void )kernel_parallel_for_wrapper_instantiator<NameWT, TransformedArgType,
810
- WrapperTy>::value;
811
- (void )kernel_parallel_for_wrapper_instantiator<NameT, TransformedArgType,
812
- KernelType>::value;
813
-
814
- using KI = detail::KernelInfo<KernelName>;
815
- bool DisableRounding =
816
- (getenv (" SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING" ) != nullptr ) ||
817
- (KName.find (" SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING" ) !=
818
- std::string::npos) ||
819
- (KI::getName () == nullptr || KI::getName ()[0 ] == ' \0 ' ) ||
820
- (KI::callsThisItem ());
821
-
822
800
// Perform range rounding if rounding-up is enabled
823
801
// and there are sufficient work-items to need rounding
824
802
// and the user-specified range is not a multiple of a "good" value.
825
- if (!DisableRounding && (NumWorkItems[0 ] >= MinRangeX) &&
826
- (NumWorkItems[0 ] % MinFactorX != 0 )) {
803
+ if ((NumWorkItems[0 ] >= MinRangeX) && (NumWorkItems[0 ] % MinFactorX != 0 )) {
827
804
// It is sufficient to round up just the first dimension.
828
805
// Multiplying the rounded-up value of the first dimension
829
806
// by the values of the remaining dimensions (if any)
830
807
// will yield a rounded-up value for the total range.
831
808
size_t NewValX =
832
809
((NumWorkItems[0 ] + GoodFactorX - 1 ) / GoodFactorX) * GoodFactorX;
810
+ using NameWT = typename detail::get_kernel_wrapper_name_t <NameT>::name;
833
811
if (getenv (" SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE" ) != nullptr )
834
812
std::cout << " parallel_for range adjusted from " << NumWorkItems[0 ]
835
813
<< " to " << NewValX << std::endl;
@@ -924,7 +902,7 @@ class __SYCL_EXPORT handler {
924
902
// NOTE: the name of these functions - "kernel_parallel_for" - is used by the
925
903
// Front End to determine kernel invocation kind.
926
904
template <typename KernelName, typename ElementType, typename KernelType>
927
- __SYCL_KERNEL_ATTR__ static void
905
+ __SYCL_KERNEL_ATTR__ void
928
906
#ifdef __SYCL_NONCONST_FUNCTOR__
929
907
kernel_parallel_for (KernelType KernelFunc) {
930
908
#else
@@ -940,7 +918,7 @@ class __SYCL_EXPORT handler {
940
918
// NOTE: the name of these functions - "kernel_parallel_for" - is used by the
941
919
// Front End to determine kernel invocation kind.
942
920
template <typename KernelName, typename ElementType, typename KernelType>
943
- __SYCL_KERNEL_ATTR__ static void
921
+ __SYCL_KERNEL_ATTR__ void
944
922
#ifdef __SYCL_NONCONST_FUNCTOR__
945
923
kernel_parallel_for (KernelType KernelFunc, kernel_handler KH) {
946
924
#else
@@ -1030,34 +1008,6 @@ class __SYCL_EXPORT handler {
1030
1008
}
1031
1009
}
1032
1010
1033
- // Helper instantiator type for kernel_parallel_for_wrapper that
1034
- // instantiates but does not call the appropriate kernel. Needed to support use of
1035
- // KernelInfo in parallel_for_lambda_impl when supporting
1036
- // SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING.
1037
- template <typename KernelName, typename ElementType, typename KernelType>
1038
- class kernel_parallel_for_wrapper_instantiator {
1039
- static constexpr auto func_loader () {
1040
- #ifdef __SYCL_NONCONST_FUNCTOR__
1041
- using ParamTy = KernelType;
1042
- #else
1043
- using ParamTy = const KernelType &;
1044
- #endif
1045
- if constexpr (detail::isKernelLambdaCallableWithKernelHandler<
1046
- KernelType, ElementType>()) {
1047
- using FuncTy = void (*)(ParamTy, kernel_handler);
1048
- return static_cast <FuncTy>(
1049
- kernel_parallel_for<KernelName, ElementType, KernelType>);
1050
- } else {
1051
- using FuncTy = void (*)(ParamTy);
1052
- return static_cast <FuncTy>(
1053
- kernel_parallel_for<KernelName, ElementType, KernelType>);
1054
- }
1055
- }
1056
-
1057
- public:
1058
- static constexpr auto value = func_loader();
1059
- };
1060
-
1061
1011
// Wrappers for kernel_parallel_for_work_group(...)
1062
1012
1063
1013
template <typename KernelName, typename ElementType, typename KernelType>
0 commit comments