From c2c087f5928837c31fe281ea48926d300b25eb41 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 12 Feb 2025 12:35:54 +0000 Subject: [PATCH] Manual casting of TPB_X --- cpp/src/umap/fuzzy_simpl_set/naive.cuh | 14 +++++++------- cpp/src/umap/fuzzy_simpl_set/runner.cuh | 2 +- cpp/src/umap/runner.cuh | 18 +++++++++--------- cpp/src/umap/simpl_set_embed/algo.cuh | 6 +++--- .../simpl_set_embed/optimize_batch_kernel.cuh | 10 +++++----- cpp/src/umap/simpl_set_embed/runner.cuh | 2 +- cpp/src/umap/supervised.cuh | 16 ++++++++-------- 7 files changed, 34 insertions(+), 34 deletions(-) diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.cuh b/cpp/src/umap/fuzzy_simpl_set/naive.cuh index 30a8c7e918..2462931c6a 100644 --- a/cpp/src/umap/fuzzy_simpl_set/naive.cuh +++ b/cpp/src/umap/fuzzy_simpl_set/naive.cuh @@ -79,7 +79,7 @@ static const float MIN_K_DIST_SCALE = 1e-3; * Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py * */ -template +template CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists, int n, float mean_dist, @@ -191,7 +191,7 @@ CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists, * * Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py */ -template +template CUML_KERNEL void compute_membership_strength_kernel( const value_idx* knn_indices, const float* knn_dists, // nn outputs @@ -205,7 +205,7 @@ CUML_KERNEL void compute_membership_strength_kernel( { // model params // row-based matrix is best - nnz_t idx = (blockIdx.x * TPB_X) + threadIdx.x; + nnz_t idx = (blockIdx.x * static_cast(TPB_X)) + threadIdx.x; if (idx < to_process) { int row = idx / n_neighbors; // one neighbor per thread @@ -238,7 +238,7 @@ CUML_KERNEL void compute_membership_strength_kernel( /* * Sets up and runs the knn dist smoothing */ -template +template void smooth_knn_dist(nnz_t n, const value_idx* knn_indices, const float* knn_dists, @@ -249,7 +249,7 @@ void smooth_knn_dist(nnz_t n, float local_connectivity, cudaStream_t stream) { - dim3 grid(raft::ceildiv(n, TPB_X), 1, 1); + dim3 grid(raft::ceildiv(n, static_cast(TPB_X)), 1, 1); dim3 blk(TPB_X, 1, 1); rmm::device_uvector dist_means_dev(n_neighbors, stream); @@ -286,7 +286,7 @@ void smooth_knn_dist(nnz_t n, * @param params UMAPParams config object * @param stream cuda stream to use for device operations */ -template +template void launcher(nnz_t n, const value_idx* knn_indices, const value_t* knn_dists, @@ -331,7 +331,7 @@ void launcher(nnz_t n, */ nnz_t to_process = static_cast(in.n_rows) * n_neighbors; - dim3 grid_elm(raft::ceildiv(to_process, TPB_X), 1, 1); + dim3 grid_elm(raft::ceildiv(to_process, static_cast(TPB_X)), 1, 1); dim3 blk_elm(TPB_X, 1, 1); compute_membership_strength_kernel diff --git a/cpp/src/umap/fuzzy_simpl_set/runner.cuh b/cpp/src/umap/fuzzy_simpl_set/runner.cuh index 03ee9f59b7..270b1f87bc 100644 --- a/cpp/src/umap/fuzzy_simpl_set/runner.cuh +++ b/cpp/src/umap/fuzzy_simpl_set/runner.cuh @@ -38,7 +38,7 @@ using namespace ML; * @param stream cuda stream * @param algorithm algo type to choose */ -template +template void run(int n, const value_idx* knn_indices, const T* knn_dists, diff --git a/cpp/src/umap/runner.cuh b/cpp/src/umap/runner.cuh index 6ca1e697cc..21e9ec8fcf 100644 --- a/cpp/src/umap/runner.cuh +++ b/cpp/src/umap/runner.cuh @@ -91,7 +91,7 @@ inline void find_ab(UMAPParams* params, cudaStream_t stream) Optimize::find_params_ab(params, stream); } -template +template void _get_graph(const raft::handle_t& handle, const umap_inputs& inputs, UMAPParams* params, @@ -148,7 +148,7 @@ void _get_graph(const raft::handle_t& handle, raft::common::nvtx::pop_range(); } -template +template void _get_graph_supervised(const raft::handle_t& handle, const umap_inputs& inputs, UMAPParams* params, @@ -239,7 +239,7 @@ void _get_graph_supervised(const raft::handle_t& handle, raft::common::nvtx::pop_range(); } -template +template void _refine(const raft::handle_t& handle, const umap_inputs& inputs, UMAPParams* params, @@ -255,7 +255,7 @@ void _refine(const raft::handle_t& handle, SimplSetEmbed::run(inputs.n, inputs.d, graph, params, embeddings, stream); } -template +template void _init_and_refine(const raft::handle_t& handle, const umap_inputs& inputs, UMAPParams* params, @@ -273,7 +273,7 @@ void _init_and_refine(const raft::handle_t& handle, SimplSetEmbed::run(inputs.n, inputs.d, graph, params, embeddings, stream); } -template +template void _fit(const raft::handle_t& handle, const umap_inputs& inputs, UMAPParams* params, @@ -311,7 +311,7 @@ void _fit(const raft::handle_t& handle, RAFT_CUDA_TRY(cudaPeekAtLastError()); } -template +template void _fit_supervised(const raft::handle_t& handle, const umap_inputs& inputs, UMAPParams* params, @@ -352,7 +352,7 @@ void _fit_supervised(const raft::handle_t& handle, /** * */ -template +template void _transform(const raft::handle_t& handle, const umap_inputs& inputs, umap_inputs& orig_x_inputs, @@ -411,7 +411,7 @@ void _transform(const raft::handle_t& handle, RAFT_CUDA_TRY(cudaMemsetAsync(sigmas.data(), 0, inputs.n * sizeof(value_t), stream)); RAFT_CUDA_TRY(cudaMemsetAsync(rhos.data(), 0, inputs.n * sizeof(value_t), stream)); - dim3 grid_n(raft::ceildiv(inputs.n, TPB_X), 1, 1); + dim3 grid_n(raft::ceildiv(inputs.n, static_cast(TPB_X)), 1, 1); dim3 blk(TPB_X, 1, 1); FuzzySimplSetImpl::smooth_knn_dist(inputs.n, @@ -431,7 +431,7 @@ void _transform(const raft::handle_t& handle, nnz_t nnz = static_cast(inputs.n) * params->n_neighbors; - dim3 grid_nnz(raft::ceildiv(nnz, TPB_X), 1, 1); + dim3 grid_nnz(raft::ceildiv(nnz, static_cast(TPB_X)), 1, 1); CUML_LOG_DEBUG("Executing fuzzy simplicial set"); diff --git a/cpp/src/umap/simpl_set_embed/algo.cuh b/cpp/src/umap/simpl_set_embed/algo.cuh index 6203fbf39e..7cd86efaf0 100644 --- a/cpp/src/umap/simpl_set_embed/algo.cuh +++ b/cpp/src/umap/simpl_set_embed/algo.cuh @@ -194,7 +194,7 @@ T create_gradient_rounding_factor( * positive weights (neighbors in the 1-skeleton) and repelling * negative weights (non-neighbors in the 1-skeleton). */ -template +template void optimize_layout(T* head_embedding, int head_n, T* tail_embedding, @@ -246,7 +246,7 @@ void optimize_layout(T* head_embedding, d_tail_buffer = tail_buffer.data(); } - dim3 grid(raft::ceildiv(nnz, TPB_X), 1, 1); + dim3 grid(raft::ceildiv(nnz, static_cast(TPB_X)), 1, 1); dim3 blk(TPB_X, 1, 1); uint64_t seed = params->random_state; @@ -295,7 +295,7 @@ void optimize_layout(T* head_embedding, * the fuzzy set cross entropy between the embeddings * and their 1-skeletons. */ -template +template void launcher( int m, int n, raft::sparse::COO* in, UMAPParams* params, T* embedding, cudaStream_t stream) { diff --git a/cpp/src/umap/simpl_set_embed/optimize_batch_kernel.cuh b/cpp/src/umap/simpl_set_embed/optimize_batch_kernel.cuh index bbc0e9eb61..db0ee82ed8 100644 --- a/cpp/src/umap/simpl_set_embed/optimize_batch_kernel.cuh +++ b/cpp/src/umap/simpl_set_embed/optimize_batch_kernel.cuh @@ -98,7 +98,7 @@ DI T truncate_gradient(T const rounding_factor, T const x) return (rounding_factor + x) - rounding_factor; } -template +template CUML_KERNEL void optimize_batch_kernel_reg(T const* head_embedding, T* head_buffer, T const* tail_embedding, @@ -119,7 +119,7 @@ CUML_KERNEL void optimize_batch_kernel_reg(T const* head_embedding, T nsr_inv, T rounding) { - nnz_t row = (blockIdx.x * TPB_X) + threadIdx.x; + nnz_t row = (blockIdx.x * static_cast(TPB_X)) + threadIdx.x; if (row >= nnz) return; auto _epoch_of_next_sample = epoch_of_next_sample[row]; if (_epoch_of_next_sample > epoch) return; @@ -211,7 +211,7 @@ CUML_KERNEL void optimize_batch_kernel_reg(T const* head_embedding, _epoch_of_next_negative_sample + n_neg_samples * epochs_per_negative_sample; } -template +template CUML_KERNEL void optimize_batch_kernel(T const* head_embedding, T* head_buffer, T const* tail_embedding, @@ -233,7 +233,7 @@ CUML_KERNEL void optimize_batch_kernel(T const* head_embedding, T rounding) { extern __shared__ T embedding_shared_mem_updates[]; - nnz_t row = (blockIdx.x * TPB_X) + threadIdx.x; + nnz_t row = (blockIdx.x * static_cast(TPB_X)) + threadIdx.x; if (row >= nnz) return; auto _epoch_of_next_sample = epoch_of_next_sample[row]; if (_epoch_of_next_sample > epoch) return; @@ -350,7 +350,7 @@ CUML_KERNEL void optimize_batch_kernel(T const* head_embedding, * @param rounding: Floating rounding factor used to truncate the gradient update for * deterministic result. */ -template +template void call_optimize_batch_kernel(T const* head_embedding, T* head_buffer, T const* tail_embedding, diff --git a/cpp/src/umap/simpl_set_embed/runner.cuh b/cpp/src/umap/simpl_set_embed/runner.cuh index a2a6d08e27..c979150e04 100644 --- a/cpp/src/umap/simpl_set_embed/runner.cuh +++ b/cpp/src/umap/simpl_set_embed/runner.cuh @@ -28,7 +28,7 @@ namespace SimplSetEmbed { using namespace ML; -template +template void run(int m, int n, raft::sparse::COO* coo, diff --git a/cpp/src/umap/supervised.cuh b/cpp/src/umap/supervised.cuh index 2e4f8a8dfb..1a245e9cfc 100644 --- a/cpp/src/umap/supervised.cuh +++ b/cpp/src/umap/supervised.cuh @@ -101,14 +101,14 @@ void reset_local_connectivity(raft::sparse::COO* in_coo, * and this will update the fuzzy simplicial set to respect that label * data. */ -template +template void categorical_simplicial_set_intersection(raft::sparse::COO* graph_coo, value_t* target, cudaStream_t stream, float far_dist = 5.0, float unknown_dist = 1.0) { - dim3 grid(raft::ceildiv(graph_coo->nnz, TPB_X), 1, 1); + dim3 grid(raft::ceildiv(graph_coo->nnz, static_cast(TPB_X)), 1, 1); dim3 blk(TPB_X, 1, 1); fast_intersection_kernel<<>>(graph_coo->rows(), graph_coo->cols(), @@ -119,7 +119,7 @@ void categorical_simplicial_set_intersection(raft::sparse::COO* graph_c far_dist); } -template +template CUML_KERNEL void sset_intersection_kernel(int* row_ind1, int* cols1, value_t* vals1, @@ -177,7 +177,7 @@ CUML_KERNEL void sset_intersection_kernel(int* row_ind1, * Computes the CSR column index pointer and values * for the general simplicial set intersecftion. */ -template +template void general_simplicial_set_intersection(int* row1_ind, raft::sparse::COO* in1, int* row2_ind, @@ -233,7 +233,7 @@ void general_simplicial_set_intersection(int* row1_ind, T left_min = max(min1 / 2.0, 1e-8); T right_min = max(min2 / 2.0, 1e-8); - dim3 grid(raft::ceildiv(in1->nnz, TPB_X), 1, 1); + dim3 grid(raft::ceildiv(in1->nnz, static_cast(TPB_X)), 1, 1); dim3 blk(TPB_X, 1, 1); sset_intersection_kernel<<>>(row1_ind, @@ -254,10 +254,10 @@ void general_simplicial_set_intersection(int* row1_ind, weight); RAFT_CUDA_TRY(cudaGetLastError()); - dim3 grid_n(raft::ceildiv(result->nnz, TPB_X), 1, 1); + dim3 grid_n(raft::ceildiv(result->nnz, static_cast(TPB_X)), 1, 1); } -template +template void perform_categorical_intersection(T* y, raft::sparse::COO* rgraph_coo, raft::sparse::COO* final_coo, @@ -277,7 +277,7 @@ void perform_categorical_intersection(T* y, RAFT_CUDA_TRY(cudaPeekAtLastError()); } -template +template void perform_general_intersection(const raft::handle_t& handle, value_t* y, raft::sparse::COO* rgraph_coo,