Manual casting of TPB_X

viclafargue committed Feb 12, 2025
1 parent e7967e3 commit c2c087f
Showing 7 changed files with 34 additions and 34 deletions.
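For context on the pattern applied throughout this commit: TPB_X changes from an nnz_t non-type template parameter to a plain int, and every expression that mixes it with the 64-bit index type nnz_t now casts it explicitly. The sketch below is not cuML code — example_kernel, launch_example, and the ceildiv stand-in are hypothetical names used only to illustrate why the casts matter once TPB_X is a 32-bit int.

```cpp
// Hypothetical sketch of the casting pattern, assuming nnz_t is a 64-bit index type.
#include <cstdint>
#include <cuda_runtime.h>

// Stand-in for raft::ceildiv, included only to keep this sketch self-contained.
template <typename T>
T ceildiv(T a, T b)
{
  return (a + b - 1) / b;
}

template <typename value_t, typename nnz_t, int TPB_X>
__global__ void example_kernel(const value_t* in, value_t* out, nnz_t n)
{
  // With TPB_X as a plain int, blockIdx.x * TPB_X would be evaluated in
  // 32-bit arithmetic and could wrap for very large inputs; casting TPB_X
  // first promotes the whole index computation to nnz_t.
  nnz_t idx = (blockIdx.x * static_cast<nnz_t>(TPB_X)) + threadIdx.x;
  if (idx < n) { out[idx] = in[idx]; }
}

template <typename value_t, typename nnz_t, int TPB_X>
void launch_example(const value_t* in, value_t* out, nnz_t n, cudaStream_t stream)
{
  // ceildiv deduces a single type from both arguments, so the int TPB_X is
  // cast to nnz_t on the host side too; the block dimension stays an int.
  dim3 grid(ceildiv(n, static_cast<nnz_t>(TPB_X)), 1, 1);
  dim3 blk(TPB_X, 1, 1);
  example_kernel<value_t, nnz_t, TPB_X><<<grid, blk, 0, stream>>>(in, out, n);
}

// Example instantiation: launch_example<float, uint64_t, 256>(d_in, d_out, n, stream);
```

The likely motivation for keeping TPB_X itself an int is that a block size is an inherently small 32-bit value; the explicit casts then make the widening to 64-bit index arithmetic visible at each use site rather than baked into the template parameter's type.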
14 changes: 7 additions & 7 deletions cpp/src/umap/fuzzy_simpl_set/naive.cuh
@@ -79,7 +79,7 @@ static const float MIN_K_DIST_SCALE = 1e-3;
* Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py
*
*/
- template <typename value_t, typename nnz_t, nnz_t TPB_X>
+ template <typename value_t, typename nnz_t, int TPB_X>
CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists,
int n,
float mean_dist,
@@ -191,7 +191,7 @@ CUML_KERNEL void smooth_knn_dist_kernel(const value_t* knn_dists,
*
* Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py
*/
- template <typename value_t, typename value_idx, typename nnz_t, nnz_t TPB_X>
+ template <typename value_t, typename value_idx, typename nnz_t, int TPB_X>
CUML_KERNEL void compute_membership_strength_kernel(
const value_idx* knn_indices,
const float* knn_dists, // nn outputs
@@ -205,7 +205,7 @@ CUML_KERNEL void compute_membership_strength_kernel(
{ // model params

// row-based matrix is best
- nnz_t idx = (blockIdx.x * TPB_X) + threadIdx.x;
+ nnz_t idx = (blockIdx.x * static_cast<nnz_t>(TPB_X)) + threadIdx.x;

if (idx < to_process) {
int row = idx / n_neighbors; // one neighbor per thread
@@ -238,7 +238,7 @@ CUML_KERNEL void compute_membership_strength_kernel(
/*
* Sets up and runs the knn dist smoothing
*/
- template <typename value_t, typename value_idx, typename nnz_t, nnz_t TPB_X>
+ template <typename value_t, typename value_idx, typename nnz_t, int TPB_X>
void smooth_knn_dist(nnz_t n,
const value_idx* knn_indices,
const float* knn_dists,
@@ -249,7 +249,7 @@ void smooth_knn_dist(nnz_t n,
float local_connectivity,
cudaStream_t stream)
{
- dim3 grid(raft::ceildiv(n, TPB_X), 1, 1);
+ dim3 grid(raft::ceildiv(n, static_cast<nnz_t>(TPB_X)), 1, 1);
dim3 blk(TPB_X, 1, 1);

rmm::device_uvector<value_t> dist_means_dev(n_neighbors, stream);
@@ -286,7 +286,7 @@ void smooth_knn_dist(nnz_t n,
* @param params UMAPParams config object
* @param stream cuda stream to use for device operations
*/
- template <typename value_t, typename value_idx, typename nnz_t, nnz_t TPB_X>
+ template <typename value_t, typename value_idx, typename nnz_t, int TPB_X>
void launcher(nnz_t n,
const value_idx* knn_indices,
const value_t* knn_dists,
@@ -331,7 +331,7 @@ void launcher(nnz_t n,
*/

nnz_t to_process = static_cast<nnz_t>(in.n_rows) * n_neighbors;
- dim3 grid_elm(raft::ceildiv(to_process, TPB_X), 1, 1);
+ dim3 grid_elm(raft::ceildiv(to_process, static_cast<nnz_t>(TPB_X)), 1, 1);
dim3 blk_elm(TPB_X, 1, 1);

compute_membership_strength_kernel<value_t, value_idx, nnz_t, TPB_X>
2 changes: 1 addition & 1 deletion cpp/src/umap/fuzzy_simpl_set/runner.cuh
@@ -38,7 +38,7 @@ using namespace ML;
* @param stream cuda stream
* @param algorithm algo type to choose
*/
- template <typename T, typename value_idx, typename nnz_t, nnz_t TPB_X>
+ template <typename T, typename value_idx, typename nnz_t, int TPB_X>
void run(int n,
const value_idx* knn_indices,
const T* knn_dists,
18 changes: 9 additions & 9 deletions cpp/src/umap/runner.cuh
@@ -91,7 +91,7 @@ inline void find_ab(UMAPParams* params, cudaStream_t stream)
Optimize::find_params_ab(params, stream);
}

- template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, int TPB_X>
void _get_graph(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
@@ -148,7 +148,7 @@ void _get_graph(const raft::handle_t& handle,
raft::common::nvtx::pop_range();
}

- template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, int TPB_X>
void _get_graph_supervised(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
@@ -239,7 +239,7 @@ void _get_graph_supervised(const raft::handle_t& handle,
raft::common::nvtx::pop_range();
}

- template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, int TPB_X>
void _refine(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
@@ -255,7 +255,7 @@ void _refine(const raft::handle_t& handle,
SimplSetEmbed::run<value_t, nnz_t, TPB_X>(inputs.n, inputs.d, graph, params, embeddings, stream);
}

- template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, int TPB_X>
void _init_and_refine(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
@@ -273,7 +273,7 @@ void _init_and_refine(const raft::handle_t& handle,
SimplSetEmbed::run<value_t, nnz_t, TPB_X>(inputs.n, inputs.d, graph, params, embeddings, stream);
}

- template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, int TPB_X>
void _fit(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
@@ -311,7 +311,7 @@ void _fit(const raft::handle_t& handle,
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

- template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, int TPB_X>
void _fit_supervised(const raft::handle_t& handle,
const umap_inputs& inputs,
UMAPParams* params,
@@ -352,7 +352,7 @@ void _fit_supervised(const raft::handle_t& handle,
/**
*
*/
- template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename umap_inputs, typename nnz_t, int TPB_X>
void _transform(const raft::handle_t& handle,
const umap_inputs& inputs,
umap_inputs& orig_x_inputs,
@@ -411,7 +411,7 @@ void _transform(const raft::handle_t& handle,
RAFT_CUDA_TRY(cudaMemsetAsync(sigmas.data(), 0, inputs.n * sizeof(value_t), stream));
RAFT_CUDA_TRY(cudaMemsetAsync(rhos.data(), 0, inputs.n * sizeof(value_t), stream));

- dim3 grid_n(raft::ceildiv(inputs.n, TPB_X), 1, 1);
+ dim3 grid_n(raft::ceildiv(inputs.n, static_cast<nnz_t>(TPB_X)), 1, 1);
dim3 blk(TPB_X, 1, 1);

FuzzySimplSetImpl::smooth_knn_dist<value_t, value_idx, nnz_t, TPB_X>(inputs.n,
@@ -431,7 +431,7 @@ void _transform(const raft::handle_t& handle,

nnz_t nnz = static_cast<nnz_t>(inputs.n) * params->n_neighbors;

- dim3 grid_nnz(raft::ceildiv(nnz, TPB_X), 1, 1);
+ dim3 grid_nnz(raft::ceildiv(nnz, static_cast<nnz_t>(TPB_X)), 1, 1);

CUML_LOG_DEBUG("Executing fuzzy simplicial set");

6 changes: 3 additions & 3 deletions cpp/src/umap/simpl_set_embed/algo.cuh
@@ -194,7 +194,7 @@ T create_gradient_rounding_factor(
* positive weights (neighbors in the 1-skeleton) and repelling
* negative weights (non-neighbors in the 1-skeleton).
*/
- template <typename T, typename nnz_t, nnz_t TPB_X>
+ template <typename T, typename nnz_t, int TPB_X>
void optimize_layout(T* head_embedding,
int head_n,
T* tail_embedding,
@@ -246,7 +246,7 @@ void optimize_layout(T* head_embedding,
d_tail_buffer = tail_buffer.data();
}

- dim3 grid(raft::ceildiv(nnz, TPB_X), 1, 1);
+ dim3 grid(raft::ceildiv(nnz, static_cast<nnz_t>(TPB_X)), 1, 1);
dim3 blk(TPB_X, 1, 1);
uint64_t seed = params->random_state;

@@ -295,7 +295,7 @@ void optimize_layout(T* head_embedding,
* the fuzzy set cross entropy between the embeddings
* and their 1-skeletons.
*/
- template <typename T, typename nnz_t, nnz_t TPB_X>
+ template <typename T, typename nnz_t, int TPB_X>
void launcher(
int m, int n, raft::sparse::COO<T>* in, UMAPParams* params, T* embedding, cudaStream_t stream)
{
10 changes: 5 additions & 5 deletions cpp/src/umap/simpl_set_embed/optimize_batch_kernel.cuh
@@ -98,7 +98,7 @@ DI T truncate_gradient(T const rounding_factor, T const x)
return (rounding_factor + x) - rounding_factor;
}

- template <typename T, typename nnz_t, nnz_t TPB_X, nnz_t n_components>
+ template <typename T, typename nnz_t, int TPB_X, nnz_t n_components>
CUML_KERNEL void optimize_batch_kernel_reg(T const* head_embedding,
T* head_buffer,
T const* tail_embedding,
@@ -119,7 +119,7 @@ CUML_KERNEL void optimize_batch_kernel_reg(T const* head_embedding,
T nsr_inv,
T rounding)
{
- nnz_t row = (blockIdx.x * TPB_X) + threadIdx.x;
+ nnz_t row = (blockIdx.x * static_cast<nnz_t>(TPB_X)) + threadIdx.x;
if (row >= nnz) return;
auto _epoch_of_next_sample = epoch_of_next_sample[row];
if (_epoch_of_next_sample > epoch) return;
@@ -211,7 +211,7 @@ CUML_KERNEL void optimize_batch_kernel_reg(T const* head_embedding,
_epoch_of_next_negative_sample + n_neg_samples * epochs_per_negative_sample;
}

- template <typename T, typename nnz_t, nnz_t TPB_X, bool use_shared_mem>
+ template <typename T, typename nnz_t, int TPB_X, bool use_shared_mem>
CUML_KERNEL void optimize_batch_kernel(T const* head_embedding,
T* head_buffer,
T const* tail_embedding,
@@ -233,7 +233,7 @@ CUML_KERNEL void optimize_batch_kernel(T const* head_embedding,
T rounding)
{
extern __shared__ T embedding_shared_mem_updates[];
- nnz_t row = (blockIdx.x * TPB_X) + threadIdx.x;
+ nnz_t row = (blockIdx.x * static_cast<nnz_t>(TPB_X)) + threadIdx.x;
if (row >= nnz) return;
auto _epoch_of_next_sample = epoch_of_next_sample[row];
if (_epoch_of_next_sample > epoch) return;
@@ -350,7 +350,7 @@ CUML_KERNEL void optimize_batch_kernel(T const* head_embedding,
* @param rounding: Floating rounding factor used to truncate the gradient update for
* deterministic result.
*/
- template <typename T, typename nnz_t, nnz_t TPB_X>
+ template <typename T, typename nnz_t, int TPB_X>
void call_optimize_batch_kernel(T const* head_embedding,
T* head_buffer,
T const* tail_embedding,
2 changes: 1 addition & 1 deletion cpp/src/umap/simpl_set_embed/runner.cuh
@@ -28,7 +28,7 @@ namespace SimplSetEmbed {

using namespace ML;

- template <typename T, typename nnz_t, nnz_t TPB_X>
+ template <typename T, typename nnz_t, int TPB_X>
void run(int m,
int n,
raft::sparse::COO<T>* coo,
16 changes: 8 additions & 8 deletions cpp/src/umap/supervised.cuh
@@ -101,14 +101,14 @@ void reset_local_connectivity(raft::sparse::COO<T>* in_coo,
* and this will update the fuzzy simplicial set to respect that label
* data.
*/
- template <typename value_t, typename nnz_t, nnz_t TPB_X>
+ template <typename value_t, typename nnz_t, int TPB_X>
void categorical_simplicial_set_intersection(raft::sparse::COO<value_t>* graph_coo,
value_t* target,
cudaStream_t stream,
float far_dist = 5.0,
float unknown_dist = 1.0)
{
- dim3 grid(raft::ceildiv(graph_coo->nnz, TPB_X), 1, 1);
+ dim3 grid(raft::ceildiv(graph_coo->nnz, static_cast<nnz_t>(TPB_X)), 1, 1);
dim3 blk(TPB_X, 1, 1);
fast_intersection_kernel<TPB_X, value_t><<<grid, blk, 0, stream>>>(graph_coo->rows(),
graph_coo->cols(),
@@ -119,7 +119,7 @@ void categorical_simplicial_set_intersection(raft::sparse::COO<value_t>* graph_c
far_dist);
}

- template <typename value_t, typename nnz_t, nnz_t TPB_X>
+ template <typename value_t, typename nnz_t, int TPB_X>
CUML_KERNEL void sset_intersection_kernel(int* row_ind1,
int* cols1,
value_t* vals1,
@@ -177,7 +177,7 @@ CUML_KERNEL void sset_intersection_kernel(int* row_ind1,
* Computes the CSR column index pointer and values
* for the general simplicial set intersecftion.
*/
- template <typename T, typename nnz_t, nnz_t TPB_X>
+ template <typename T, typename nnz_t, int TPB_X>
void general_simplicial_set_intersection(int* row1_ind,
raft::sparse::COO<T>* in1,
int* row2_ind,
@@ -233,7 +233,7 @@ void general_simplicial_set_intersection(int* row1_ind,
T left_min = max(min1 / 2.0, 1e-8);
T right_min = max(min2 / 2.0, 1e-8);

- dim3 grid(raft::ceildiv(in1->nnz, TPB_X), 1, 1);
+ dim3 grid(raft::ceildiv(in1->nnz, static_cast<nnz_t>(TPB_X)), 1, 1);
dim3 blk(TPB_X, 1, 1);

sset_intersection_kernel<T, nnz_t, TPB_X><<<grid, blk, 0, stream>>>(row1_ind,
@@ -254,10 +254,10 @@ void general_simplicial_set_intersection(int* row1_ind,
weight);
RAFT_CUDA_TRY(cudaGetLastError());

- dim3 grid_n(raft::ceildiv(result->nnz, TPB_X), 1, 1);
+ dim3 grid_n(raft::ceildiv(result->nnz, static_cast<nnz_t>(TPB_X)), 1, 1);
}

- template <typename T, typename nnz_t, nnz_t TPB_X>
+ template <typename T, typename nnz_t, int TPB_X>
void perform_categorical_intersection(T* y,
raft::sparse::COO<T>* rgraph_coo,
raft::sparse::COO<T>* final_coo,
@@ -277,7 +277,7 @@ void perform_categorical_intersection(T* y,
RAFT_CUDA_TRY(cudaPeekAtLastError());
}

- template <typename value_idx, typename value_t, typename nnz_t, nnz_t TPB_X>
+ template <typename value_idx, typename value_t, typename nnz_t, int TPB_X>
void perform_general_intersection(const raft::handle_t& handle,
value_t* y,
raft::sparse::COO<value_t>* rgraph_coo,