Skip to content

Commit

Permalink
gpu: fix integer overflow in offset_t
Browse files Browse the repository at this point in the history
  • Loading branch information
rjoursler authored and vpirogov committed May 12, 2023
1 parent 148006b commit be05c33
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 30 deletions.
2 changes: 1 addition & 1 deletion src/gpu/ocl/gemm/gemm_with_post_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ status_t gemm_with_post_ops_t::pd_t::init_kernel_ctx(
const memory_desc_wrapper bia_d(src_md(2));
const memory_desc_wrapper dst_d(gemm_pd_->dst_md(0));
offsets_t off;
int bia_off[4][MAX_NDIMS];
dim_t bia_off[4][MAX_NDIMS];
set_offsets(dst_d, off.dst_off);
set_offsets(bia_d, bia_off);
int ndims = dst_d.ndims();
Expand Down
6 changes: 3 additions & 3 deletions src/gpu/ocl/gen9_concat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ std::pair<int, int> gen9_concat_t::pd_t::calculate_iter_dim_idx_chunk(
}
}
const int iter_dim_idx = max_dim_idx;
const int all_elems = utils::array_product(dst_dims, conf.ndims);
const int max_iter_dim_chunk = 1024;
const dim_t all_elems = utils::array_product(dst_dims, conf.ndims);
const dim_t max_iter_dim_chunk = 1024;
const int min_threads = num_threads * 4;
int iter_dim_chunk = std::min(dst_dims[iter_dim_idx], max_iter_dim_chunk);
dim_t iter_dim_chunk = std::min(dst_dims[iter_dim_idx], max_iter_dim_chunk);
const auto get_num_threads = [&]() {
return ceil(static_cast<float>(all_elems)
/ (iter_dim_chunk * conf.sub_group_size));
Expand Down
53 changes: 27 additions & 26 deletions src/gpu/primitive_conf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,31 +231,31 @@ struct attr_info_t {
};

// Offset tables for three memory descriptors (src_off / wei_off / dst_off --
// presumably source, weights and destination; confirm against callers).
// Each table is 4 x MAX_NDIMS. The set_offsets(md, offs) overload shown
// further down this page writes the per-dimension block size into row 0 and
// strides_compat[0] into row 1; rows 2 and 3 are assigned in lines that this
// view does not show.
//
// NOTE(review): this is a rendered diff without +/- markers, so every member
// appears twice. The `int` declarations are the removed lines and the `dim_t`
// declarations are their replacements, per the commit title ("fix integer
// overflow"). Only the `dim_t` set exists after the commit.
struct offsets_t {
// Removed by this commit: `int` element type.
int src_off[4][MAX_NDIMS];
int wei_off[4][MAX_NDIMS];
int dst_off[4][MAX_NDIMS];
// Added by this commit: same members with `dim_t` element type.
dim_t src_off[4][MAX_NDIMS];
dim_t wei_off[4][MAX_NDIMS];
dim_t dst_off[4][MAX_NDIMS];
};

// Offset tables for the RNN primitive, one 4 x MAX_NDIMS table per member.
// Judging by the member names there is a table for each forward tensor
// (layer/iter/iter_c sources and destinations, both weights, bias), for the
// matching diff_* tensors, and for a workspace (ws_off) -- TODO confirm
// against the RNN implementation, which is not visible here.
//
// NOTE(review): this is a rendered diff without +/- markers, so every member
// appears twice. The block of `int` declarations is the removed code and the
// block of `dim_t` declarations is its replacement, per the commit title
// ("fix integer overflow"). Only the `dim_t` set exists after the commit.
struct rnn_offsets_t {
// Removed by this commit: `int` element type.
int src_layer_off[4][MAX_NDIMS];
int src_iter_off[4][MAX_NDIMS];
int src_iter_c_off[4][MAX_NDIMS];
int weights_layer_off[4][MAX_NDIMS];
int weights_iter_off[4][MAX_NDIMS];
int bias_off[4][MAX_NDIMS];
int dst_layer_off[4][MAX_NDIMS];
int dst_iter_off[4][MAX_NDIMS];
int dst_iter_c_off[4][MAX_NDIMS];
int diff_src_layer_off[4][MAX_NDIMS];
int diff_src_iter_off[4][MAX_NDIMS];
int diff_src_iter_c_off[4][MAX_NDIMS];
int diff_weights_layer_off[4][MAX_NDIMS];
int diff_weights_iter_off[4][MAX_NDIMS];
int diff_bias_off[4][MAX_NDIMS];
int diff_dst_layer_off[4][MAX_NDIMS];
int diff_dst_iter_off[4][MAX_NDIMS];
int diff_dst_iter_c_off[4][MAX_NDIMS];
int ws_off[4][MAX_NDIMS];
// Added by this commit: same members, same order, with `dim_t` element type.
dim_t src_layer_off[4][MAX_NDIMS];
dim_t src_iter_off[4][MAX_NDIMS];
dim_t src_iter_c_off[4][MAX_NDIMS];
dim_t weights_layer_off[4][MAX_NDIMS];
dim_t weights_iter_off[4][MAX_NDIMS];
dim_t bias_off[4][MAX_NDIMS];
dim_t dst_layer_off[4][MAX_NDIMS];
dim_t dst_iter_off[4][MAX_NDIMS];
dim_t dst_iter_c_off[4][MAX_NDIMS];
dim_t diff_src_layer_off[4][MAX_NDIMS];
dim_t diff_src_iter_off[4][MAX_NDIMS];
dim_t diff_src_iter_c_off[4][MAX_NDIMS];
dim_t diff_weights_layer_off[4][MAX_NDIMS];
dim_t diff_weights_iter_off[4][MAX_NDIMS];
dim_t diff_bias_off[4][MAX_NDIMS];
dim_t diff_dst_layer_off[4][MAX_NDIMS];
dim_t diff_dst_iter_off[4][MAX_NDIMS];
dim_t diff_dst_iter_c_off[4][MAX_NDIMS];
dim_t ws_off[4][MAX_NDIMS];
};

// Convolution
Expand Down Expand Up @@ -1086,7 +1086,7 @@ inline void set_offsets(compute::kernel_ctx_t &kernel_ctx,
md.compute_strides_compat(strides_compat);

for (int d = 0; d < MAX_NDIMS; ++d) {
const int block = block_dims[d];
const dim_t block = block_dims[d];

kernel_ctx.define_int(
utils::format("%s_B%d", str, d), (d < md.ndims()) ? block : 1);
Expand All @@ -1099,7 +1099,8 @@ inline void set_offsets(compute::kernel_ctx_t &kernel_ctx,
kernel_ctx.define_int(utils::format("%s_OFFSET_PAD", str), md.md_->offset0);
}

inline void set_offsets(const memory_desc_wrapper &md, int offs[4][MAX_NDIMS]) {
inline void set_offsets(
const memory_desc_wrapper &md, dim_t offs[4][MAX_NDIMS]) {
dim_t block_dims[DNNL_MAX_NDIMS];
dim_t strides_compat[2][DNNL_MAX_NDIMS];

Expand All @@ -1108,7 +1109,7 @@ inline void set_offsets(const memory_desc_wrapper &md, int offs[4][MAX_NDIMS]) {
const dims_t &dims = md.dims();

for (int d = 0; d < md.ndims(); ++d) {
const int block = block_dims[d];
const dim_t block = block_dims[d];

offs[0][d] = block;
offs[1][d] = strides_compat[0][d];
Expand All @@ -1117,7 +1118,7 @@ inline void set_offsets(const memory_desc_wrapper &md, int offs[4][MAX_NDIMS]) {
}
}

inline void def_offsets(const int offs[4][MAX_NDIMS],
inline void def_offsets(const dim_t offs[4][MAX_NDIMS],
compute::kernel_ctx_t &kernel_ctx, const char *str, const int ndims) {

for (int d = 0; d < MAX_NDIMS; d++) {
Expand Down

0 comments on commit be05c33

Please sign in to comment.