This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

enabling multithreading in broadcast_reduce (#9444)
asmushetzel authored and piiswrong committed Jan 15, 2018
1 parent 02b69db commit c74cf1b
Showing 1 changed file with 2 additions and 0 deletions.
src/operator/tensor/broadcast_reduce-inl.h
@@ -197,6 +197,7 @@ void seq_reduce_compute(const int N, const int M, const bool addto,
const DType *big, DType *small, const Shape<ndim> bshape,
const Shape<ndim> sshape, const Shape<ndim> rshape,
const Shape<ndim> rstride) {
+#pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (int idx = 0; idx < N; ++idx) {
seq_reduce_assign<Reducer, ndim, DType, OP>(idx, M, addto, big, small, bshape, sshape, rshape,
rstride);

@@ -266,6 +267,7 @@ void seq_reduce_compute(const int N, const int M, const bool addto,
const Shape<ndim> lhs_shape, const Shape<ndim> lhs_stride,
const Shape<ndim> rhs_shape, const Shape<ndim> rhs_stride,
const Shape<ndim>& lhs_shape0, const Shape<ndim>& rhs_shape0) {
+#pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (int idx = 0; idx < N; ++idx) {
seq_reduce_assign<Reducer, ndim, DType, OP1, OP2>(idx, M, addto, big, lhs, rhs, small,
big_shape, lhs_shape0, rhs_shape0, small_shape, rshape, lhs_shape, rhs_shape, rstride,
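For context, here is a minimal, self-contained sketch of the pattern this commit applies: the outer loop runs over N independent output elements, each iteration performs its own sequential reduction of M inputs, so an OpenMP parallel for with an explicit num_threads clause is safe. The seq_reduce_assign helper below is a simplified stand-in for MXNet's templated version, and omp_get_max_threads() stands in for engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); compile with -fopenmp.

// Minimal sketch (not MXNet's actual code) of the parallelization pattern
// added by this commit: parallelize the outer loop over independent outputs
// and size the thread team explicitly via num_threads(...).
#include <omp.h>
#include <cstdio>
#include <vector>

// Hypothetical helper: sequentially reduce (sum) M values into one output slot.
static void seq_reduce_assign(int idx, int M, const float* big, float* small) {
  float acc = 0.0f;
  for (int k = 0; k < M; ++k) {
    acc += big[idx * M + k];
  }
  small[idx] = acc;  // each idx owns its own output element, so there is no data race
}

int main() {
  const int N = 8;   // number of output elements
  const int M = 4;   // reduction length per output element
  std::vector<float> big(N * M, 1.0f);
  std::vector<float> small(N, 0.0f);

  // The same shape as the change above: an OpenMP parallel for over idx,
  // with the thread count pinned via num_threads(...).
  #pragma omp parallel for num_threads(omp_get_max_threads())
  for (int idx = 0; idx < N; ++idx) {
    seq_reduce_assign(idx, M, big.data(), small.data());
  }

  for (int idx = 0; idx < N; ++idx) {
    std::printf("small[%d] = %.1f\n", idx, small[idx]);
  }
  return 0;
}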
