Skip to content

Commit

Permalink
cpu: aarch64: hot fix for aux tensor management of stateless gemm-conv and winograd conv without lock.

Browse files Browse the repository at this point in the history

Signed-off-by: Ye Tao <ye.tao@arm.com>
Change-Id: Ifb30292a8bfc5219c44515eb4d29b277a0f0b24a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/oncpuml/oneDNN/+/680780
Tested-by: svc_mongoosetron <svc_mongoosetron@arm.com>
Reviewed-by: Hamza Butt <hamza.butt@arm.com>
IP-Review: Hamza Butt <hamza.butt@arm.com>
  • Loading branch information
taoye9 committed Oct 8, 2024
1 parent 395994a commit 62f4fda
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 43 deletions.
45 changes: 26 additions & 19 deletions src/cpu/aarch64/acl_gemm_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,37 +89,44 @@ template <data_type_t src_t, data_type_t wei_t, data_type_t dst_t,
data_type_t bia_t>
status_t acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t, bia_t>::init(
engine_t *engine) {
// commented due to hot fix solution for stateless API which should be replaced soon.
// auto acp_ = pd()->acp_;
// acl_obj_->conv.configure(&acp_.src_tensor_info, &acp_.wei_tensor_info,
// acp_.with_bias ? &acp_.bia_tensor_info : nullptr,
// &acp_.dst_tensor_info, acp_.padstride_info, acp_.weights_info,
// acp_.dilation_info, acp_.act_info, acp_.fast_math);
// acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
return status::success;
}

template <data_type_t src_type, data_type_t wei_type, data_type_t dst_type,
data_type_t bia_type>
std::unique_ptr<acl_obj_t<typename acl_gemm_convolution_fwd_t<src_type,
wei_type, dst_type, bia_type>::Op>>
acl_gemm_convolution_fwd_t<src_type, wei_type, dst_type,
bia_type>::reinitialize_acl_obj() const {
auto acp_ = pd()->acp_;
acl_obj_->conv.configure(&acp_.src_tensor_info, &acp_.wei_tensor_info,
std::unique_ptr<acl_obj_t<Op>> acl_obj = std::make_unique<acl_obj_t<Op>>();
acl_obj->conv.configure(&acp_.src_tensor_info, &acp_.wei_tensor_info,
acp_.with_bias ? &acp_.bia_tensor_info : nullptr,
&acp_.dst_tensor_info, acp_.padstride_info, acp_.weights_info,
acp_.dilation_info, acp_.act_info, acp_.fast_math);
acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
return status::success;
}

template <data_type_t src_t, data_type_t wei_t, data_type_t dst_t,
data_type_t bia_t>
void acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t,
bia_t>::reinitialize_acl_obj() const {
auto acp = pd()->acp_;
std::lock_guard<std::mutex> _lock {this->mtx};
acl_obj_ = std::make_unique<acl_obj_t<Op>>();
acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
acp.with_bias ? &acp.bia_tensor_info : nullptr,
&acp.dst_tensor_info, acp.padstride_info, acp.weights_info,
acp.dilation_info, acp.act_info, acp.fast_math);
acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
acl_obj->aux_mem_req = acl_obj->conv.workspace();
return acl_obj;
}

template <data_type_t src_t, data_type_t wei_t, data_type_t dst_t,
data_type_t bia_t>
status_t
acl_gemm_convolution_fwd_t<src_t, wei_t, dst_t, bia_t>::execute_forward(
const exec_ctx_t &ctx) const {
reinitialize_acl_obj();
// Temporary hotfix: We're using a local acl_obj instance in this method
// instead of the class member acl_obj_. This hotfix is to bypass persistent aux mem requirements but is not the ideal solution.
// It should be refactored or removed in the future when a more permanent fix is implemented.
auto acl_obj = reinitialize_acl_obj();

return execute_forward_conv_acl<acl_obj_t<Op>, pd_t, src_data_t, wei_data_t,
dst_data_t, bia_data_t>(ctx, acl_obj_.get(), pd(), gemm_conv_keys);
dst_data_t, bia_data_t>(ctx, acl_obj.get(), pd(), gemm_conv_keys);
}

using namespace data_type;
Expand Down
13 changes: 8 additions & 5 deletions src/cpu/aarch64/acl_gemm_convolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@ struct acl_gemm_convolution_fwd_t : public primitive_t {
acl_post_ops_t post_ops;
};

acl_gemm_convolution_fwd_t(const pd_t *apd)
: primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
// hot fix solution for stateless API which should be replaced soon.
// acl_gemm_convolution_fwd_t(const pd_t *apd)
// : primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
acl_gemm_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}

status_t init(engine_t *engine) override;

Expand All @@ -67,11 +69,12 @@ struct acl_gemm_convolution_fwd_t : public primitive_t {
status_t execute_forward(const exec_ctx_t &ctx) const;

// hot fix solution for stateless API which should be replaced soon.
mutable std::mutex mtx;
void reinitialize_acl_obj() const;
std::unique_ptr<acl_obj_t<Op>> reinitialize_acl_obj() const;

const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
mutable std::unique_ptr<acl_obj_t<Op>> acl_obj_;

// commented due to hot fix solution for stateless API which should be replaced soon.
// std::unique_ptr<acl_obj_t<Op>> acl_obj_;

}; // acl_gemm_convolution_fwd_t

Expand Down
33 changes: 19 additions & 14 deletions src/cpu/aarch64/acl_winograd_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,14 @@ status_t acl_wino_convolution_fwd_t::pd_t::init(engine_t *engine) {
}

status_t acl_wino_convolution_fwd_t::init(engine_t *engine) {
auto acp = pd()->acp_;
acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
acp.with_bias ? &acp.bia_tensor_info : nullptr,
&acp.dst_tensor_info, acp.padstride_info, acp.act_info,
true); // to support 5x5, 7x7 filter shapes in addition to 3x3

acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
// commented due to hot fix solution for stateless API which should be replaced soon.
// auto acp = pd()->acp_;
// acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
// acp.with_bias ? &acp.bia_tensor_info : nullptr,
// &acp.dst_tensor_info, acp.padstride_info, acp.act_info,
// true); // to support 5x5, 7x7 filter shapes in addition to 3x3

// acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
return status::success;
}

Expand Down Expand Up @@ -129,23 +130,27 @@ status_t acl_wino_convolution_fwd_t::pd_t::init_conf() {
return status::success;
}

void acl_wino_convolution_fwd_t::reinitialize_acl_obj() const {
std::unique_ptr<acl_obj_t<acl_wino_convolution_fwd_t::Op>>
acl_wino_convolution_fwd_t::reinitialize_acl_obj() const {
auto acp = pd()->acp_;
std::lock_guard<std::mutex> _lock {this->mtx};
acl_obj_ = std::make_unique<acl_obj_t<Op>>();
acl_obj_->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
std::unique_ptr<acl_obj_t<Op>> acl_obj = std::make_unique<acl_obj_t<Op>>();
acl_obj->conv.configure(&acp.src_tensor_info, &acp.wei_tensor_info,
acp.with_bias ? &acp.bia_tensor_info : nullptr,
&acp.dst_tensor_info, acp.padstride_info, acp.act_info,
true); // to support 5x5, 7x7 filter shapes in addition to 3x3

acl_obj_->aux_mem_req = acl_obj_->conv.workspace();
acl_obj->aux_mem_req = acl_obj->conv.workspace();
return acl_obj;
}

status_t acl_wino_convolution_fwd_t::execute_forward(
const exec_ctx_t &ctx) const {
reinitialize_acl_obj();
// Temporary hotfix: We're using a local acl_obj instance in this method
// instead of the class member acl_obj_. This hotfix is to bypass persistent aux mem requirements but is not the ideal solution.
// It should be refactored or removed in the future when a more permanent fix is implemented.
const auto acl_obj = reinitialize_acl_obj();
return execute_forward_conv_acl<acl_obj_t<Op>, pd_t, data_t>(
ctx, acl_obj_.get(), pd(), wino_conv_keys);
ctx, acl_obj.get(), pd(), wino_conv_keys);
}
} // namespace aarch64
} // namespace cpu
Expand Down
12 changes: 7 additions & 5 deletions src/cpu/aarch64/acl_winograd_convolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,10 @@ struct acl_wino_convolution_fwd_t : public primitive_t {
status_t init_conf();
};

acl_wino_convolution_fwd_t(const pd_t *apd)
: primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
// hot fix solution for stateless API which should be replaced soon.
// acl_wino_convolution_fwd_t(const pd_t *apd)
// : primitive_t(apd), acl_obj_(std::make_unique<acl_obj_t<Op>>()) {}
acl_wino_convolution_fwd_t(const pd_t *apd) : primitive_t(apd) {}

status_t init(engine_t *engine) override;

Expand All @@ -60,11 +62,11 @@ struct acl_wino_convolution_fwd_t : public primitive_t {
status_t execute_forward(const exec_ctx_t &ctx) const;

// hot fix solution for stateless API which should be replaced soon.
mutable std::mutex mtx;
void reinitialize_acl_obj() const;
std::unique_ptr<acl_obj_t<Op>> reinitialize_acl_obj() const;

const pd_t *pd() const { return (const pd_t *)primitive_t::pd().get(); }
mutable std::unique_ptr<acl_obj_t<Op>> acl_obj_;
// commented due to hot fix solution for stateless API which should be replaced soon.
// std::unique_ptr<acl_obj_t<Op>> acl_obj_;
}; // acl_wino_convolution_fwd_t

} // namespace aarch64
Expand Down

0 comments on commit 62f4fda

Please sign in to comment.