From d2d80b12ffe062d98478de88393bf57fd1bf8d09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Wed, 21 Feb 2024 21:17:25 +0800 Subject: [PATCH 1/6] Update CMakeLists.txt --- .../framework/ir/memory_optimize_pass/CMakeLists.txt | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 85923aafc23a74..6a84c60ddf96ae 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -41,11 +41,9 @@ if(WITH_CINN) share_varinfo_into_cinn_pass SRCS share_varinfo_into_cinn_pass.cc DEPS pass enforce common graph_helper) - cc_test( + paddle_test( share_varinfo_into_cinn_pass_test - SRCS share_varinfo_into_cinn_pass_test.cc - DEPS share_varinfo_into_cinn_pass parallel_executor elementwise_add_op - mul_op cinn_launch_op) + SRCS share_varinfo_into_cinn_pass_test.cc) list(APPEND EAGER_DELETETION_PASS_DEPS share_varinfo_into_cinn_pass) endif() @@ -73,8 +71,7 @@ cc_library( SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass) -cc_test( +paddle_test( test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc - DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op phi - common) + DEPS common) From 94694dda17e8ebe5ec33a9723d94545be9918bd5 Mon Sep 17 00:00:00 2001 From: Liyulingyue <852433440@qq.com> Date: Thu, 22 Feb 2024 20:45:13 +0800 Subject: [PATCH 2/6] mv cc file --- .../ir/memory_optimize_pass/CMakeLists.txt | 8 - test/cpp/fluid/framework/CMakeLists.txt | 2 + test/cpp/fluid/framework/ir/CMakeLists.txt | 1 + .../ir/memory_optimize_pass/CMakeLists.txt | 8 + .../share_varinfo_into_cinn_pass_test.cc | 154 ++++++++++++ ...est_reference_count_pass_last_lived_ops.cc | 235 ++++++++++++++++++ 6 files changed, 400 insertions(+), 8 deletions(-) create mode 100644 test/cpp/fluid/framework/ir/CMakeLists.txt create mode 100644 test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt create mode 100644 test/cpp/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc create mode 100644 test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 6a84c60ddf96ae..222fef33c5ea63 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -41,9 +41,6 @@ if(WITH_CINN) share_varinfo_into_cinn_pass SRCS share_varinfo_into_cinn_pass.cc DEPS pass enforce common graph_helper) - paddle_test( - share_varinfo_into_cinn_pass_test - SRCS share_varinfo_into_cinn_pass_test.cc) list(APPEND EAGER_DELETETION_PASS_DEPS share_varinfo_into_cinn_pass) endif() @@ -70,8 +67,3 @@ cc_library( inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass) - -paddle_test( - test_reference_count_pass_last_lived_ops - SRCS test_reference_count_pass_last_lived_ops.cc - DEPS common) diff --git a/test/cpp/fluid/framework/CMakeLists.txt b/test/cpp/fluid/framework/CMakeLists.txt index 5e0e7404f6999d..8e1686b2429933 100644 --- a/test/cpp/fluid/framework/CMakeLists.txt +++ b/test/cpp/fluid/framework/CMakeLists.txt @@ -346,3 +346,5 @@ cc_test( workqueue_test SRCS new_executor/workqueue_test.cc DEPS 
standalone_executor) + +add_subdirectory(ir) diff --git a/test/cpp/fluid/framework/ir/CMakeLists.txt b/test/cpp/fluid/framework/ir/CMakeLists.txt new file mode 100644 index 00000000000000..81a68ccb22f834 --- /dev/null +++ b/test/cpp/fluid/framework/ir/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(memory_optimize_pass) diff --git a/test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt new file mode 100644 index 00000000000000..585545936cb165 --- /dev/null +++ b/test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -0,0 +1,8 @@ +if(WITH_CINN) + paddle_test(share_varinfo_into_cinn_pass_test SRCS + share_varinfo_into_cinn_pass_test.cc) + list(APPEND EAGER_DELETETION_PASS_DEPS share_varinfo_into_cinn_pass) +endif() + +paddle_test(test_reference_count_pass_last_lived_ops SRCS + test_reference_count_pass_last_lived_ops.cc DEPS common) diff --git a/test/cpp/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc b/test/cpp/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc new file mode 100644 index 00000000000000..1f78e293a21a39 --- /dev/null +++ b/test/cpp/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <memory> + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/details/computation_op_handle.h" +#include "paddle/fluid/framework/details/eager_deletion_op_handle.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/framework/paddle2cinn/build_cinn_pass.h" +#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" +#include "paddle/fluid/framework/parallel_executor.h" +#include "paddle/fluid/framework/program_desc.h" + +USE_OP_ITSELF(mul); +USE_OP_ITSELF(elementwise_add); + +USE_OP_ITSELF(cinn_launch); +PD_DECLARE_KERNEL(cinn_launch, CPU, ALL_LAYOUT); +#ifdef PADDLE_WITH_CUDA +PD_DECLARE_KERNEL(cinn_launch, GPU, ALL_LAYOUT); +#endif + +namespace paddle::framework { + +using Name2VarInfoMap = + std::unordered_map<std::string, std::shared_ptr<ir::MemOptVarInfo>>; + +static ProgramDesc BuildProgramInsideCinnLaunchOp() { + ProgramDesc program; + auto* block = program.MutableBlock(0); + block->Var("var1"); + block->Var("var2"); + block->Var("var3"); + block->Var("var4"); + block->Var("var5"); + + auto add_op = + std::unique_ptr<OpDesc>(new OpDesc("elementwise_add", + {{"X", {"var1"}}, {"Y", {"var2"}}}, + {{"Out", {"var3"}}}, + {})); + block->AppendAllocatedOp(std::move(add_op)); + auto mul_op = std::unique_ptr<OpDesc>(new OpDesc( + "mul", {{"X", {"var3"}}, {"Y", {"var4"}}}, {{"Out", {"var5"}}}, {})); + block->AppendAllocatedOp(std::move(mul_op)); + return program; +} + +static ProgramDesc BuildProgramWithCinnLaunchOp(int64_t compilation_key) { + // create a cinn_launch op + ProgramDesc program; + auto* block = program.MutableBlock(0); + block->Var("var1"); + block->Var("var2"); + block->Var("var4"); + block->Var("var5"); + + auto cinn_launch_op = std::unique_ptr<OpDesc>( + new OpDesc("cinn_launch", + {{"X", {"var1", "var2", "var4"}}}, + {{"Out", {"var5"}}}, + {{"compilation_key", compilation_key}})); + block->AppendAllocatedOp(std::move(cinn_launch_op)); + return program; +} + +struct TestPassContext { + explicit TestPassContext(const ProgramDesc& program) { + graph = std::make_unique<ir::Graph>(program); + details::BuildStrategy build_strategy; + details::ExecutionStrategy exec_strategy; + exec_strategy.use_device_ = paddle::platform::kCUDA; + executor.reset(new ParallelExecutor(platform::CUDAPlace(0), + &scope, + exec_strategy, + build_strategy, + graph.get())); + } + + Scope scope; + std::unique_ptr<ir::Graph> graph; + std::unique_ptr<ParallelExecutor> executor; +}; + +TEST(ShareMemInfoToSubGraphPassTest, test_main_graph_share_varinfo) { + // add a subgraph to CinnCompiler + auto subgraph = std::make_unique<ir::Graph>(BuildProgramInsideCinnLaunchOp()); + subgraph->GetOrInit<Name2VarInfoMap>( + paddle2cinn::kMemOptVarInfoFromMainGraph); + auto compilation_key = + paddle2cinn::CinnCompiler::GetInstance()->AddGraph(std::move(subgraph)); + + // build test data and apply pass + auto context = std::make_unique<TestPassContext>( + BuildProgramWithCinnLaunchOp(compilation_key)); + + // check result + const ir::Graph& result_subgraph = + paddle2cinn::CinnCompiler::GetInstance()->FindGraph(compilation_key); + const auto& dst_varinfo_map = result_subgraph.Get<Name2VarInfoMap>( + paddle2cinn::kMemOptVarInfoFromMainGraph); + ASSERT_EQ(dst_varinfo_map.size(), 4); + EXPECT_EQ(dst_varinfo_map.count("var1"), 1); + EXPECT_EQ(dst_varinfo_map.count("var5"), 1); + EXPECT_EQ(dst_varinfo_map.at("var1").use_count(), 2); + EXPECT_EQ(dst_varinfo_map.at("var5").use_count(), 2); +} + +TEST(ShareMemInfoToSubGraphPassTest, test_subgraph_take_varinfo) { + // build test data and apply pass + auto context = +
std::make_unique<TestPassContext>(BuildProgramInsideCinnLaunchOp()); + auto& varinfo_map_shared = context->graph->GetOrInit<Name2VarInfoMap>( + paddle2cinn::kMemOptVarInfoFromMainGraph); + varinfo_map_shared = { + {"var1", std::make_shared<ir::MemOptVarInfo>("var1", 1)}, + {"var2", std::make_shared<ir::MemOptVarInfo>("var2", 2)}, + }; + + ir::MemOptVarInfoMapList varinfo_maps(1); + auto& dst_varinfo_map = varinfo_maps.front(); + dst_varinfo_map = {{"var1", std::make_shared<ir::MemOptVarInfo>("var1", 1)}, + {"var2", std::make_shared<ir::MemOptVarInfo>("var2", 1)}, + {"var3", std::make_shared<ir::MemOptVarInfo>("var3", 1)}, + {"var4", std::make_shared<ir::MemOptVarInfo>("var4", 1)}, + {"var5", std::make_shared<ir::MemOptVarInfo>("var5", 1)}}; + auto share_pass = + ir::PassRegistry::Instance().Get("share_varinfo_into_cinn_pass"); + share_pass->SetNotOwned(ir::kMemOptVarInfoMapList, &varinfo_maps); + share_pass->Apply(context->graph.get()); + + // check result + ASSERT_NE(dst_varinfo_map.at("var1")->ParentHolder(), nullptr); + ASSERT_NE(dst_varinfo_map.at("var2")->ParentHolder(), nullptr); + ASSERT_EQ(dst_varinfo_map.at("var3")->ParentHolder(), nullptr); + ASSERT_EQ(dst_varinfo_map.at("var4")->ParentHolder(), nullptr); + ASSERT_EQ(dst_varinfo_map.at("var5")->ParentHolder(), nullptr); +} + +} // namespace paddle::framework diff --git a/test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc b/test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc new file mode 100644 index 00000000000000..fc2173f36316d9 --- /dev/null +++ b/test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc @@ -0,0 +1,235 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "gtest/gtest.h" +#include "paddle/common/flags.h" +#include "paddle/fluid/framework/details/multi_devices_helper.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" +#include "paddle/fluid/framework/parallel_executor.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/phi/core/kernel_registry.h" + +USE_OP_ITSELF(scale); +USE_OP_ITSELF(elementwise_mul); +USE_OP_ITSELF(elementwise_add); +USE_OP_ITSELF(elementwise_add_grad); + +PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT); + +COMMON_DECLARE_double(eager_delete_tensor_gb); + +namespace paddle { +namespace framework { +namespace p = paddle::platform; + +static std::vector CreatePlaces(size_t num, bool use_cuda) { + std::vector result; + result.reserve(num); + for (size_t i = 0; i < num; ++i) { + if (use_cuda) { + result.emplace_back(platform::CUDAPlace(static_cast(i))); + } else { + result.emplace_back(platform::CPUPlace()); + } + } + return result; +} + +static void NewVar(BlockDesc *block, + const std::string &name, + const std::vector &shape) { + auto *var_desc = block->Var(name); + var_desc->SetShape(shape); +} + +static void AppendOp(BlockDesc *block, + const std::string &type, + VariableNameMap inputs, + VariableNameMap outputs, + AttributeMap attrs) { + auto &op_info = OpInfoMap::Instance().Get(type); + if (op_info.Checker()) { + op_info.Checker()->Check(&attrs); + } + + auto *op = block->AppendOp(); + op->SetType(type); + for (auto &pair : inputs) { + op->SetInput(pair.first, pair.second); + } + + for (auto &pair : outputs) { + op->SetOutput(pair.first, pair.second); + for (auto &var_name : pair.second) { + if (!block->FindVarRecursive(var_name)) { + NewVar(block, var_name, {}); + } + } + } + + op->SetAttrMap(attrs); + op->InferVarType(block); + op->InferShape(*block); +} + +class ReferenceCountPassTestHelper { + public: + ReferenceCountPassTestHelper(const ProgramDesc &program, bool use_cuda) + : graph_(program) { + details::BuildStrategy build_strategy; + build_strategy.enable_inplace_ = false; + build_strategy.memory_optimize_ = false; + FLAGS_eager_delete_tensor_gb = -1; + + details::ExecutionStrategy exec_strategy; + exec_strategy.use_device_ = use_cuda ? 
p::kCUDA : p::kCPU; + + executor_ = std::make_unique<ParallelExecutor>(CreatePlaces(1, use_cuda), + std::vector<std::string>(), + "", + &scope_, + std::vector<Scope *>(), + exec_strategy, + build_strategy, + &graph_); + + auto ref_cnt_pass = + ir::PassRegistry::Instance().Get("reference_count_pass"); + ref_cnt_pass->SetNotOwned(ir::kMemOptVarInfoMapList, &mem_opt_var_infos_); + ref_cnt_pass->SetNotOwned(ir::kLastLiveOpsOfVars, &last_live_ops_of_vars_); + ref_cnt_pass->Apply(&const_cast<ir::Graph &>(executor_->Graph())); + } + + bool IsLastLivedOps(const std::string &name, + std::vector<std::string> ops) const { + std::sort(ops.begin(), ops.end()); + return LastLivedOpTypes(name) == ops; + } + + std::vector<OperatorBase *> LastLivedOps(const std::string &name) const { + auto &ops = last_live_ops_of_vars_[0].at(name).ops(); + std::vector<OperatorBase *> ret; + ret.reserve(ops.size()); + for (auto *op : ops) { + ret.emplace_back(op->GetOp()); + } + return ret; + } + + private: + std::vector<std::string> LastLivedOpTypes(const std::string &name) const { + auto iter = last_live_ops_of_vars_[0].find(name); + std::vector<std::string> ret; + if (iter != last_live_ops_of_vars_[0].end()) { + for (auto *op : iter->second.ops()) { + ret.emplace_back(op->GetOp()->Type()); + } + } + std::sort(ret.begin(), ret.end()); + return ret; + } + + private: + ir::Graph graph_; + Scope scope_; + std::unique_ptr<ParallelExecutor> executor_; + + ir::MemOptVarInfoMapList mem_opt_var_infos_; + std::vector<LastLiveOpsOfVars> last_live_ops_of_vars_; +}; + +TEST(test_reference_count_pass, test_no_need_buffer_var_shrink) { + ProgramDesc program; + auto *block = program.MutableBlock(0); + std::vector<int64_t> shape{{3, 4, 5}}; + + /** + * The network is: + * + * x0 = fluid.layer.data(...) + * x1 = scale(x0, scale=1) + * x2 = scale(x1, scale=2) + * x3 = elementwise_mul(x1, x2) + * scale(x3, out=x1, scale=3) # produce a new version of x1 + * x4, x5 = elementwise_add_grad(dout=x3, x=x2, y=x1) + * x6 = elementwise_mul(x4, x5) + * x7 = elementwise_add(x5, x5) + */ + std::string x0 = "x0"; + std::string x1 = "x1"; + std::string x2 = "x2"; + std::string x3 = "x3"; + std::string x4 = "x4"; + std::string x5 = "x5"; + std::string x6 = "x6"; + std::string x7 = "x7"; + + NewVar(block, x0, shape); + AppendOp(block, "scale", {{"X", {x0}}}, {{"Out", {x1}}}, {{"scale", 1.0f}}); + AppendOp(block, "scale", {{"X", {x1}}}, {{"Out", {x2}}}, {{"scale", 2.0f}}); + AppendOp(block, + "elementwise_mul", + {{"X", {x1}}, {"Y", {x2}}}, + {{"Out", {x3}}}, + {}); + AppendOp(block, "scale", {{"X", {x3}}}, {{"Out", {x1}}}, {{"scale", 3.0f}}); + AppendOp(block, + "elementwise_add_grad", + {{GradVarName("Out"), {x3}}, {"X", {x2}}, {"Y", {x1}}}, + {{GradVarName("X"), {x4}}, {GradVarName("Y"), {x5}}}, + {}); + AppendOp(block, + "elementwise_mul", + {{"X", {x4}}, {"Y", {x5}}}, + {{"Out", {x6}}}, + {}); + AppendOp(block, + "elementwise_add", + {{"X", {x5}}, {"Y", {x5}}}, + {{"Out", {x7}}}, + {}); + + std::vector<bool> use_cuda_list{false}; +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + use_cuda_list.push_back(true); +#endif + for (auto use_cuda : use_cuda_list) { + ReferenceCountPassTestHelper helper(program, use_cuda); + ASSERT_TRUE(helper.IsLastLivedOps(x0, {"scale"})); + ASSERT_EQ(PADDLE_GET_CONST(float, + helper.LastLivedOps(x0)[0]->Attrs().at("scale")), + 1.0f); + + ASSERT_TRUE(helper.IsLastLivedOps(x1, {"scale"})); + ASSERT_EQ(PADDLE_GET_CONST(float, + helper.LastLivedOps(x1)[0]->Attrs().at("scale")), + 3.0f); + + ASSERT_TRUE(helper.IsLastLivedOps(x2, {"elementwise_mul"})); + ASSERT_TRUE(helper.IsLastLivedOps(x3, {"elementwise_add_grad"})); + + ASSERT_TRUE(helper.IsLastLivedOps(x4, {"elementwise_mul"})); + 
ASSERT_TRUE( + helper.IsLastLivedOps(x5, {"elementwise_mul", "elementwise_add"})); + + ASSERT_TRUE(helper.IsLastLivedOps(x6, {"elementwise_mul"})); + ASSERT_TRUE(helper.IsLastLivedOps(x7, {"elementwise_add"})); + } +} + +} // namespace framework +} // namespace paddle From da7a09f07f2481a0aeffa91f88baf6301a7fec6f Mon Sep 17 00:00:00 2001 From: Liyulingyue <852433440@qq.com> Date: Fri, 23 Feb 2024 05:34:57 +0800 Subject: [PATCH 3/6] add TEST_API --- paddle/fluid/framework/parallel_executor.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 32514089763c6e..29df757d17c8a0 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -52,14 +52,14 @@ class ParallelExecutor { DISABLE_COPY_AND_ASSIGN(ParallelExecutor); public: - explicit ParallelExecutor(const std::vector<platform::Place> &places, - const std::vector<std::string> &bcast_vars, - const std::string &loss_var_name, - Scope *scope, - const std::vector<Scope *> &local_scopes, - const ExecutionStrategy &exec_strategy, - const BuildStrategy &build_strategy, - ir::Graph *graph); + TEST_API explicit ParallelExecutor(const std::vector<platform::Place> &places, + const std::vector<std::string> &bcast_vars, + const std::string &loss_var_name, + Scope *scope, + const std::vector<Scope *> &local_scopes, + const ExecutionStrategy &exec_strategy, + const BuildStrategy &build_strategy, + ir::Graph *graph); // NOTE(Aurelius84): Construct a PE running on single device for @to_static explicit ParallelExecutor(const platform::Place &place, @@ -68,7 +68,7 @@ class ParallelExecutor { const BuildStrategy &build_strategy, ir::Graph *graph); - ~ParallelExecutor(); + TEST_API ~ParallelExecutor(); size_t DeviceCount() const; @@ -98,7 +98,7 @@ class ParallelExecutor { void ResetOpHandleScopeMapOfGraphs( const std::unordered_map<Scope *, Scope *> &scope_map); - const ir::Graph &Graph() const; + TEST_API const ir::Graph &Graph() const; void PrepareVariables(Scope *scope); void SkipMemoryReuse(size_t scope_idx, From 5a323087dd99e768e79d855ea2f014efec8ad984 Mon Sep 17 00:00:00 2001 From: Liyulingyue <852433440@qq.com> Date: Fri, 23 Feb 2024 17:36:35 +0800 Subject: [PATCH 4/6] delete use_op_itself --- .../test_reference_count_pass_last_lived_ops.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc index fc2173f36316d9..eeec6fd8788d41 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc @@ -23,13 +23,6 @@ #include "paddle/fluid/framework/program_desc.h" #include "paddle/phi/core/kernel_registry.h" -USE_OP_ITSELF(scale); -USE_OP_ITSELF(elementwise_mul); -USE_OP_ITSELF(elementwise_add); -USE_OP_ITSELF(elementwise_add_grad); - -PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT); - COMMON_DECLARE_double(eager_delete_tensor_gb); namespace paddle { From 463b81249f8449776a121ba8a6c234324d1f9504 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Fri, 23 Feb 2024 22:26:45 +0800 Subject: [PATCH 5/6] Update test_reference_count_pass_last_lived_ops.cc --- .../test_reference_count_pass_last_lived_ops.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git
a/test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc b/test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc index fc2173f36316d9..eeec6fd8788d41 100644 --- a/test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc +++ b/test/cpp/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc @@ -23,13 +23,6 @@ #include "paddle/fluid/framework/program_desc.h" #include "paddle/phi/core/kernel_registry.h" -USE_OP_ITSELF(scale); -USE_OP_ITSELF(elementwise_mul); -USE_OP_ITSELF(elementwise_add); -USE_OP_ITSELF(elementwise_add_grad); - -PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT); - COMMON_DECLARE_double(eager_delete_tensor_gb); namespace paddle { From e46ad171975d72beba44d9a363b91c703a5eae8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Sat, 24 Feb 2024 07:35:14 +0800 Subject: [PATCH 6/6] Update CMakeLists.txt --- .../fluid/framework/ir/memory_optimize_pass/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 585545936cb165..841ebd7c0fcc0c 100644 --- a/test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/test/cpp/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -6,3 +6,9 @@ endif() paddle_test(test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc DEPS common) + +if(WITH_ONNXRUNTIME AND WIN32) + # Copy onnxruntime for some c++ test in Windows, since the test will + # be built only in CI, so suppose the generator in Windows is Ninja. + copy_onnx(test_reference_count_pass_last_lived_ops) +endif()