Clean fluid APIs in distributed and fleet files (#48851)
* Fix a bug in the reduce_sum op: when input.numel() > INT32_MAX, its result
is wrong.
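
A minimal repro sketch of this class of overflow; the exact shape is an
assumption (any tensor with numel() above INT32_MAX qualifies), and at int64
it needs roughly 17 GB of memory:

    import paddle

    # numel() = 2**31 + 8 > INT32_MAX; before the fix, 32-bit indexing in
    # the reduction kernel overflowed and produced a wrong sum.
    x = paddle.ones([2**31 + 8], dtype='int64')
    print(paddle.sum(x))  # expected value: 2147483656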

* Remove climits.

* Clean fluid APIs in the paddle/distributed and paddle/fleet folders,
covering the following files:
python/paddle/distributed/__init__.py
python/paddle/distributed/collective.py
python/paddle/distributed/fleet/utils/fs.py
python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
python/paddle/distributed/fleet/utils/internal_storage.py
python/paddle/distributed/launch/context/device.py
python/paddle/distributed/parallel.py
python/paddle/distributed/parallel_with_gloo.py
python/paddle/distributed/spawn.py
python/paddle/framework/__init__.py
Note that 'paddle.fluid.dygraph.parallel.ParallelEnv' and
'fluid.framework.core' are kept unchanged in those files. ParallelEnv is
used by paddle.fluid.dygraph.parallel.DataParallel, but the APIs in
paddle.fluid.dygraph.parallel cannot be migrated to paddle.distributed,
because there are cyclic import dependencies through modules like
paddle.static and paddle.tensor (a minimal illustration follows at the end
of this message). And 'fluid.framework.core' will be switched to import
framework.core once fluid.core itself has been migrated.

* Change TODO authors.
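
As promised in the third item above, a minimal sketch of the cyclic-import
failure mode, using two hypothetical modules rather than Paddle's actual
import graph; running `python -c "import a"` fails because b sees a
partially initialized a:

    # a.py
    import b  # b starts executing before helper() below is defined

    def helper():
        return "ok"

    # b.py
    import a

    print(a.helper())  # AttributeError: partially initialized module 'a'

Breaking such a cycle means deferring or re-homing imports, which is why
ParallelEnv stays in fluid for now.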
GhostScreaming authored Dec 8, 2022
1 parent ea9e408 commit 911d6bb
Showing 11 changed files with 61 additions and 28 deletions.
3 changes: 3 additions & 0 deletions python/paddle/distributed/__init__.py
@@ -64,6 +64,9 @@
 from .entry_attr import CountFilterEntry  # noqa: F401
 from .entry_attr import ShowClickEntry  # noqa: F401

+# (TODO: GhostScreaming) It needs migration of ParallelEnv. However,
+# it's hard to migrate APIs in paddle.fluid.dygraph.parallel completely.
+# It will be replaced later.
 from paddle.fluid.dygraph.parallel import ParallelEnv  # noqa: F401

 from . import cloud_utils  # noqa: F401
4 changes: 3 additions & 1 deletion python/paddle/distributed/collective.py
@@ -15,9 +15,11 @@
 import datetime

 import paddle
+
+# (TODO: GhostScreaming) It will be removed later.
 import paddle.fluid.core as core
+from paddle.framework import _non_static_mode, in_dygraph_mode

-from ..fluid.framework import _non_static_mode, in_dygraph_mode
 from .communication.group import Group, _add_new_group, is_initialized
 from .fleet.layers.mpu.mp_ops import _c_concat  # noqa: F401
 from .fleet.layers.mpu.mp_ops import _c_identity  # noqa: F401
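
A quick smoke test of the import path this diff switches to; it assumes a
Paddle build that already contains this commit:

    from paddle.framework import _non_static_mode, in_dygraph_mode

    print(in_dygraph_mode())  # True under Paddle's default eager mode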
1 change: 1 addition & 0 deletions python/paddle/distributed/fleet/utils/fs.py
@@ -20,6 +20,7 @@
 import shutil
 import time

+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core

 from .log_util import logger
python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py
@@ -17,8 +17,10 @@
 import numpy as np

 import paddle.distributed.fleet as fleet
+
+# (TODO: GhostScreaming) It will be removed later.
 import paddle.fluid.core as core
-from paddle.fluid.framework import Block, Program, _non_static_mode
+from paddle.framework import Block, Program, _non_static_mode


 class HybridParallelInferenceHelper:
7 changes: 5 additions & 2 deletions python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
@@ -14,13 +14,16 @@

 import paddle
 from paddle import framework
+
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
-from paddle.fluid.dygraph.parallel import (
+from paddle.framework import (
+    _in_legacy_dygraph,
     _split_tensors,
     build_groups,
+    in_dygraph_mode,
     sync_params_buffers,
 )
-from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode

 from .log_util import logger
22 changes: 12 additions & 10 deletions python/paddle/distributed/fleet/utils/internal_storage.py
@@ -25,7 +25,9 @@
 import numpy as np

 import paddle
-import paddle.fluid as fluid
+from paddle import framework
+
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core

 from ..meta_parallel.sharding.sharding_utils import Type, device_guard
@@ -111,7 +113,7 @@ def to(self, device, dtype=None, keep_alignment=True):
         if keep_alignment:
             self._array_params()

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def add_rank_params(self, trainable_params, param2align, convert_gpu=True):
         """
         Add new parameters to the InternalStorage. Params becomes a view of this InternalStorage buffer.
Expand Down Expand Up @@ -145,7 +147,7 @@ def add_rank_params(self, trainable_params, param2align, convert_gpu=True):
self._params.append(param)
self._param_ids.append(id(param))

@fluid.dygraph.no_grad
@framework.no_grad()
def _add_param_as_view(self, param, align, convert_gpu=True):

assert (
Expand Down Expand Up @@ -185,7 +187,7 @@ def _add_param_as_view(self, param, align, convert_gpu=True):
self._fill = offset
return p_shape

@fluid.dygraph.no_grad
@framework.no_grad()
def _convert_buffer(self, param, p_shape, align):

var_end = self._fill + np.prod(p_shape)
@@ -199,7 +201,7 @@ def _convert_buffer(self, param, p_shape, align):

         self._fill = offset

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _array_params(self):
         """
         Given the parameters which have been registered previously, rebuild the whole InternalStorage.
Expand Down Expand Up @@ -261,7 +263,7 @@ def to(self, device, dtype=None, keep_alignment=True):
if keep_alignment:
self._array_grads()

@fluid.dygraph.no_grad
@framework.no_grad()
def add_grad(self, param, align):
"""
Add a new parameter gradient to the InternalStorage. Param.grad becomes a view of this InternalStorage buffer.
@@ -275,7 +277,7 @@ def add_grad(self, param, align):
             self._params.append(param)
             self._param_ids.append(id(param))

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def manumal_relase(self):
         """
         Release the buffer from InternalStorage. The InternalStorage will need to be rebuilt before use.
@@ -291,7 +293,7 @@ def manumal_relase(self):
         self.params_checked_in = 0
         self._release = True

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def rebuild(self):
         """
         Given the parameter gradients which have been registered previously, rebuild the whole InternalStorage.
@@ -305,7 +307,7 @@ def rebuild(self):

         self._release = False

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _array_grads(self):
         """
         Given the parameters gradients which have been registered previously, rebuild the whole InternalStorage.
@@ -315,7 +317,7 @@ def _array_grads(self):
         for p in self._params:
             self._add_grad_as_view(p, self._parm2align[p.name])

-    @fluid.dygraph.no_grad
+    @framework.no_grad()
     def _add_grad_as_view(self, param, align):
         assert (
             np.prod(self.buffer.shape) > 0
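
One note on the decorator swap repeated through this file: the old
fluid.dygraph.no_grad was applied bare, while framework.no_grad() is called
to produce the decorator. A small usage sketch under that reading; the scale
function is hypothetical, and the final True assumes no_grad marks outputs
as stop_gradient:

    import paddle
    from paddle import framework

    @framework.no_grad()  # note the call: no_grad() returns the decorator
    def scale(x):
        return x * 2

    x = paddle.ones([2])
    x.stop_gradient = False  # request gradients on the input
    y = scale(x)
    print(y.stop_gradient)  # True: the op ran without gradient tracking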
24 changes: 13 additions & 11 deletions python/paddle/distributed/launch/context/device.py
@@ -14,9 +14,11 @@

 import os

-import paddle.fluid as fluid
 from paddle.device import get_available_custom_device
+
+# (TODO: GhostScreaming) It will be removed later.
+from paddle.fluid import core


 class DeviceType:
     CPU = 'cpu'
@@ -148,25 +150,25 @@ def get_custom_devices_count(device_type):
             )
             if visible_devices_str in os.environ:
                 visible_devices = os.getenv(visible_devices_str)
-        elif fluid.core.is_compiled_with_cuda():
+        elif core.is_compiled_with_cuda():
             dev._dtype = DeviceType.GPU
-            num = fluid.core.get_cuda_device_count()
+            num = core.get_cuda_device_count()
             visible_devices = os.getenv("CUDA_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_xpu():
+        elif core.is_compiled_with_xpu():
             dev._dtype = DeviceType.XPU
-            num = fluid.core.get_xpu_device_count()
+            num = core.get_xpu_device_count()
             visible_devices = os.getenv("XPU_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_npu():
+        elif core.is_compiled_with_npu():
             dev._dtype = DeviceType.NPU
-            num = fluid.core.get_npu_device_count()
+            num = core.get_npu_device_count()
             visible_devices = os.getenv("ASCEND_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_mlu():
+        elif core.is_compiled_with_mlu():
             dev._dtype = DeviceType.MLU
-            num = fluid.core.get_mlu_device_count()
+            num = core.get_mlu_device_count()
             visible_devices = os.getenv("MLU_VISIBLE_DEVICES")
-        elif fluid.core.is_compiled_with_ipu():
+        elif core.is_compiled_with_ipu():
             dev._dtype = DeviceType.IPU
-            num = fluid.core.get_ipu_device_count()
+            num = core.get_ipu_device_count()
             # For IPUs, 'labels' is a list which contains the available numbers of IPU devices.
             dev._labels = [str(x) for x in range(0, num + 1)]
             return dev
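
A short sketch of how the rewritten cascade resolves devices on a CUDA
build; the two-GPU output is an assumption:

    # core still comes from paddle.fluid here, per the TODO above.
    from paddle.fluid import core

    if core.is_compiled_with_cuda():
        num = core.get_cuda_device_count()
        print([str(i) for i in range(num)])  # e.g. ['0', '1'] on a 2-GPU box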
12 changes: 10 additions & 2 deletions python/paddle/distributed/parallel.py
@@ -38,10 +38,18 @@
 from paddle.distributed.fleet.launch_utils import check_backend

 # deprecated module import
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
-from paddle.fluid.dygraph import parallel_helper
+
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.framework import _set_expected_place, in_dygraph_mode
+
+# (TODO: GhostScreaming) It will be removed later.
+from paddle.framework import (
+    _set_expected_place,
+    in_dygraph_mode,
+    parallel_helper,
+)

 __all__ = []
1 change: 1 addition & 0 deletions python/paddle/distributed/parallel_with_gloo.py
@@ -20,6 +20,7 @@
 )

 # deprecated module import
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core

 __all__ = []
3 changes: 2 additions & 1 deletion python/paddle/distributed/spawn.py
@@ -37,8 +37,9 @@
 )

 # deprecated module import
+# (TODO: GhostScreaming) It will be removed later.
 from paddle.fluid import core
-from paddle.fluid.framework import set_flags
+from paddle.framework import set_flags

 __all__ = []
8 changes: 8 additions & 0 deletions python/paddle/framework/__init__.py
@@ -68,5 +68,13 @@
 from ..fluid.framework import _global_flags  # noqa: F401
 from ..fluid.framework import _apply_pass  # noqa: F401
 from ..fluid.framework import switch_main_program
+from ..fluid.framework import _set_expected_place  # noqa: F401
+from ..fluid.framework import Block, Program  # noqa: F401
+from ..fluid.dygraph import parallel_helper  # noqa: F401
+from ..fluid.dygraph.parallel import (
+    _split_tensors,
+    build_groups,
+    sync_params_buffers,
+)

 __all__ = []
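
With these re-exports in place, downstream code can pull the symbols from
paddle.framework instead of reaching into fluid directly; a minimal sketch:

    from paddle.framework import Block, Program

    prog = Program()  # static-graph program container
    print(isinstance(prog.global_block(), Block))  # True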
