openvinotoolkit · eunwoosh · Oct 10, 2024 · Oct 10, 2024 · Oct 10, 2024 · Oct 10, 2024
@@ -112,8 +112,14 @@
                 break
 
         if available_bs == 0:
-            msg = "Current device can't train model even with 2."
-            raise RuntimeError(msg)
+            if oom:
+                msg = "Current device can't train model even with 2."
+                raise RuntimeError(msg)
+            logger.warning(
+                "Even with a batch size of 2, most of the memory is used, "
+                "which could cause the training to fail midway.",
+            )
+            available_bs = 2
 
         return available_bs
 
@@ -141,8 +147,14 @@
         if oom or bs_mem_usage > self._mem_upper_bound:
             self._default_bs -= 2
             if self._default_bs <= 0:
-                msg = "Current device can't train model even with 2."
-                raise RuntimeError(msg)
+                if oom:
+                    msg = "Current device can't train model even with 2."
+                    raise RuntimeError(msg)
+                logger.warning(
+                    "Even with a batch size of 2, most of the memory is used, "
+                    "which could cause the training to fail midway.",
+                )
+                return 2
 
             return self.auto_decrease_batch_size()
 

@@ -99,12 +99,19 @@ def test_auto_decrease_batch_size(self):
         assert adapted_bs == 80
 
     def test_find_max_usable_bs_gpu_memory_too_small(self):
-        mock_train_func = self.get_mock_train_func(cuda_oom_bound=4, max_runnable_bs=1)
+        mock_train_func = self.get_mock_train_func(cuda_oom_bound=1, max_runnable_bs=1)
 
         bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000)
         with pytest.raises(RuntimeError):
             bs_search_algo.auto_decrease_batch_size()
 
+    def test_auto_decrease_batch_size_bs2_not_oom_but_most_mem(self):
+        """Batch size 2 doesn't cause OOM but uses most of the memory."""
+        mock_train_func = self.get_mock_train_func(cuda_oom_bound=2, max_runnable_bs=1)
+
+        bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000)
+        assert bs_search_algo.auto_decrease_batch_size() == 2
+
     @pytest.mark.parametrize(
         ("max_runnable_bs", "max_bs", "expected_bs"),
         [
@@ -126,12 +133,19 @@ def test_find_big_enough_batch_size(self, max_runnable_bs, max_bs, expected_bs):
             assert adapted_bs == expected_bs
 
     def test_find_big_enough_batch_size_gpu_memory_too_small(self):
-        mock_train_func = self.get_mock_train_func(cuda_oom_bound=4, max_runnable_bs=1)
+        mock_train_func = self.get_mock_train_func(cuda_oom_bound=1, max_runnable_bs=1)
 
         bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000)
         with pytest.raises(RuntimeError):
             bs_search_algo.find_big_enough_batch_size()
 
+    def test_find_big_enough_batch_size_bs2_not_oom_but_most_mem(self):
+        """Batch size 2 doesn't cause OOM but uses most of the memory."""
+        mock_train_func = self.get_mock_train_func(cuda_oom_bound=2, max_runnable_bs=1)
+
+        bs_search_algo = BsSearchAlgo(mock_train_func, 2, 1000)
+        assert bs_search_algo.find_big_enough_batch_size() == 2
+
     def test_find_big_enough_batch_size_gradient_zero(self):
         def mock_train_func(batch_size) -> int:
             if batch_size > 1000: