
Commit

update
radekosmulski committed Dec 8, 2022
1 parent 9df1677 commit 62d4ee0
Showing 1 changed file with 41 additions and 43 deletions.
84 changes: 41 additions & 43 deletions examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb
@@ -76,8 +76,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"downloading ml-25m.zip: 262MB [00:06, 43.0MB/s] \n",
"unzipping files: 100%|██████████| 8/8 [00:08<00:00, 1.12s/files]\n"
"downloading ml-25m.zip: 262MB [00:07, 36.0MB/s] \n",
"unzipping files: 100%|██████████| 8/8 [00:08<00:00, 1.08s/files]\n"
]
}
],
@@ -108,7 +108,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 4,
"id": "c65e5ef6",
"metadata": {},
"outputs": [],
@@ -144,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 5,
"id": "9fbe17a7",
"metadata": {},
"outputs": [
@@ -186,8 +186,6 @@
"\n",
"DATA_PATH = '/workspace'\n",
"\n",
"\n",
"\n",
"dataset = Dataset(glob(DATA_PATH + f'/train_{hvd.local_rank()}.parquet'), part_size=\"100MB\")\n",
"loader = Loader(dataset, batch_size=64*1024)\n",
"\n",
@@ -233,7 +231,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 6,
"id": "ec5e9b7f",
"metadata": {
"scrolled": true
@@ -243,39 +241,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[1,1]<stderr>:2022-12-08 06:00:40.084035: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n",
"[1,1]<stderr>:To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"[1,1]<stderr>:2022-12-08 06:00:40.160948: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n",
"[1,1]<stderr>:2022-12-08 06:00:40.161277: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16255 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:89:00.0, compute capability: 7.0\n",
"[1,0]<stderr>:2022-12-08 06:00:40.355181: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n",
"[1,0]<stderr>:2022-12-08 06:58:30.501381: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n",
"[1,0]<stderr>:To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"[1,0]<stderr>:2022-12-08 06:00:40.461020: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n",
"[1,0]<stderr>:2022-12-08 06:00:40.461327: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16255 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:86:00.0, compute capability: 7.0\n",
"[1,1]<stderr>:2022-12-08 06:00:45.524363: W tensorflow/core/common_runtime/forward_type_inference.cc:231] Type inference failed. This indicates an invalid graph that escaped type checking. Error message: INVALID_ARGUMENT: expected compatible input types, but input 1:\n",
"[1,1]<stderr>:type_id: TFT_OPTIONAL\n",
"[1,1]<stderr>:args {\n",
"[1,1]<stderr>: type_id: TFT_PRODUCT\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_TENSOR\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_BOOL\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>:}\n",
"[1,1]<stderr>: is neither a subtype nor a supertype of the combined inputs preceding it:\n",
"[1,1]<stderr>:type_id: TFT_OPTIONAL\n",
"[1,1]<stderr>:args {\n",
"[1,1]<stderr>: type_id: TFT_PRODUCT\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_TENSOR\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_LEGACY_VARIANT\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>:}\n",
"[1,1]<stderr>:\n",
"[1,1]<stderr>:\twhile inferring type of node 'mean_squared_error/cond/output/_11'\n",
"[1,0]<stderr>:2022-12-08 06:00:45.837188: W tensorflow/core/common_runtime/forward_type_inference.cc:231] Type inference failed. This indicates an invalid graph that escaped type checking. Error message: INVALID_ARGUMENT: expected compatible input types, but input 1:\n",
"[1,0]<stderr>:2022-12-08 06:58:30.555187: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n",
"[1,0]<stderr>:2022-12-08 06:58:30.555454: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16255 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:85:00.0, compute capability: 7.0\n",
"[1,1]<stderr>:2022-12-08 06:58:30.575717: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n",
"[1,1]<stderr>:To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"[1,1]<stderr>:2022-12-08 06:58:30.632564: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n",
"[1,1]<stderr>:2022-12-08 06:58:30.632832: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16255 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:86:00.0, compute capability: 7.0\n",
"[1,0]<stderr>:2022-12-08 06:58:35.010671: W tensorflow/core/common_runtime/forward_type_inference.cc:231] Type inference failed. This indicates an invalid graph that escaped type checking. Error message: INVALID_ARGUMENT: expected compatible input types, but input 1:\n",
"[1,0]<stderr>:type_id: TFT_OPTIONAL\n",
"[1,0]<stderr>:args {\n",
"[1,0]<stderr>: type_id: TFT_PRODUCT\n",
Expand All @@ -299,13 +273,37 @@
"[1,0]<stderr>:}\n",
"[1,0]<stderr>:\n",
"[1,0]<stderr>:\twhile inferring type of node 'mean_squared_error/cond/output/_11'\n",
" 6/191 [..............................] - ETA: 2s - loss: 13.6364 [1,0]<stderr>:/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!\n",
"[1,1]<stderr>:2022-12-08 06:58:35.218048: W tensorflow/core/common_runtime/forward_type_inference.cc:231] Type inference failed. This indicates an invalid graph that escaped type checking. Error message: INVALID_ARGUMENT: expected compatible input types, but input 1:\n",
"[1,1]<stderr>:type_id: TFT_OPTIONAL\n",
"[1,1]<stderr>:args {\n",
"[1,1]<stderr>: type_id: TFT_PRODUCT\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_TENSOR\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_BOOL\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>:}\n",
"[1,1]<stderr>: is neither a subtype nor a supertype of the combined inputs preceding it:\n",
"[1,1]<stderr>:type_id: TFT_OPTIONAL\n",
"[1,1]<stderr>:args {\n",
"[1,1]<stderr>: type_id: TFT_PRODUCT\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_TENSOR\n",
"[1,1]<stderr>: args {\n",
"[1,1]<stderr>: type_id: TFT_LEGACY_VARIANT\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>: }\n",
"[1,1]<stderr>:}\n",
"[1,1]<stderr>:\n",
"[1,1]<stderr>:\twhile inferring type of node 'mean_squared_error/cond/output/_11'\n",
" 6/191 [..............................] - ETA: 2s - loss: 13.6433 [1,0]<stderr>:/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!\n",
"[1,0]<stderr>: warnings.warn(\"urllib3 ({}) or chardet ({}) doesn't match a supported \"\n",
"[1,0]<stderr>:WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0096s vs `on_train_batch_end` time: 0.1717s). Check your callbacks.\n",
"[1,0]<stderr>:WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0094s vs `on_train_batch_end` time: 0.1490s). Check your callbacks.\n",
"[1,1]<stderr>:/usr/lib/python3/dist-packages/requests/__init__.py:89: RequestsDependencyWarning: urllib3 (1.26.12) or chardet (3.0.4) doesn't match a supported version!\n",
"[1,1]<stderr>: warnings.warn(\"urllib3 ({}) or chardet ({}) doesn't match a supported \"\n",
"[1,1]<stderr>:WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0093s vs `on_train_batch_end` time: 0.1719s). Check your callbacks.\n",
"191/191 [==============================] - 10s 14ms/step - loss: 3.3301stdout>[1,0]<stdout[1,0]<stdout[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>\n"
"[1,1]<stderr>:WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0093s vs `on_train_batch_end` time: 0.1489s). Check your callbacks.\n",
"191/191 [==============================] - 8s 12ms/step - loss: 3.3301<stdout>[1,0]<stdout[1,0]<stdout[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>[1,0]<stdout>\n"
]
}
],
