Skip to content

Commit 34e9a4a

Browse files
authored
Upgrade cuML and cuDF (#1395)
Upgrade RAPIDS to 24.4.x (cuML and cuDF). - It looks like installing via mamba was not working, so the packages were installed using conda with the mamba package solver (which is faster than plain conda). - Added tests for cuML and cuDF; these packages were reported as "broken" for the last 6 months. - Skip certain tests when running on a P100 GPU. Going forward, RAPIDS will not be compatible with P100 GPUs; that's a problem for another day. https://chat.kaggle.net/kaggle/pl/85tczsc4w3nhijkd1ftryxr7yo b/296444923 b/341938540
1 parent 873dbab commit 34e9a4a

File tree

6 files changed

+72
-20
lines changed

6 files changed

+72
-20
lines changed

Dockerfile.tmpl

+6-12
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ ENV PROJ_LIB=/opt/conda/share/proj
9999
# the remaining pip commands: https://www.anaconda.com/using-pip-in-a-conda-environment/
100100
RUN conda config --add channels nvidia && \
101101
conda config --add channels rapidsai && \
102+
conda config --set solver libmamba && \
102103
# b/299991198 remove curl/libcurl install once DLVM base image includes version >= 7.86
103104
conda install -c conda-forge mamba curl libcurl && \
104105
# Base image channel order: conda-forge (highest priority), defaults.
@@ -107,24 +108,17 @@ RUN conda config --add channels nvidia && \
107108
/tmp/clean-layer.sh
108109

109110
# Install spacy
111+
# b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version.
112+
# b/341938540: uninstall grpc-cpp to allow cudf and cuml >=v24.4 to be installed.
110113
{{ if eq .Accelerator "gpu" }}
111-
RUN mamba install -y -c conda-forge spacy cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
114+
RUN pip uninstall -y pyarrow && \
115+
mamba remove -y --force grpc-cpp && \
116+
# Quote the version specs: an unquoted `>=` is parsed by the shell as an output redirection to a file named `=24.4`, which drops the version constraint.
mamba install -y -c conda-forge spacy "cudf>=24.4" "cuml>=24.4" cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \
112117
/tmp/clean-layer.sh
113118
{{ else }}
114119
RUN pip install spacy && \
115120
/tmp/clean-layer.sh
116121
{{ end}}
117-
{{ if eq .Accelerator "gpu" }}
118-
119-
# b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version.
120-
RUN pip uninstall -y pyarrow && \
121-
mamba install -y cudf cuml && \
122-
/tmp/clean-layer.sh
123-
124-
# TODO: b/296444923 - Resolve pandas dependency another way
125-
RUN sed -i 's/^is_extension_type/# is_extension_type/g' /opt/conda/lib/python3.10/site-packages/cudf/api/types.py \
126-
&& sed -i 's/^is_categorical/# is_categorical/g' /opt/conda/lib/python3.10/site-packages/cudf/api/types.py
127-
{{ end }}
128122

129123
# Install PyTorch
130124
{{ if eq .Accelerator "gpu" }}

tests/common.py

+10
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,16 @@
22

33
import os
44
import unittest
5+
import subprocess
6+
7+
def getAcceleratorName():
8+
try:
9+
deviceName = subprocess.check_output(['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'])
10+
return deviceName.decode('utf-8').strip()
11+
except FileNotFoundError:
12+
return("nvidia-smi not found.")
513

614
gpu_test = unittest.skipIf(len(os.environ.get('CUDA_VERSION', '')) == 0, 'Not running GPU tests')
15+
# b/342143152: P100s are gradually losing support in new releases of popular ML tools such as RAPIDS.
16+
p100_exempt = unittest.skipIf(getAcceleratorName() == "Tesla P100-PCIE-16GB", 'Not running p100 exempt tests')
717
tpu_test = unittest.skipIf(len(os.environ.get('ISTPUVM', '')) == 0, 'Not running TPU tests')

tests/test_cudf.py

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import unittest
2+
3+
from common import gpu_test, p100_exempt
4+
5+
6+
class TestCudf(unittest.TestCase):
7+
@gpu_test
8+
@p100_exempt # b/342143152: cuDF(>=24.4v) is incompatible with p100 GPUs.
9+
def test_cudf_dataframe_operations(self):
10+
import cudf
11+
12+
data = {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
13+
gdf = cudf.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6]})
14+
15+
gdf['col3'] = gdf['col1'] + gdf['col2']
16+
17+
expected_col3 = cudf.Series([5, 7, 9])
18+
self.assertEqual(gdf.shape, (3, 3))
19+
self.assertEqual(list(gdf.columns), ['col1', 'col2', 'col3'])
20+
self.assertTrue(gdf['col3'].equals(expected_col3))

tests/test_cuml.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import unittest
2+
3+
from common import gpu_test, p100_exempt
4+
5+
6+
class TestCuml(unittest.TestCase):
7+
@gpu_test
8+
@p100_exempt # b/342143152: cuML(>=24.4v) is incompatible with p100 GPUs.
9+
def test_pca_fit_transform(self):
10+
import unittest
11+
import numpy as np
12+
from cuml.decomposition import PCA
13+
14+
x = np.array([[1.0, 2.0], [2.0, 4.0], [3.0, 6.0], [-1.0, -2.0], [-2.0, -4.0]])
15+
pca = PCA(n_components=1)
16+
17+
x_transformed = pca.fit_transform(x)
18+
19+
self.assertEqual(x_transformed.shape, (5, 1))

tests/test_datashader.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
import unittest
22

3-
import numpy as np
4-
import pandas as pd
5-
import datashader as ds
6-
import datashader.transfer_functions as tf
3+
from common import p100_exempt
74

85
class TestDatashader(unittest.TestCase):
9-
# based on https://github.com/pyviz/datashader/blob/master/datashader/tests/test_pipeline.py
6+
7+
@p100_exempt # b/342143152: Uses cuDF(>=24.4v), which is no longer compatible with p100 GPUs.
108
def test_pipeline(self):
9+
# based on https://github.com/pyviz/datashader/blob/master/datashader/tests/test_pipeline.py
10+
import numpy as np
11+
import pandas as pd
12+
import datashader as ds
13+
import datashader.transfer_functions as tf
14+
1115
df = pd.DataFrame({
1216
'x': np.array(([0.] * 10 + [1] * 10)),
1317
'y': np.array(([0.] * 5 + [1] * 5 + [0] * 5 + [1] * 5)),

tests/test_geoviews.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
import unittest
22

3-
import geoviews.feature as gf
4-
import holoviews as hv
5-
from cartopy import crs
3+
from common import p100_exempt
64

75
class TestGeoviews(unittest.TestCase):
6+
7+
@p100_exempt # b/342143152: Uses cuDF(>=24.4v), which is no longer compatible with p100 GPUs.
8+
89
def test_viz(self):
10+
import geoviews.feature as gf
11+
import holoviews as hv
12+
from cartopy import crs
13+
914
hv.extension('matplotlib')
1015
(gf.ocean + gf.land + gf.ocean * gf.land * gf.coastline * gf.borders).options(
1116
'Feature', projection=crs.Geostationary(), global_extent=True

0 commit comments

Comments
 (0)