Skip to content

Commit

Permalink
Fix dask2 proxies (#190)
Browse files Browse the repository at this point in the history
* First version dask-proxy shortcut code.

* Replace routine with iris version and add a no-fetch test.

* Code style fix.

* Code style fix.

* Rely on iris.util for _array_slice_ifempty; require iris>=2.4
  • Loading branch information
pp-mo authored Feb 24, 2020
1 parent cd5d581 commit ab9bc7d
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 31 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
channels:
- conda-forge
dependencies:
- iris>=2
- iris>=2.4
- python-eccodes>=0.9.1,<2
- pep8
23 changes: 15 additions & 8 deletions iris_grib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
import numpy as np
import numpy.ma as ma

# NOTE: careful here, to avoid circular imports (as iris imports grib)
import iris
from iris._lazy_data import as_lazy_data
import iris.coord_systems as coord_systems
from iris.exceptions import TranslationError, NotYetImplementedError
from iris.util import _array_slice_ifempty

# NOTE: careful here, to avoid circular imports (as iris imports grib)
from . import grib_phenom_translation as gptx
from . import _save_rules
from ._load_convert import convert as load_convert
Expand Down Expand Up @@ -97,13 +98,19 @@ def ndim(self):
return len(self.shape)

def __getitem__(self, keys):
with open(self.path, 'rb') as grib_fh:
grib_fh.seek(self.offset)
grib_message = gribapi.grib_new_from_file(grib_fh)
data = _message_values(grib_message, self.shape)
gribapi.grib_release(grib_message)

return data.__getitem__(keys)
# Avoid fetching file data just to return an 'empty' result.
# Needed because of how dask.array.from_array behaves since Dask v2.0.
result = _array_slice_ifempty(keys, self.shape, self.dtype)
if result is None:
with open(self.path, 'rb') as grib_fh:
grib_fh.seek(self.offset)
grib_message = gribapi.grib_new_from_file(grib_fh)
data = _message_values(grib_message, self.shape)
gribapi.grib_release(grib_message)

result = data.__getitem__(keys)

return result

def __repr__(self):
msg = '<{self.__class__.__name__} shape={self.shape} ' \
Expand Down
49 changes: 28 additions & 21 deletions iris_grib/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import re

import gribapi
from iris_grib import _array_slice_ifempty
import numpy as np
import numpy.ma as ma

Expand Down Expand Up @@ -228,29 +229,35 @@ def _bitmap(self, bitmap_section):
def __getitem__(self, keys):
# NB. Currently assumes that the validity of this interpretation
# is checked before this proxy is created.
message = self.recreate_raw()
sections = message.sections
bitmap_section = sections[6]
bitmap = self._bitmap(bitmap_section)
data = sections[7]['codedValues']

if bitmap is not None:
# Note that bitmap and data are both 1D arrays at this point.
if np.count_nonzero(bitmap) == data.shape[0]:
# Only the non-masked values are included in codedValues.
_data = np.empty(shape=bitmap.shape)
_data[bitmap.astype(bool)] = data
# `ma.masked_array` masks where input = 1, the opposite of
# the behaviour specified by the GRIB spec.
data = ma.masked_array(_data, mask=np.logical_not(bitmap),
fill_value=np.nan)
else:
msg = 'Shapes of data and bitmap do not match.'
raise TranslationError(msg)

data = data.reshape(self.shape)
# Avoid fetching file data just to return an 'empty' result.
# Needed because of how dask.array.from_array behaves since Dask v2.0.
result = _array_slice_ifempty(keys, self.shape, self.dtype)
if result is None:
message = self.recreate_raw()
sections = message.sections
bitmap_section = sections[6]
bitmap = self._bitmap(bitmap_section)
data = sections[7]['codedValues']

if bitmap is not None:
# Note that bitmap and data are both 1D arrays at this point.
if np.count_nonzero(bitmap) == data.shape[0]:
# Only the non-masked values are included in codedValues.
_data = np.empty(shape=bitmap.shape)
_data[bitmap.astype(bool)] = data
# `ma.masked_array` masks where input = 1, the opposite of
# the behaviour specified by the GRIB spec.
data = ma.masked_array(_data, mask=np.logical_not(bitmap),
fill_value=np.nan)
else:
msg = 'Shapes of data and bitmap do not match.'
raise TranslationError(msg)

data = data.reshape(self.shape)
result = data.__getitem__(keys)

return data.__getitem__(keys)
return result

def __repr__(self):
msg = '<{self.__class__.__name__} shape={self.shape} ' \
Expand Down
28 changes: 28 additions & 0 deletions iris_grib/tests/unit/message/test__DataProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# importing anything else.
import iris_grib.tests as tests

from unittest import mock

import numpy as np
from numpy.random import randint

Expand Down Expand Up @@ -41,5 +43,31 @@ def test_bitmap__invalid_indicator(self):
data_proxy._bitmap(section_6)


class Test_emptyfetch(tests.IrisGribTest):
    # See :
    #   iris.tests.unit.fileformats.pp.test_PPDataProxy.Test__getitem__slicing
    # In this case, test *only* the no-data-read effect, not the slicing
    # routine itself, which is part of Iris.
    def test_empty_slice(self):
        # Check behaviour of the getitem call with an 'empty' slicing.
        # This is necessary because, since Dask 2.0, the "from_array" function
        # takes a zero-length slice of its array argument, to capture array
        # metadata, and in those cases we want to avoid file access.
        test_dtype = np.dtype(np.float32)
        # Stand-in for the raw-message factory: any call to it would mean
        # that the proxy tried to fetch the actual data.
        mock_datafetch = mock.MagicMock()
        # Pass the same dtype object that the result is checked against below.
        proxy = _DataProxy(shape=(3, 4),
                           dtype=test_dtype,
                           recreate_raw=mock_datafetch)

        # Test the special no-data indexing operation.
        result = proxy[0:0, 0:0]

        # Check the behaviour and results were as expected.
        self.assertEqual(mock_datafetch.call_count, 0)
        self.assertIsInstance(result, np.ndarray)
        self.assertEqual(result.dtype, test_dtype)
        self.assertEqual(result.shape, (0, 0))


if __name__ == '__main__':
tests.main()
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def file_walk_relative(top, remove=''):
# NOTE: The Python 3 bindings to eccodes (eccodes-python) is available on
# PyPI, but the user is required to install eccodes itself manually. See
# ECMWF ecCodes installation documentation for more information.
install_requires=['scitools-iris>=2.0.*'] + ['eccodes-python'],
install_requires=['scitools-iris>=2.4.*'] + ['eccodes-python'],
test_suite = 'iris_grib.tests',
)

Expand Down

0 comments on commit ab9bc7d

Please sign in to comment.