diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index c5212076..f945950a 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -19,10 +19,11 @@ jobs:
         pip install --upgrade pip
         pip install pytest
         pip install pytest-cov
+        pip install pytest-timeout
+        pip install flaky
         pip install -e .
     - name: Generate coverage report
       run: |
-        pip install pytest-cov
         cd tests
         pytest --disable-warnings --cov=./ --cov-report=xml:coverage.xml
     - name: Upload coverage to Codecov
diff --git a/.github/workflows/pull_request_tests.yml b/.github/workflows/pull_request_tests.yml
index 56517637..61f85a53 100644
--- a/.github/workflows/pull_request_tests.yml
+++ b/.github/workflows/pull_request_tests.yml
@@ -29,6 +29,7 @@ jobs:
         pip install pytest
         pip install pytest-cov
         pip install pytest-timeout
+        pip install flaky
         pip install -e .
     - name: Run pytest and Generate coverage report
       run: |
diff --git a/rex/multi_file_resource.py b/rex/multi_file_resource.py
index 613fa1a0..ee91e00d 100644
--- a/rex/multi_file_resource.py
+++ b/rex/multi_file_resource.py
@@ -127,7 +127,7 @@ def _get_dsets(h5_path):
         shared_dsets = []
         try:
             with h5py.File(h5_path, mode='r') as f:
-                for dset in f:
+                for dset in Resource._get_datasets(f):
                     if dset not in ['meta', 'time_index', 'coordinates']:
                         unique_dsets.append(dset)
                     else:
diff --git a/rex/rechunk_h5/rechunk_h5.py b/rex/rechunk_h5/rechunk_h5.py
index 635b130d..61758a2e 100644
--- a/rex/rechunk_h5/rechunk_h5.py
+++ b/rex/rechunk_h5/rechunk_h5.py
@@ -88,6 +88,9 @@ def get_dataset_attributes(h5_file, out_json=None, chunk_size=2,
 class RechunkH5:
     """
     Class to create new .h5 file with new chunking
+
+    .. WARNING:: This code does not currently support re-chunking H5
+                 files with grouped datasets.
     """
     # None time-series
     NON_TS_DSETS = ('meta', 'coordinates', 'time_index')
diff --git a/rex/resource.py b/rex/resource.py
index 64b7ccf5..4875fca6 100644
--- a/rex/resource.py
+++ b/rex/resource.py
@@ -924,7 +924,7 @@ def attrs(self):
         """
         if self._attrs is None:
             self._attrs = {}
-            for dset in set(self.datasets).intersection(self.h5):
+            for dset in self.datasets:
                 self._attrs[dset] = dict(self.h5[dset].attrs)
 
         return self._attrs
diff --git a/setup.py b/setup.py
index 4dc10e80..6a4a0583 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def run(self):
 with open("requirements.txt") as f:
     install_requires = f.readlines()
 
-test_requires = ["pytest>=5.2", "pytest-timeout>=2.3.1"]
+test_requires = ["pytest>=5.2", "pytest-timeout>=2.3.1", "flaky>=3.8.1"]
 dev_requires = ["flake8", "pre-commit", "pylint", "hsds>=0.8.4"]
 description = ("National Renewable Energy Laboratory's (NREL's) REsource "
                "eXtraction tool: rex")
diff --git a/tests/test_bc.py b/tests/test_bc.py
index b7485889..4c2ac06a 100644
--- a/tests/test_bc.py
+++ b/tests/test_bc.py
@@ -4,11 +4,13 @@
 """
 import numpy as np
+from flaky import flaky
 
 from rex.temporal_stats.temporal_stats import cdf
 from rex.utilities.bc_utils import QuantileDeltaMapping
 
 
+@flaky(max_runs=3, min_passes=1)
 def test_qdm():
     """Test basic QuantileDeltaMapping functionality with dummy distributions
     """
diff --git a/tests/test_resource.py b/tests/test_resource.py
index 37fe759a..3bff5071 100644
--- a/tests/test_resource.py
+++ b/tests/test_resource.py
@@ -975,6 +975,64 @@ def test_mh5_iterator():
 
     assert len(dsets_permutation) == len(mh5.datasets) ** 2
 
 
+@pytest.mark.parametrize("read_class", [Resource, MultiFileResource])
+def test_attrs_for_grouped_datasets(read_class):
+    """Test attrs for files with datasets under groups."""
+
+    meta = pd.DataFrame({'latitude': np.ones(100),
+                         'longitude': np.zeros(100)})
+    time_index = pd.date_range('20210101', '20220101', freq='1h',
+                               closed='right')
+    with tempfile.TemporaryDirectory() as td:
+        fp = os.path.join(td, 'outputs.h5')
+
+        with Outputs(fp, 'w') as f:
+            f.meta = meta
+            f.time_index = time_index
+
+        Outputs.add_dataset(h5_file=fp, dset_name='dset1',
+                            dset_data=np.ones((8760, 100)) * 42.42,
+                            attrs={'scale_factor': 100}, dtype=np.int32)
+
+        with Outputs(fp, 'a', group="g1") as f:
+            f.meta = meta
+            f.time_index = time_index
+
+        Outputs.add_dataset(h5_file=fp, dset_name='dset_g1',
+                            dset_data=np.ones((8760, 100)) * 42.42,
+                            attrs={'scale_factor': 100}, dtype=np.int32,
+                            group="g1")
+
+        with read_class(fp) as res:
+            assert np.allclose(res["dset1"], 42.42)
+            assert np.allclose(res["g1/dset_g1"], 42.42)
+
+            expected_dsets = {'dset1', 'meta', 'time_index',
+                              'g1/dset_g1', 'g1/meta', 'g1/time_index'}
+            assert set(res.datasets) == expected_dsets
+            assert set(res.dtypes) == expected_dsets
+
+            expected_attrs = {'dset1': {'scale_factor': 100},
+                              'g1/dset_g1': {'scale_factor': 100},
+                              'g1/meta': {}, 'g1/time_index': {},
+                              'meta': {}, 'time_index': {}}
+            assert res.attrs == expected_attrs
+
+            expected_shapes = {'dset1': (8760, 100),
+                               'g1/dset_g1': (8760, 100),
+                               'g1/meta': (100,),
+                               'g1/time_index': (8760,),
+                               'meta': (100,), 'time_index': (8760,)}
+            assert res.shapes == expected_shapes
+
+            expected_chunks = {'dset1': None,
+                               'g1/dset_g1': None,
+                               'g1/meta': None,
+                               'g1/time_index': None,
+                               'meta': None, 'time_index': None}
+            assert res.chunks == expected_chunks
+
+
 def execute_pytest(capture='all', flags='-rapP'):
     """Execute module as pytest with detailed summary report.
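The `multi_file_resource.py` hunk above swaps the plain top-level iteration `for dset in f` for `Resource._get_datasets(f)`, which enumerates dataset paths across HDF5 groups (e.g. `g1/dset_g1`); that method's implementation is not part of this diff. A minimal sketch of such a recursive walk, assuming an open `h5py` file handle (the helper name `get_dataset_paths` is hypothetical, not the actual rex method):

```python
import h5py


def get_dataset_paths(h5_obj, prefix=''):
    """Recursively collect dataset paths from an open h5py File or Group,
    keeping group names as path prefixes (e.g. 'g1/dset_g1').

    Hypothetical sketch for review context only; not the rex implementation.
    """
    paths = []
    for name, member in h5_obj.items():
        path = f"{prefix}{name}"
        if isinstance(member, h5py.Group):
            # Descend into the group, carrying its name as a path prefix
            paths.extend(get_dataset_paths(member, prefix=f"{path}/"))
        else:
            paths.append(path)
    return paths
```

Because the resulting paths are fully qualified, `h5py` indexing accepts them directly (`f['g1/dset_g1']`). That is presumably why the `attrs` property in `resource.py` can drop the `set(self.datasets).intersection(self.h5)` guard: iterating `self.h5` only yields top-level names, so the intersection silently excluded grouped datasets.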