From c7ea82e64385497c04a2f4ba8f290a55816cf776 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Wed, 3 Jan 2024 15:51:02 -0700 Subject: [PATCH 01/19] added handling for temporal stats function that outputs multiple values with test using scipy.stats.weibull_min.fit() --- rex/temporal_stats/temporal_stats.py | 7 +++++- tests/test_temporal_stats.py | 36 ++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py index 3b17b934..18de50a4 100644 --- a/rex/temporal_stats/temporal_stats.py +++ b/rex/temporal_stats/temporal_stats.py @@ -372,8 +372,13 @@ def _compute_stats(cls, res_data, statistics, diurnal=False, month=False): columns = column_names[name] s_data = s_data.T s_data.columns = columns - else: + elif not isinstance(s_data, pd.DataFrame): s_data = s_data.to_frame(name=name) + elif isinstance(s_data, pd.DataFrame) and len(s_data) > 1: + # e.g., if func is scipy.stats.beta.fit(), this collapses + # multiple output parameters into list + s_data['name'] = name + s_data = s_data.groupby('name').agg(list).T res_stats.append(s_data) diff --git a/tests/test_temporal_stats.py b/tests/test_temporal_stats.py index 670c0345..d5702d77 100644 --- a/tests/test_temporal_stats.py +++ b/tests/test_temporal_stats.py @@ -7,7 +7,7 @@ import os import pandas as pd import pytest -from scipy.stats import mode +import scipy import tempfile import traceback @@ -39,7 +39,7 @@ def mode_func(arr, axis=0): """ custom mode stats """ - return mode(arr, axis=axis, keepdims=True).mode[0] + return scipy.stats.mode(arr, axis=axis, keepdims=True).mode[0] @pytest.mark.parametrize(("max_workers", "sites"), @@ -185,7 +185,7 @@ def test_custom_stats(max_workers): msg = 'Mins do not match!' assert np.allclose(truth, test_stats['min'].values), msg - truth = mode(res_data, axis=0, keepdims=True).mode[0] + truth = scipy.stats.mode(res_data, axis=0, keepdims=True).mode[0] msg = 'Modes do not match!' assert np.allclose(truth, test_stats['mode'].values), msg @@ -194,7 +194,7 @@ def test_custom_stats(max_workers): msg = 'January mins do not match!' assert np.allclose(truth, test_stats['Jan_min'].values), msg - truth = mode(res_data[mask], axis=0, keepdims=True).mode[0] + truth = scipy.stats.mode(res_data[mask], axis=0, keepdims=True).mode[0] msg = 'January modes do not match!' assert np.allclose(truth, test_stats['Jan_mode'].values), msg @@ -344,6 +344,34 @@ def test_cli(runner): LOGGERS.clear() +def test_scipy_dist_fit(): + """Test rex temporal stats with a scipy distribution fit function""" + scipy_stats = {'weibull': {'func': scipy.stats.weibull_min.fit}} + + res_h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_*.h5') + dataset = 'windspeed_100m' + sites = slice(0, 10) + stats1 = TemporalStats.run(res_h5, dataset, sites=sites, + statistics=scipy_stats, + res_cls=MultiYearWindResource, + max_workers=1) + assert isinstance(stats1.loc[0, 'weibull'], list) + assert len(stats1.loc[0, 'weibull']) == 3 + + scipy_stats = {'weibull': {'func': scipy.stats.weibull_min.fit, + 'kwargs': {'floc': 0}}} + stats2 = TemporalStats.run(res_h5, dataset, sites=sites, + statistics=scipy_stats, + res_cls=MultiYearWindResource, + max_workers=1) + assert isinstance(stats2.loc[0, 'weibull'], list) + assert len(stats2.loc[0, 'weibull']) == 3 + for gid in range(10): + assert np.allclose(stats2.loc[gid, 'weibull'][0], 2.3, atol=1) + assert np.allclose(stats2.loc[gid, 'weibull'][2], 7.9, atol=1) + assert stats2.loc[gid, 'weibull'][1] == 0 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From 083f17a6c4597702760fd934d661f4b09a7c4da3 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Wed, 3 Jan 2024 16:26:41 -0700 Subject: [PATCH 02/19] started new bias_correction file and dynamically linked up functions with the sam_resource bias_correct() method --- rex/bias_correction.py | 88 ++++++++++++++++++++++++++++++++++++++ rex/sam_resource.py | 88 +++++++++++++++++++++----------------- tests/test_sam_resource.py | 41 ++++++++++-------- 3 files changed, 159 insertions(+), 58 deletions(-) create mode 100644 rex/bias_correction.py diff --git a/rex/bias_correction.py b/rex/bias_correction.py new file mode 100644 index 00000000..b6de7ac8 --- /dev/null +++ b/rex/bias_correction.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +""" +Module to perform bias correction of renewable energy resource data +""" +import numpy as np +import logging + + +logger = logging.getLogger(__name__) + + +def lin_irrad(ghi, dni, dhi, scalar, adder): + """Correct GHI and DNI using linear correction factors. Both irradiance + variables are corrected as `(irradiance*scalar)+adder`. DHI is preserved + based on the relationship `dhi = ghi - (dni * cos(sza))`. Times when GHI + and DNI are zero are preserved and negative values are protected against. + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + scalar : np.ndarray + 1D array of linear scalar values in the shape (space,) + adder : np.ndarray + 1D array of linear adder values in the shape (space,) + + Returns + ------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + """ + + ghi_zeros = ghi == 0 + dni_zeros = dni == 0 + dhi_zeros = dhi == 0 + with np.errstate(divide='ignore', invalid='ignore'): + cos_sza = (ghi - dhi) / dni + + ghi = ghi * scalar + adder + dni = dni * scalar + adder + + ghi = np.maximum(0, ghi) + dni = np.maximum(0, dni) + + ghi[ghi_zeros] = 0 + dni[dni_zeros] = 0 + + with np.errstate(divide='ignore', invalid='ignore'): + dhi = ghi - (dni * cos_sza) + + dhi[dni_zeros] = ghi[dni_zeros] + dhi = np.maximum(0, dhi) + dhi[dhi_zeros] = 0 + + assert not np.isnan(dhi).any() + + return ghi, dni, dhi + + +def lin_ws(ws, scalar, adder): + """Correct windspeed using linear correction factors. Windspeed is + corrected as `(windspeed*scalar)+adder` with a minimum of zero. + + Parameters + ---------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + scalar : np.ndarray + 1D array of linear scalar values in the shape (space,) + adder : np.ndarray + 1D array of linear adder values in the shape (space,) + + Returns + ------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + """ + ws = ws * scalar + adder + ws = np.maximum(ws, 0) + return ws diff --git a/rex/sam_resource.py b/rex/sam_resource.py index ff59c786..69a4931c 100644 --- a/rex/sam_resource.py +++ b/rex/sam_resource.py @@ -3,11 +3,13 @@ Module to handle SAM Resource iterator to create site by site resource DataFrames """ +from inspect import signature import numpy as np import pandas as pd from warnings import warn import logging +import rex.bias_correction from rex.utilities.exceptions import (ResourceKeyError, ResourceRuntimeError, ResourceValueError, SAMInputWarning) from rex.utilities.parse_keys import parse_keys @@ -709,21 +711,19 @@ def bias_correct(self, bc_df): ---------- bc_df : pd.DataFrame DataFrame with wind or solar resource bias correction table. This - has columns: gid, adder, scalar. If either adder or scalar is not - present, scalar defaults to 1 and adder to 0. Only windspeed or - GHI+DNI are corrected depending on the technology. GHI and DNI are - corrected with the same correction factors. + must have columns "gid" and "method", where "gid" is the resource + file indices, and "method" is a function name from the + `rex.bias_correction` module. Only windspeed or GHI+DNI are + corrected depending on the technology. GHI and DNI are corrected + with the same correction factors. """ - if 'adder' not in bc_df: - logger.info('Bias correction table provided, but "adder" not ' - 'found, defaulting to 0.') - bc_df['adder'] = 0 - - if 'scalar' not in bc_df: - logger.info('Bias correction table provided, but "scalar" not ' - 'found, defaulting to 1.') - bc_df['scalar'] = 1 + if 'method' not in bc_df: + msg = ('Bias correction table provided, but "method" not ' + 'found! Need to specify "method" as a function name ' + 'from `rex.bias_correction`') + logger.error(msg) + raise KeyError(msg) if bc_df.index.name != 'gid': msg = ('Bias correction table must have "gid" column but only ' @@ -740,42 +740,52 @@ def bias_correct(self, bc_df): logger.warning(msg) warn(msg) - scalar = np.expand_dims(bc_df.loc[site_arr[isin], 'scalar'].values, 0) - adder = np.expand_dims(bc_df.loc[site_arr[isin], 'adder'].values, 0) + fun_name = bc_df['method'].unique() + msg = ('rex bias correction currently only supports a single unique ' + 'bias correction method per chunk of sites but received: {}' + .format(fun_name)) + assert len(fun_name) == 1, msg + bc_fun = getattr(rex.bias_correction, fun_name[0]) + + bc_fun_kwargs = {} + for col in bc_df.columns: + arr = bc_df.loc[site_arr[isin], col].values + bc_fun_kwargs[col] = np.expand_dims(arr, 0) if 'ghi' in self._res_arrays and 'dni' in self._res_arrays: + logger.debug('Bias correcting irradiance with function {} ' + 'for sites {}'.format(bc_fun, self.sites)) ghi = self._res_arrays['ghi'] dni = self._res_arrays['dni'] dhi = self._res_arrays['dhi'] - ghi_zeros = ghi == 0 - dni_zeros = dni == 0 - dhi_zeros = dhi == 0 - with np.errstate(divide='ignore', invalid='ignore'): - cos_sza = (ghi - dhi) / dni - - ghi[:, isin] = ghi[:, isin] * scalar + adder - dni[:, isin] = dni[:, isin] * scalar + adder - ghi = np.maximum(0, ghi) - dni = np.maximum(0, dni) - ghi[ghi_zeros] = 0 - dni[dni_zeros] = 0 - with np.errstate(divide='ignore', invalid='ignore'): - dhi[:, isin] = ghi[:, isin] - (dni[:, isin] * cos_sza[:, isin]) - - dhi[dni_zeros] = ghi[dni_zeros] - dhi = np.maximum(0, dhi) - dhi[dhi_zeros] = 0 - assert not np.isnan(dhi).any() + + bc_fun_kwargs['ghi'] = ghi[:, isin] + bc_fun_kwargs['dni'] = dni[:, isin] + bc_fun_kwargs['dhi'] = dhi[:, isin] + + sig = signature(bc_fun) + bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() + if k in sig.parameters} + out = bc_fun(**bc_fun_kwargs) + + ghi[:, isin] = out[0][:, isin] + dni[:, isin] = out[1][:, isin] + dhi[:, isin] = out[2][:, isin] + self._res_arrays['ghi'] = ghi self._res_arrays['dni'] = dni self._res_arrays['dhi'] = dhi elif 'windspeed' in self._res_arrays: - logger.debug('Bias correcting windspeeds.') - arr = self._res_arrays['windspeed'] - arr[:, isin] = arr[:, isin] * scalar + adder - arr = np.maximum(arr, 0) - self._res_arrays['windspeed'] = arr + logger.debug('Bias correcting windspeed with function {} ' + 'for sites {}'.format(bc_fun, self.sites)) + ws = self._res_arrays['windspeed'] + bc_fun_kwargs['ws'] = ws[:, isin] + sig = signature(bc_fun) + bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() + if k in sig.parameters} + ws[:, isin] = bc_fun(**bc_fun_kwargs) + self._res_arrays['windspeed'] = ws if self._mean_arrays is not None: # pylint: disable=consider-iterating-dictionary diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index fa757ede..b814edb5 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -394,22 +394,6 @@ def test_nsrdb_and_wtk(): _ = res._get_res_df(0) -def execute_pytest(capture='all', flags='-rapP'): - """Execute module as pytest with detailed summary report. - - Parameters - ---------- - capture : str - Log or stdout/stderr capture option. ex: log (only logger), - all (includes stdout/stderr) - flags : str - Which tests to show logs and results for. - """ - - fname = os.path.basename(__file__) - pytest.main(['-q', '--show-capture={}'.format(capture), fname, flags]) - - def test_bias_correct_wind(): """Test linear bias correction functionality on windspeed""" h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') @@ -420,7 +404,8 @@ def test_bias_correct_wind(): n = 10 bc = pd.DataFrame({'gid': np.arange(n), 'adder': np.random.uniform(-1, 1, n), - 'scalar': np.random.uniform(0.9, 1.1, n)}) + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'lin_ws'}) with pytest.warns() as record: res = WindResource.preload_SAM(h5, sites, hub_heights) @@ -437,7 +422,8 @@ def test_bias_correct_wind(): n = 200 bc = pd.DataFrame({'gid': np.arange(n), 'adder': np.random.uniform(-1, 1, n), - 'scalar': np.random.uniform(0.9, 1.1, n)}) + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'lin_ws'}) with pytest.warns(None) as record: res = WindResource.preload_SAM(h5, sites, hub_heights) @@ -458,7 +444,8 @@ def test_bias_correct_solar(): n = 10 bc = pd.DataFrame({'gid': np.arange(n), 'adder': np.random.uniform(-100, 100, n), - 'scalar': np.random.uniform(1, 1, n)}) + 'scalar': np.random.uniform(1, 1, n), + 'method': 'lin_irrad'}) res = NSRDB.preload_SAM(h5, sites) res.bias_correct(bc) @@ -488,5 +475,21 @@ def test_bias_correct_solar(): assert np.allclose(cos_sza, base_cos_sza, atol=0.005) +def execute_pytest(capture='all', flags='-rapP'): + """Execute module as pytest with detailed summary report. + + Parameters + ---------- + capture : str + Log or stdout/stderr capture option. ex: log (only logger), + all (includes stdout/stderr) + flags : str + Which tests to show logs and results for. + """ + + fname = os.path.basename(__file__) + pytest.main(['-q', '--show-capture={}'.format(capture), fname, flags]) + + if __name__ == '__main__': execute_pytest() From 5a2711d17bfcb7d62fc941552cd30ab1434a648b Mon Sep 17 00:00:00 2001 From: grantbuster Date: Thu, 4 Jan 2024 10:44:41 -0700 Subject: [PATCH 03/19] moved bias correct table and function parsing to separate module for use in reV --- rex/bias_correction.py | 13 +++--- rex/sam_resource.py | 59 +++++++------------------- rex/utilities/bc_utils.py | 87 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 51 deletions(-) create mode 100644 rex/utilities/bc_utils.py diff --git a/rex/bias_correction.py b/rex/bias_correction.py index b6de7ac8..854577ff 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -9,11 +9,12 @@ logger = logging.getLogger(__name__) -def lin_irrad(ghi, dni, dhi, scalar, adder): +def lin_irrad(ghi, dni, dhi, scalar=1, adder=0): """Correct GHI and DNI using linear correction factors. Both irradiance - variables are corrected as `(irradiance*scalar)+adder`. DHI is preserved - based on the relationship `dhi = ghi - (dni * cos(sza))`. Times when GHI - and DNI are zero are preserved and negative values are protected against. + variables are corrected as ``irradiance * scalar + adder``. DHI is + preserved based on the relationship ``dhi = ghi - (dni * cos(sza))``. Times + when GHI and DNI are zero are preserved and negative values are protected + against. Parameters ---------- @@ -65,9 +66,9 @@ def lin_irrad(ghi, dni, dhi, scalar, adder): return ghi, dni, dhi -def lin_ws(ws, scalar, adder): +def lin_ws(ws, scalar=1, adder=0): """Correct windspeed using linear correction factors. Windspeed is - corrected as `(windspeed*scalar)+adder` with a minimum of zero. + corrected as ``windspeed * scalar + adder`` with a minimum of zero. Parameters ---------- diff --git a/rex/sam_resource.py b/rex/sam_resource.py index 69a4931c..a1119c5f 100644 --- a/rex/sam_resource.py +++ b/rex/sam_resource.py @@ -9,7 +9,7 @@ from warnings import warn import logging -import rex.bias_correction +from rex.utilities.bc_utils import _parse_bc_table from rex.utilities.exceptions import (ResourceKeyError, ResourceRuntimeError, ResourceValueError, SAMInputWarning) from rex.utilities.parse_keys import parse_keys @@ -713,44 +713,13 @@ def bias_correct(self, bc_df): DataFrame with wind or solar resource bias correction table. This must have columns "gid" and "method", where "gid" is the resource file indices, and "method" is a function name from the - `rex.bias_correction` module. Only windspeed or GHI+DNI are - corrected depending on the technology. GHI and DNI are corrected - with the same correction factors. + ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are + corrected, depending on the technology. See the + ``rex.bias_correction`` module for more details on available + bias correction methods. """ - if 'method' not in bc_df: - msg = ('Bias correction table provided, but "method" not ' - 'found! Need to specify "method" as a function name ' - 'from `rex.bias_correction`') - logger.error(msg) - raise KeyError(msg) - - if bc_df.index.name != 'gid': - msg = ('Bias correction table must have "gid" column but only ' - 'found: {}'.format(list(bc_df.columns))) - assert 'gid' in bc_df, msg - bc_df = bc_df.set_index('gid') - - site_arr = np.array(self.sites) - isin = np.isin(self.sites, bc_df.index.values) - if not isin.all(): - missing = site_arr[~isin] - msg = ('{} sites were missing from the bias correction table, ' - 'not bias correcting: {}'.format(len(missing), missing)) - logger.warning(msg) - warn(msg) - - fun_name = bc_df['method'].unique() - msg = ('rex bias correction currently only supports a single unique ' - 'bias correction method per chunk of sites but received: {}' - .format(fun_name)) - assert len(fun_name) == 1, msg - bc_fun = getattr(rex.bias_correction, fun_name[0]) - - bc_fun_kwargs = {} - for col in bc_df.columns: - arr = bc_df.loc[site_arr[isin], col].values - bc_fun_kwargs[col] = np.expand_dims(arr, 0) + bc_fun, bc_fun_kwargs, bool_bc = _parse_bc_table(bc_df, self.sites) if 'ghi' in self._res_arrays and 'dni' in self._res_arrays: logger.debug('Bias correcting irradiance with function {} ' @@ -759,18 +728,18 @@ def bias_correct(self, bc_df): dni = self._res_arrays['dni'] dhi = self._res_arrays['dhi'] - bc_fun_kwargs['ghi'] = ghi[:, isin] - bc_fun_kwargs['dni'] = dni[:, isin] - bc_fun_kwargs['dhi'] = dhi[:, isin] + bc_fun_kwargs['ghi'] = ghi[:, bool_bc] + bc_fun_kwargs['dni'] = dni[:, bool_bc] + bc_fun_kwargs['dhi'] = dhi[:, bool_bc] sig = signature(bc_fun) bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() if k in sig.parameters} out = bc_fun(**bc_fun_kwargs) - ghi[:, isin] = out[0][:, isin] - dni[:, isin] = out[1][:, isin] - dhi[:, isin] = out[2][:, isin] + ghi[:, bool_bc] = out[0][:, bool_bc] + dni[:, bool_bc] = out[1][:, bool_bc] + dhi[:, bool_bc] = out[2][:, bool_bc] self._res_arrays['ghi'] = ghi self._res_arrays['dni'] = dni @@ -780,11 +749,11 @@ def bias_correct(self, bc_df): logger.debug('Bias correcting windspeed with function {} ' 'for sites {}'.format(bc_fun, self.sites)) ws = self._res_arrays['windspeed'] - bc_fun_kwargs['ws'] = ws[:, isin] + bc_fun_kwargs['ws'] = ws[:, bool_bc] sig = signature(bc_fun) bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() if k in sig.parameters} - ws[:, isin] = bc_fun(**bc_fun_kwargs) + ws[:, bool_bc] = bc_fun(**bc_fun_kwargs) self._res_arrays['windspeed'] = ws if self._mean_arrays is not None: diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py new file mode 100644 index 00000000..2fc0b331 --- /dev/null +++ b/rex/utilities/bc_utils.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +""" +rex bias correction utilities. +""" +import numpy as np +import logging +from warnings import warn +import rex.bias_correction + + +logger = logging.getLogger(__name__) + + +def _parse_bc_table(bc_df, gids): + """Parse the bias correction table for required bc functions and kwargs + + Parameters + ---------- + bc_df : pd.DataFrame + DataFrame with wind or solar resource bias correction table. This + must have columns "gid" and "method", where "gid" is the resource + file indices, and "method" is a function name from the + ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are + corrected, depending on the technology. See the + ``rex.bias_correction`` module for more details on available + bias correction methods. + gids : list | np.ndarray + Array of integer gids (spatial indices) from the source h5 file. + This is used to get the correct bias correction parameters from + ``bias_correct`` table based on its ``gid`` column + + Returns + ------- + bc_fun : function + Function from ``rex.bias_correction`` to use. + bc_fun_kwargs : dict + Kwargs from ``bc_df`` to input to ``bc_fun``. This may include extra + kwargs that are not required by ``bc_fun`` and should be cleaned before + passing to the function. + bool_bc : np.ndarray + 1D Boolean array with length equal to the ``gids`` input with ``True`` + where data has available bias correction inputs in ``bc_df`` and + ``False`` where not + """ + + if 'method' not in bc_df: + msg = ('Bias correction table provided, but "method" not ' + 'found! Need to specify "method" as a function name ' + 'from `rex.bias_correction`') + logger.error(msg) + raise KeyError(msg) + + if bc_df.index.name != 'gid': + msg = ('Bias correction table must have "gid" column but only ' + 'found: {}'.format(list(bc_df.columns))) + assert 'gid' in bc_df, msg + bc_df = bc_df.set_index('gid') + + gid_arr = np.array(gids) + bool_bc = np.isin(gid_arr, bc_df.index.values) + if not bool_bc.all(): + missing = gid_arr[~bool_bc] + msg = ('{} sites were missing from the bias correction table, ' + 'not bias correcting: {}'.format(len(missing), missing)) + logger.warning(msg) + warn(msg) + + fun_name = bc_df['method'].unique() + msg = ('rex bias correction currently only supports a single unique ' + 'bias correction method per chunk of sites but received: {}' + .format(fun_name)) + assert len(fun_name) == 1, msg + bc_fun = getattr(rex.bias_correction, fun_name[0], None) + if bc_fun is None: + avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')] + msg = ('Could not find method name "{}" in ``rex.bias_correction`` ' + 'which has the available objects: {}' + .format(fun_name[0], avail)) + logger.error(msg) + raise KeyError(msg) + + bc_fun_kwargs = {} + for col in bc_df.columns: + arr = bc_df.loc[gid_arr[bool_bc], col].values + bc_fun_kwargs[col] = np.expand_dims(arr, 0) + + return bc_fun, bc_fun_kwargs, bool_bc From 29ea3570c65d29ff99857d21ed8c9e99db2be714 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Thu, 4 Jan 2024 10:57:55 -0700 Subject: [PATCH 04/19] negative tests for bad bc inputs --- rex/utilities/bc_utils.py | 16 ++++++++------ tests/test_sam_resource.py | 44 +++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py index 2fc0b331..184abe65 100644 --- a/rex/utilities/bc_utils.py +++ b/rex/utilities/bc_utils.py @@ -44,16 +44,18 @@ def _parse_bc_table(bc_df, gids): """ if 'method' not in bc_df: - msg = ('Bias correction table provided, but "method" not ' - 'found! Need to specify "method" as a function name ' - 'from `rex.bias_correction`') - logger.error(msg) + msg = ('Bias correction table provided, but "method" column not ' + 'found! Only see columns: {}. Need to specify "method" which ' + 'is a function name from `rex.bias_correction`' + .format(list(bc_df.columns))) raise KeyError(msg) if bc_df.index.name != 'gid': - msg = ('Bias correction table must have "gid" column but only ' - 'found: {}'.format(list(bc_df.columns))) - assert 'gid' in bc_df, msg + if 'gid' not in bc_df: + msg = ('Bias correction table must have "gid" column but only ' + 'found: {}'.format(list(bc_df.columns))) + logger.error(msg) + raise KeyError(msg) bc_df = bc_df.set_index('gid') gid_arr = np.array(gids) diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index b814edb5..aa75290a 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -394,6 +394,39 @@ def test_nsrdb_and_wtk(): _ = res._get_res_df(0) +def test_bias_correct_errors(): + """Negative tests for bad bias correction inputs""" + h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') + sites = slice(0, 20) + hub_heights = 80 + + n = 10 + res = WindResource.preload_SAM(h5, sites, hub_heights) + + bc = pd.DataFrame({'gid': np.arange(n), + 'adder': np.random.uniform(-1, 1, n), + 'scalar': np.random.uniform(0.9, 1.1, n)}) + with pytest.raises(KeyError) as record: + res.bias_correct(bc) + assert '"method" column not found!' in str(record.value) + + bc = pd.DataFrame({'gidasdfasf': np.arange(n), + 'adder': np.random.uniform(-1, 1, n), + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'lin_ws'}) + with pytest.raises(KeyError) as record: + res.bias_correct(bc) + assert 'must have "gid" column' in str(record.value) + + bc = pd.DataFrame({'gid': np.arange(n), + 'adder': np.random.uniform(-1, 1, n), + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'testfasdfasdf'}) + with pytest.raises(KeyError) as record: + res.bias_correct(bc) + assert 'Could not find method name "test' in str(record.value) + + def test_bias_correct_wind(): """Test linear bias correction functionality on windspeed""" h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') @@ -407,16 +440,17 @@ def test_bias_correct_wind(): 'scalar': np.random.uniform(0.9, 1.1, n), 'method': 'lin_ws'}) + res = WindResource.preload_SAM(h5, sites, hub_heights) + with pytest.warns() as record: - res = WindResource.preload_SAM(h5, sites, hub_heights) res.bias_correct(bc) assert len(record) == 1 assert 'missing from the bias correction' in str(record[0].message) assert np.allclose(res._res_arrays['windspeed'][:, 10:], base_res._res_arrays['windspeed'][:, 10:]) - assert not (res._res_arrays['windspeed'][:, :10] == - base_res._res_arrays['windspeed'][:, :10]).any() + assert not (res._res_arrays['windspeed'][:, :10] + == base_res._res_arrays['windspeed'][:, :10]).any() assert (res._res_arrays['windspeed'] >= 0).all() n = 200 @@ -430,8 +464,8 @@ def test_bias_correct_wind(): res.bias_correct(bc) assert not any(record) - assert not (res._res_arrays['windspeed'] == - base_res._res_arrays['windspeed']).any() + assert not (res._res_arrays['windspeed'] + == base_res._res_arrays['windspeed']).any() assert (res._res_arrays['windspeed'] >= 0).all() From 13e8a4607b21059d535fe05b85c86200c8ad713c Mon Sep 17 00:00:00 2001 From: grantbuster Date: Thu, 4 Jan 2024 14:42:04 -0700 Subject: [PATCH 05/19] started parametric quantile delta mapping bias correction method --- rex/bias_correction.py | 150 +++++++++++++++++++++++++++++++++++++ rex/utilities/bc_utils.py | 14 +++- tests/test_sam_resource.py | 30 +++++++- 3 files changed, 189 insertions(+), 5 deletions(-) diff --git a/rex/bias_correction.py b/rex/bias_correction.py index 854577ff..cd00f04f 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -2,6 +2,7 @@ """ Module to perform bias correction of renewable energy resource data """ +import scipy import numpy as np import logging @@ -87,3 +88,152 @@ def lin_ws(ws, scalar=1, adder=0): ws = ws * scalar + adder ws = np.maximum(ws, 0) return ws + + +class PQDM: + """Correct windspeed using parametric quantile delta mapping based on a + parametric implementation of the method from Cannon et al., 2015 + + Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in + Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). + """ + + def __init__(self, params_oh, params_mh, params_mf, dist, relative): + """ + Parameters + ---------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + params_oh : np.ndarray | list + 2D array of probability distribution parameters created using a + function like ``scipy.stats.weibull_min.fit()`` where the shape is + (space, N) with N being the number of parameters required by the + specified distribution. This input arg is for the **observed + historical distribution**. + params_mh : np.ndarray | list + Same requirements as params_oh. This input arg is for the **modeled + historical distribution**. + params_mf : np.ndarray | list | None + Same requirements as params_oh. This input arg is for the **modeled + future distribution**. If this is None, this defaults to params_mh + (no future data). + dist : str | np.ndarray + Parametric probability distribution name to use to model the + windspeed. This can be any distribution name from ``scipy.stats``, + but "weibull_min" is a common choice for windspeed distributions. + Can also be an array of dist inputs if being used from reV, but + they must all be the same option. + relative : bool | np.ndarray + Flag to preserve relative rather than absolute changes in + quantiles. relative=False (default) will multiply by the change in + quantiles while relative=True will add. See Equations 4-6 from + Cannon et al., 2015 for more details. Can also be an array of dist + inputs if being used from reV, but they must all be the same + option. + """ + self.params_oh = params_oh + self.params_mh = params_mh + self.params_mf = params_mf + self.dist = self._clean_kwarg(dist) + self.relative = self._clean_kwarg(relative) + + self.dist_obj = getattr(scipy.stats, self.dist, None) + if self.dist_obj is None: + msg = ('Could not get requested distribution "{}" from ' + '``scipy.stats``. Please double check your spelling and ' + 'select one of the options from here: ' + 'https://docs.scipy.org/doc/scipy/reference/stats.html' + .format(self.dist)) + logger.error(msg) + raise KeyError(msg) + + @staticmethod + def _clean_kwarg(inp): + unique = np.unique(inp) + msg = ('PQDM kwargs must have only one unique input even if being ' + 'called with arrays as part of reV but found: {}' + .format(unique)) + assert len(unique) == 1, msg + + while isinstance(inp, np.ndarray): + inp = inp[0] + + return inp + + @staticmethod + def _clean_params(params, arr_shape): + """Params shape should be (space, N) where N is the number of + parameters for the distribution. Array shape should be + (time, space).""" + if arr_shape[1] == params.shape[0]: + params = [params[:, i] for i in range(params.shape[1])] + return params + + def __call__(self, arr): + params_oh = self._clean_params(self.params_oh, arr.shape) + params_mh = self._clean_params(self.params_mh, arr.shape) + params_mf = self._clean_params(self.params_mf, arr.shape) + + p_mf = self.dist_obj.cdf(arr, *params_mf) + x_oh = self.dist_obj.ppf(p_mf, *params_oh) + + if self.relative: + delta = arr / self.dist_obj.ppf(p_mf, *params_mh) + arr_bc = x_oh * delta + else: + delta = arr - self.dist_obj.ppf(p_mf, *params_mh) + arr_bc = x_oh + delta + + return arr_bc + + +def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min', + relative=True): + """Correct windspeed using parametric quantile delta mapping based on a + parametric implementation of the method from Cannon et al., 2015 + + Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in + Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). + + Parameters + ---------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + params_oh : np.ndarray | list + 2D array of probability distribution parameters created using a + function like ``scipy.stats.weibull_min.fit()`` where the shape is + (space, N) with N being the number of parameters required by the + specified distribution. This input arg is for the **observed historical + distribution**. + params_mh : np.ndarray | list + Same requirements as params_oh. This input arg is for the **modeled + historical distribution**. + params_mf : np.ndarray | list | None + Same requirements as params_oh. This input arg is for the **modeled + future distribution**. If this is None, this defaults to params_mh + (no future data). + dist : str | np.ndarray + Parametric probability distribution name to use to model the windspeed. + This can be any distribution name from ``scipy.stats``, but + "weibull_min" is a common choice for windspeed distributions. Can also + be an array of dist inputs if being used from reV, but they must all be + the same option. + relative : bool | np.ndarray + Flag to preserve relative rather than absolute changes in quantiles. + relative=False (default) will multiply by the change in quantiles while + relative=True will add. See Equations 4-6 from Cannon et al., 2015 for + more details. Can also be an array of dist inputs if being used from + reV, but they must all be the same option. + + Returns + ------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + """ + + pqdm = PQDM(params_oh, params_mh, params_mf, dist, relative) + ws = pqdm(ws) + ws = np.maximum(ws, 0) + return ws diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py index 184abe65..224f935a 100644 --- a/rex/utilities/bc_utils.py +++ b/rex/utilities/bc_utils.py @@ -2,6 +2,7 @@ """ rex bias correction utilities. """ +import json import numpy as np import logging from warnings import warn @@ -83,7 +84,18 @@ def _parse_bc_table(bc_df, gids): bc_fun_kwargs = {} for col in bc_df.columns: + + # load serialized lists from string columns in bc_df into nested lists + sample = bc_df[col].values[0] + if isinstance(sample, str) and '[' in sample and ']' in sample: + bc_df[col] = bc_df[col].apply(json.loads) + arr = bc_df.loc[gid_arr[bool_bc], col].values - bc_fun_kwargs[col] = np.expand_dims(arr, 0) + + # nested lists in bc_df converted to arr of shape (space, N) + if isinstance(arr[0], (list, tuple)): + arr = np.array(arr.tolist()) + + bc_fun_kwargs[col] = arr return bc_fun, bc_fun_kwargs, bool_bc diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index aa75290a..44fa0675 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -427,8 +427,8 @@ def test_bias_correct_errors(): assert 'Could not find method name "test' in str(record.value) -def test_bias_correct_wind(): - """Test linear bias correction functionality on windspeed""" +def test_bias_correct_wind_lin(): + """Test linear bias correction function on windspeed""" h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') sites = slice(0, 20) hub_heights = 80 @@ -469,8 +469,8 @@ def test_bias_correct_wind(): assert (res._res_arrays['windspeed'] >= 0).all() -def test_bias_correct_solar(): - """Test adder bias correction functionality on irradiance""" +def test_bias_correct_solar_lin(): + """Test adder bias correction function on irradiance""" h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5') sites = slice(0, 10) base_res = NSRDB.preload_SAM(h5, sites) @@ -509,6 +509,28 @@ def test_bias_correct_solar(): assert np.allclose(cos_sza, base_cos_sza, atol=0.005) +def test_bias_correct_wind_pqdm(): + """Test parametric QDM bias correction function on windspeed""" + h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') + sites = slice(0, 20) + hub_heights = 80 + base_res = WindResource.preload_SAM(h5, sites, hub_heights) + + n = 10 + bc = pd.DataFrame({'gid': np.arange(n), + 'method': 'pqdm_ws', + 'params_oh': '[2.6, -0.8, 8.9]', + 'params_mh': '[2.6, -0.8, 8.9]', + 'params_mf': '[2.6, -0.8, 8.9]', + 'dist': 'weibull_min', + 'relative': True, + }) + + res = WindResource.preload_SAM(h5, sites, hub_heights) + res.bias_correct(bc) + + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From d36793acff737d37cf59a3974cb57ec097cf953f Mon Sep 17 00:00:00 2001 From: grantbuster Date: Thu, 4 Jan 2024 16:04:50 -0700 Subject: [PATCH 06/19] finished PQDM for wind with thorough test --- rex/bias_correction.py | 81 ++++++++++++++++++++++++++++---------- tests/test_sam_resource.py | 38 +++++++++++++++--- 2 files changed, 93 insertions(+), 26 deletions(-) diff --git a/rex/bias_correction.py b/rex/bias_correction.py index cd00f04f..0e6d22cb 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -91,8 +91,11 @@ def lin_ws(ws, scalar=1, adder=0): class PQDM: - """Correct windspeed using parametric quantile delta mapping based on a - parametric implementation of the method from Cannon et al., 2015 + """Class for parametric quantile delta mapping based on a parametric + implementation of the method from Cannon et al., 2015 + + Note that this is a utility class for implementing PQDM and should not be + requested directly as a method in the reV/rex bias correction table input Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in @@ -105,16 +108,16 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative): ---------- ws : np.ndarray 2D array of windspeed values in shape (time, space) - params_oh : np.ndarray | list + params_oh : np.ndarray 2D array of probability distribution parameters created using a function like ``scipy.stats.weibull_min.fit()`` where the shape is (space, N) with N being the number of parameters required by the - specified distribution. This input arg is for the **observed - historical distribution**. - params_mh : np.ndarray | list + specified distribution e.g., (shape, loc, scale) for weibull_min. + This input arg is for the **observed historical distribution**. + params_mh : np.ndarray Same requirements as params_oh. This input arg is for the **modeled historical distribution**. - params_mf : np.ndarray | list | None + params_mf : np.ndarray | None Same requirements as params_oh. This input arg is for the **modeled future distribution**. If this is None, this defaults to params_mh (no future data). @@ -134,22 +137,25 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative): """ self.params_oh = params_oh self.params_mh = params_mh - self.params_mf = params_mf - self.dist = self._clean_kwarg(dist) + self.params_mf = params_mf if params_mf is not None else params_mh + self.dist_name = self._clean_kwarg(dist) self.relative = self._clean_kwarg(relative) - self.dist_obj = getattr(scipy.stats, self.dist, None) - if self.dist_obj is None: + self.scipy_dist = getattr(scipy.stats, self.dist_name, None) + if self.scipy_dist is None: msg = ('Could not get requested distribution "{}" from ' '``scipy.stats``. Please double check your spelling and ' 'select one of the options from here: ' 'https://docs.scipy.org/doc/scipy/reference/stats.html' - .format(self.dist)) + .format(self.dist_name)) logger.error(msg) raise KeyError(msg) @staticmethod def _clean_kwarg(inp): + """Clean any kwargs inputs (e.g., dist, relative) that might be + provided as an array and must be collapsed into a single string or + boolean value""" unique = np.unique(inp) msg = ('PQDM kwargs must have only one unique input even if being ' 'called with arrays as part of reV but found: {}' @@ -163,28 +169,61 @@ def _clean_kwarg(inp): @staticmethod def _clean_params(params, arr_shape): - """Params shape should be (space, N) where N is the number of - parameters for the distribution. Array shape should be - (time, space).""" + """Re-organize 2D parameter arrays for passing into scipy distribution + functions. + + Parameters + ---------- + params : np.ndarray + Input params shape should be (space, N) where N is the number of + parameters for the distribution. + arr_shape : tuple + Array shape should be (time, space). + + Returns + ------- + params : np.ndarray | list + If the two inputs are of the expected shape, this output will be + params unpacked along axis=1 into a list so that the list entries + represent the scipy distribution parameters (e.g., shape, scale, + loc) and each list entry is of shape (space,) + """ if arr_shape[1] == params.shape[0]: params = [params[:, i] for i in range(params.shape[1])] return params def __call__(self, arr): + """Run the PQDM function to bias correct an array + + Parameters + ---------- + arr : np.ndarray + 2D array of values in shape (time, space) + + Returns + ------- + arr : np.ndarray + Bias corrected copy of the input array with same shape. + """ + params_oh = self._clean_params(self.params_oh, arr.shape) params_mh = self._clean_params(self.params_mh, arr.shape) params_mf = self._clean_params(self.params_mf, arr.shape) - p_mf = self.dist_obj.cdf(arr, *params_mf) - x_oh = self.dist_obj.ppf(p_mf, *params_oh) + p_mf = self.scipy_dist.cdf(arr, *params_mf) + x_oh = self.scipy_dist.ppf(p_mf, *params_oh) if self.relative: - delta = arr / self.dist_obj.ppf(p_mf, *params_mh) + delta = arr / self.scipy_dist.ppf(p_mf, *params_mh) arr_bc = x_oh * delta else: - delta = arr - self.dist_obj.ppf(p_mf, *params_mh) + delta = arr - self.scipy_dist.ppf(p_mf, *params_mh) arr_bc = x_oh + delta + msg = ('Input shape {} does not match PQDM bias corrected output ' + 'shape {}!'.format(arr.shape, arr_bc.shape)) + assert arr.shape == arr_bc.shape, msg + return arr_bc @@ -205,8 +244,8 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min', 2D array of probability distribution parameters created using a function like ``scipy.stats.weibull_min.fit()`` where the shape is (space, N) with N being the number of parameters required by the - specified distribution. This input arg is for the **observed historical - distribution**. + specified distribution e.g., (shape, loc, scale) for weibull_min. This + input arg is for the **observed historical distribution**. params_mh : np.ndarray | list Same requirements as params_oh. This input arg is for the **modeled historical distribution**. diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index 44fa0675..9eb5d1c3 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -10,6 +10,7 @@ import pandas as pd from pandas.testing import assert_series_equal import pytest +from scipy.stats import weibull_min from rex.renewable_resource import WindResource, NSRDB, WaveResource from rex.multi_file_resource import MultiFileNSRDB @@ -512,23 +513,50 @@ def test_bias_correct_solar_lin(): def test_bias_correct_wind_pqdm(): """Test parametric QDM bias correction function on windspeed""" h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') - sites = slice(0, 20) + n = 20 + sites = slice(0, n) hub_heights = 80 base_res = WindResource.preload_SAM(h5, sites, hub_heights) + base_ws = base_res._res_arrays['windspeed'] + shape, loc, scale = weibull_min.fit(base_ws.mean(axis=1)) - n = 10 bc = pd.DataFrame({'gid': np.arange(n), 'method': 'pqdm_ws', - 'params_oh': '[2.6, -0.8, 8.9]', - 'params_mh': '[2.6, -0.8, 8.9]', - 'params_mf': '[2.6, -0.8, 8.9]', + 'params_oh': f'[{shape}, {loc}, {scale}]', + 'params_mh': f'[{shape}, {loc}, {scale}]', + 'params_mf': f'[{shape}, {loc}, {scale}]', 'dist': 'weibull_min', 'relative': True, }) res = WindResource.preload_SAM(h5, sites, hub_heights) res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + assert np.allclose(bc_ws, base_ws) + scale_mod = 0.9 + bc['params_mh'] = f'[{shape}, {loc}, {scale * scale_mod}]' + bc = bc.drop('params_mf', axis=1) + res = WindResource.preload_SAM(h5, sites, hub_heights) + res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + + # spot check using manual calc + idys = np.random.choice(base_ws.shape[0], size=10) + idxs = np.random.choice(base_ws.shape[1], size=10) + for idy, idx in zip(idys, idxs): + p_mf = weibull_min.cdf(base_ws[idy, idx], shape, loc, + scale * scale_mod) + x_oh = weibull_min.ppf(p_mf, shape, loc, scale) + delta = base_ws[idy, idx] / weibull_min.ppf(p_mf, shape, loc, + scale * scale_mod) + single_ws_bc = x_oh * delta + assert np.allclose(single_ws_bc, bc_ws[idy, idx]) + + # almost none of the values should be the same as the input, and none + # should be negative + assert ((bc_ws == base_ws).sum() / bc_ws.size) < 0.001 + assert not (bc_ws < 0).any() def execute_pytest(capture='all', flags='-rapP'): From bd8e7d832e26519dad76d9cbd61b6151476a68ce Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 5 Jan 2024 10:50:05 -0700 Subject: [PATCH 07/19] added PQDM for irradiance --- rex/bias_correction.py | 190 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 169 insertions(+), 21 deletions(-) diff --git a/rex/bias_correction.py b/rex/bias_correction.py index 0e6d22cb..c0b07c73 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -10,12 +10,9 @@ logger = logging.getLogger(__name__) -def lin_irrad(ghi, dni, dhi, scalar=1, adder=0): - """Correct GHI and DNI using linear correction factors. Both irradiance - variables are corrected as ``irradiance * scalar + adder``. DHI is - preserved based on the relationship ``dhi = ghi - (dni * cos(sza))``. Times - when GHI and DNI are zero are preserved and negative values are protected - against. +def _irrad_pre_proc(ghi, dni, dhi): + """Irradiance data pre processing to get ancillary variables + (run before bias correction). Parameters ---------- @@ -25,19 +22,18 @@ def lin_irrad(ghi, dni, dhi, scalar=1, adder=0): 2D array of direct normal irradiance values in shape (time, space) dhi : np.ndarray 2D array of diffuse horizontal irradiance values in shape (time, space) - scalar : np.ndarray - 1D array of linear scalar values in the shape (space,) - adder : np.ndarray - 1D array of linear adder values in the shape (space,) Returns ------- - ghi : np.ndarray - 2D array of global horizontal irradiance values in shape (time, space) - dni : np.ndarray - 2D array of direct normal irradiance values in shape (time, space) - dhi : np.ndarray - 2D array of diffuse horizontal irradiance values in shape (time, space) + ghi_zeros : np.ndarray + 2D boolean array, True where ghi==0, same shape as ghi input + dni_zeros : np.ndarray + 2D boolean array, True where dni==0, same shape as dni input + dhi_zeros : np.ndarray + 2D boolean array, True where dhi==0, same shape as dhi input + cos_sza : np.ndarray + 2D array for cos(solar_zenith_angle) calculated from the basic + relationship ``cos_sza = (ghi - dhi) / dni`` """ ghi_zeros = ghi == 0 @@ -46,8 +42,38 @@ def lin_irrad(ghi, dni, dhi, scalar=1, adder=0): with np.errstate(divide='ignore', invalid='ignore'): cos_sza = (ghi - dhi) / dni - ghi = ghi * scalar + adder - dni = dni * scalar + adder + return ghi_zeros, dni_zeros, dhi_zeros, cos_sza + + +def _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, cos_sza): + """Irradiance data post processing to calculate DHI and set limits on + irradiance variables (run after bias correction). + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + ghi_zeros : np.ndarray + 2D boolean array, True where ghi==0, same shape as ghi input + dni_zeros : np.ndarray + 2D boolean array, True where dni==0, same shape as dni input + dhi_zeros : np.ndarray + 2D boolean array, True where dhi==0, same shape as dhi input + cos_sza : np.ndarray + 2D array for cos(solar_zenith_angle) calculated from the basic + relationship ``cos_sza = (ghi - dhi) / dni`` + + Returns + ------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + """ ghi = np.maximum(0, ghi) dni = np.maximum(0, dni) @@ -67,6 +93,47 @@ def lin_irrad(ghi, dni, dhi, scalar=1, adder=0): return ghi, dni, dhi +def lin_irrad(ghi, dni, dhi, scalar=1, adder=0): + """Correct GHI and DNI using linear correction factors. Both irradiance + variables are corrected as ``irradiance * scalar + adder``. DHI is + preserved based on the relationship ``dhi = ghi - (dni * cos(sza))``. Times + when GHI and DNI are zero are preserved and negative values are protected + against. + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + scalar : np.ndarray + 1D array of linear scalar values in the shape (space,) + adder : np.ndarray + 1D array of linear adder values in the shape (space,) + + Returns + ------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + """ + + ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi) + + ghi = ghi * scalar + adder + dni = dni * scalar + adder + + ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, + cos_sza) + + return ghi, dni, dhi + + def lin_ws(ws, scalar=1, adder=0): """Correct windspeed using linear correction factors. Windspeed is corrected as ``windspeed * scalar + adder`` with a minimum of zero. @@ -90,7 +157,7 @@ def lin_ws(ws, scalar=1, adder=0): return ws -class PQDM: +class _PQDM: """Class for parametric quantile delta mapping based on a parametric implementation of the method from Cannon et al., 2015 @@ -157,7 +224,7 @@ def _clean_kwarg(inp): provided as an array and must be collapsed into a single string or boolean value""" unique = np.unique(inp) - msg = ('PQDM kwargs must have only one unique input even if being ' + msg = ('_PQDM kwargs must have only one unique input even if being ' 'called with arrays as part of reV but found: {}' .format(unique)) assert len(unique) == 1, msg @@ -227,6 +294,87 @@ def __call__(self, arr): return arr_bc +def pqdm_irrad(ghi, dni, dhi, + ghi_params_oh, dni_params_oh, + ghi_params_mh, dni_params_mh, + ghi_params_mf=None, dni_params_mf=None, + dist='weibull_min', relative=True): + """Correct irradiance using parametric quantile delta mapping based on a + parametric implementation of the method from Cannon et al., 2015 + + Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in + Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + ghi_params_oh : np.ndarray | list + 2D array of probability distribution parameters for GHI created using a + function like ``scipy.stats.weibull_min.fit()`` where the shape is + (space, N) with N being the number of parameters required by the + specified distribution e.g., (shape, loc, scale) for weibull_min. This + input arg is for the **observed historical distribution**. + dni_params_oh : np.ndarray | list + Same requirements as ghi_params_oh. This input arg is for the + **observed historical distribution** for DNI. + ghi_params_mh : np.ndarray | list + Same requirements as ghi_params_oh. This input arg is for the **modeled + historical distribution**. + dni_params_mh : np.ndarray | list + Same requirements as ghi_params_oh. This input arg is for the **modeled + historical distribution** for DNI. + ghi_params_mf : np.ndarray | list | None + Same requirements as ghi_params_oh. This input arg is for the **modeled + future distribution**. If this is None, this defaults to ghi_params_mh + (no future data). + dni_params_mf : np.ndarray | list | None + Same requirements as ghi_params_oh. This input arg is for the **modeled + future distribution** for DNI. If this is None, this defaults to + dni_params_mh (no future data). + dist : str | np.ndarray + Parametric probability distribution name to use to model the windspeed. + This can be any distribution name from ``scipy.stats``. Can also + be an array of dist inputs if being used from reV, but they must all be + the same option. + relative : bool | np.ndarray + Flag to preserve relative rather than absolute changes in quantiles. + relative=False (default) will multiply by the change in quantiles while + relative=True will add. See Equations 4-6 from Cannon et al., 2015 for + more details. Can also be an array of dist inputs if being used from + reV, but they must all be the same option. + + Returns + ------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + """ + + ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi) + + ghi_pqdm = _PQDM(ghi_params_oh, ghi_params_mh, ghi_params_mf, + dist, relative) + dni_pqdm = _PQDM(dni_params_oh, dni_params_mh, dni_params_mf, + dist, relative) + + ghi = ghi_pqdm(ghi) + dni = dni_pqdm(dni) + + ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, + cos_sza) + + return ghi, dni, dhi + + def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min', relative=True): """Correct windspeed using parametric quantile delta mapping based on a @@ -272,7 +420,7 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min', 2D array of windspeed values in shape (time, space) """ - pqdm = PQDM(params_oh, params_mh, params_mf, dist, relative) + pqdm = _PQDM(params_oh, params_mh, params_mf, dist, relative) ws = pqdm(ws) ws = np.maximum(ws, 0) return ws From f5f12c6d16f152aed5781b0a0e5ba097d1fec39d Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 5 Jan 2024 14:22:47 -0700 Subject: [PATCH 08/19] implemented empirical QDM with tests --- rex/bias_correction.py | 262 +++++++++++++++++++++++-------------- tests/test_sam_resource.py | 85 +++++++++++- 2 files changed, 247 insertions(+), 100 deletions(-) diff --git a/rex/bias_correction.py b/rex/bias_correction.py index c0b07c73..649df17f 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -85,8 +85,8 @@ def _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, cos_sza): dhi = ghi - (dni * cos_sza) dhi[dni_zeros] = ghi[dni_zeros] - dhi = np.maximum(0, dhi) dhi[dhi_zeros] = 0 + dhi = np.maximum(0, dhi) assert not np.isnan(dhi).any() @@ -157,11 +157,11 @@ def lin_ws(ws, scalar=1, adder=0): return ws -class _PQDM: - """Class for parametric quantile delta mapping based on a parametric - implementation of the method from Cannon et al., 2015 +class _QuantileDeltaMapping: + """Class for quantile delta mapping based on the method from + Cannon et al., 2015 - Note that this is a utility class for implementing PQDM and should not be + Note that this is a utility class for implementing QDM and should not be requested directly as a method in the reV/rex bias correction table input Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM @@ -173,50 +173,56 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative): """ Parameters ---------- - ws : np.ndarray - 2D array of windspeed values in shape (time, space) params_oh : np.ndarray - 2D array of probability distribution parameters created using a - function like ``scipy.stats.weibull_min.fit()`` where the shape is - (space, N) with N being the number of parameters required by the - specified distribution e.g., (shape, loc, scale) for weibull_min. - This input arg is for the **observed historical distribution**. + 2D array of **observed historical** distribution parameters created + from a multi-year set of data where the shape is (space, N). This + can be the output of a parametric distribution fit like + ``scipy.stats.weibull_min.fit()`` where N is the number of + parameters for that distribution, or this can define the x-values + of N points from an empirical CDF that will be linearly + interpolated between. If this is an empirical CDF, this must + include the 0th and 100th percentile values and have even + percentile spacing between values. params_mh : np.ndarray Same requirements as params_oh. This input arg is for the **modeled historical distribution**. params_mf : np.ndarray | None Same requirements as params_oh. This input arg is for the **modeled future distribution**. If this is None, this defaults to params_mh - (no future data). + (no future data, just corrected to modeled historical distribution) dist : str | np.ndarray - Parametric probability distribution name to use to model the - windspeed. This can be any distribution name from ``scipy.stats``, - but "weibull_min" is a common choice for windspeed distributions. - Can also be an array of dist inputs if being used from reV, but - they must all be the same option. + Probability distribution name to use to model the data which + determines how the param args are used. This can "empirical" or any + continuous distribution name from ``scipy.stats``. Can also be a 1D + array of dist inputs if being used from reV, but they must all be + the same option. relative : bool | np.ndarray Flag to preserve relative rather than absolute changes in quantiles. relative=False (default) will multiply by the change in quantiles while relative=True will add. See Equations 4-6 from - Cannon et al., 2015 for more details. Can also be an array of dist - inputs if being used from reV, but they must all be the same + Cannon et al., 2015 for more details. Can also be a 1D array of + dist inputs if being used from reV, but they must all be the same option. """ + self.params_oh = params_oh self.params_mh = params_mh self.params_mf = params_mf if params_mf is not None else params_mh - self.dist_name = self._clean_kwarg(dist) self.relative = self._clean_kwarg(relative) - - self.scipy_dist = getattr(scipy.stats, self.dist_name, None) - if self.scipy_dist is None: - msg = ('Could not get requested distribution "{}" from ' - '``scipy.stats``. Please double check your spelling and ' - 'select one of the options from here: ' - 'https://docs.scipy.org/doc/scipy/reference/stats.html' - .format(self.dist_name)) - logger.error(msg) - raise KeyError(msg) + self.dist_name = self._clean_kwarg(dist) + self.scipy_dist = None + + if self.dist_name != 'empirical': + self.scipy_dist = getattr(scipy.stats, self.dist_name, None) + if self.scipy_dist is None: + msg = ('Could not get requested distribution "{}" from ' + '``scipy.stats``. Please double check your spelling ' + 'and select "empirical" or one of the continuous ' + 'distribution options from here: ' + 'https://docs.scipy.org/doc/scipy/reference/stats.html' + .format(self.dist_name)) + logger.error(msg) + raise KeyError(msg) @staticmethod def _clean_kwarg(inp): @@ -224,8 +230,8 @@ def _clean_kwarg(inp): provided as an array and must be collapsed into a single string or boolean value""" unique = np.unique(inp) - msg = ('_PQDM kwargs must have only one unique input even if being ' - 'called with arrays as part of reV but found: {}' + msg = ('_QuantileDeltaMapping kwargs must have only one unique input ' + 'even if being called with arrays as part of reV but found: {}' .format(unique)) assert len(unique) == 1, msg @@ -234,10 +240,9 @@ def _clean_kwarg(inp): return inp - @staticmethod - def _clean_params(params, arr_shape): - """Re-organize 2D parameter arrays for passing into scipy distribution - functions. + def _clean_params(self, params, arr_shape): + """Verify and clean 2D parameter arrays for passing into empirical + distribution or scipy continuous distribution functions. Parameters ---------- @@ -250,17 +255,56 @@ def _clean_params(params, arr_shape): Returns ------- params : np.ndarray | list - If the two inputs are of the expected shape, this output will be - params unpacked along axis=1 into a list so that the list entries - represent the scipy distribution parameters (e.g., shape, scale, - loc) and each list entry is of shape (space,) + If a scipy continuous dist is set, this output will be params + unpacked along axis=1 into a list so that the list entries + represent the scipy distribution parameters + (e.g., shape, scale, loc) and each list entry is of shape (space,) """ - if arr_shape[1] == params.shape[0]: + + msg = f'params must be 2D array but received {type(params)}' + assert isinstance(params, np.ndarray), msg + msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) ' + f'but received shape {params.shape}') + assert len(params.shape) == 2, msg + assert params.shape[0] == arr_shape[1], msg + + if self.scipy_dist is not None: params = [params[:, i] for i in range(params.shape[1])] + return params + def cdf(self, x, params): + """Run the CDF function e.g., convert physical variable to quantile""" + + if self.scipy_dist is None: + p = np.zeros_like(x) + for idx in range(x.shape[1]): + xp = params[idx, :] + fp = np.linspace(0, 1, len(xp)) + p[:, idx] = np.interp(x[:, idx], xp, fp) + + else: + p = self.scipy_dist.cdf(x, *params) + + return p + + def ppf(self, p, params): + """Run the inverse CDF function (percent point function) e.g., convert + quantile to physical variable""" + + if self.scipy_dist is None: + x = np.zeros_like(p) + for idx in range(p.shape[1]): + fp = params[idx, :] + xp = np.linspace(0, 1, len(fp)) + x[:, idx] = np.interp(p[:, idx], xp, fp) + else: + x = self.scipy_dist.ppf(p, *params) + + return x + def __call__(self, arr): - """Run the PQDM function to bias correct an array + """Run the QDM function to bias correct an array Parameters ---------- @@ -277,30 +321,30 @@ def __call__(self, arr): params_mh = self._clean_params(self.params_mh, arr.shape) params_mf = self._clean_params(self.params_mf, arr.shape) - p_mf = self.scipy_dist.cdf(arr, *params_mf) - x_oh = self.scipy_dist.ppf(p_mf, *params_oh) + p_mf = self.cdf(arr, params_mf) + x_oh = self.ppf(p_mf, params_oh) if self.relative: - delta = arr / self.scipy_dist.ppf(p_mf, *params_mh) + delta = arr / self.ppf(p_mf, params_mh) arr_bc = x_oh * delta else: - delta = arr - self.scipy_dist.ppf(p_mf, *params_mh) + delta = arr - self.ppf(p_mf, params_mh) arr_bc = x_oh + delta - msg = ('Input shape {} does not match PQDM bias corrected output ' + msg = ('Input shape {} does not match QDM bias corrected output ' 'shape {}!'.format(arr.shape, arr_bc.shape)) assert arr.shape == arr_bc.shape, msg return arr_bc -def pqdm_irrad(ghi, dni, dhi, - ghi_params_oh, dni_params_oh, - ghi_params_mh, dni_params_mh, - ghi_params_mf=None, dni_params_mf=None, - dist='weibull_min', relative=True): - """Correct irradiance using parametric quantile delta mapping based on a - parametric implementation of the method from Cannon et al., 2015 +def qdm_irrad(ghi, dni, dhi, + ghi_params_oh, dni_params_oh, + ghi_params_mh, dni_params_mh, + ghi_params_mf=None, dni_params_mf=None, + dist='empirical', relative=True): + """Correct irradiance using the quantile delta mapping based on the method + from Cannon et al., 2015 Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in @@ -315,39 +359,47 @@ def pqdm_irrad(ghi, dni, dhi, dhi : np.ndarray 2D array of diffuse horizontal irradiance values in shape (time, space) ghi_params_oh : np.ndarray | list - 2D array of probability distribution parameters for GHI created using a - function like ``scipy.stats.weibull_min.fit()`` where the shape is - (space, N) with N being the number of parameters required by the - specified distribution e.g., (shape, loc, scale) for weibull_min. This - input arg is for the **observed historical distribution**. + 2D array of **observed historical** distribution parameters created + from a multi-year set of data where the shape is (space, N). This + can be the output of a parametric distribution fit like + ``scipy.stats.weibull_min.fit()`` where N is the number of + parameters for that distribution, or this can define the x-values + of N points from an empirical CDF that will be linearly + interpolated between. If this is an empirical CDF, this must + include the 0th and 100th percentile values and have even + percentile spacing between values. dni_params_oh : np.ndarray | list Same requirements as ghi_params_oh. This input arg is for the **observed historical distribution** for DNI. ghi_params_mh : np.ndarray | list Same requirements as ghi_params_oh. This input arg is for the **modeled - historical distribution**. + historical distribution** for GHI. dni_params_mh : np.ndarray | list Same requirements as ghi_params_oh. This input arg is for the **modeled historical distribution** for DNI. ghi_params_mf : np.ndarray | list | None Same requirements as ghi_params_oh. This input arg is for the **modeled - future distribution**. If this is None, this defaults to ghi_params_mh - (no future data). + future distribution** for GHI. If this is None, this defaults to + ghi_params_mh (no future data, just corrected to modeled historical + distribution) dni_params_mf : np.ndarray | list | None Same requirements as ghi_params_oh. This input arg is for the **modeled future distribution** for DNI. If this is None, this defaults to - dni_params_mh (no future data). + dni_params_mh. (no future data, just corrected to modeled historical + distribution) dist : str | np.ndarray - Parametric probability distribution name to use to model the windspeed. - This can be any distribution name from ``scipy.stats``. Can also - be an array of dist inputs if being used from reV, but they must all be + Probability distribution name to use to model the data which + determines how the param args are used. This can "empirical" or any + continuous distribution name from ``scipy.stats``. Can also be a 1D + array of dist inputs if being used from reV, but they must all be the same option. relative : bool | np.ndarray - Flag to preserve relative rather than absolute changes in quantiles. - relative=False (default) will multiply by the change in quantiles while - relative=True will add. See Equations 4-6 from Cannon et al., 2015 for - more details. Can also be an array of dist inputs if being used from - reV, but they must all be the same option. + Flag to preserve relative rather than absolute changes in + quantiles. relative=False (default) will multiply by the change in + quantiles while relative=True will add. See Equations 4-6 from + Cannon et al., 2015 for more details. Can also be a 1D array of + dist inputs if being used from reV, but they must all be the same + option. Returns ------- @@ -361,13 +413,19 @@ def pqdm_irrad(ghi, dni, dhi, ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi) - ghi_pqdm = _PQDM(ghi_params_oh, ghi_params_mh, ghi_params_mf, - dist, relative) - dni_pqdm = _PQDM(dni_params_oh, dni_params_mh, dni_params_mf, - dist, relative) + ghi_qdm = _QuantileDeltaMapping(ghi_params_oh, ghi_params_mh, + ghi_params_mf, dist, relative) + dni_qdm = _QuantileDeltaMapping(dni_params_oh, dni_params_mh, + dni_params_mf, dist, relative) + + # This will prevent inverse CDF functions from returning zero resulting in + # a divide by zero error in the calculation of the QDM delta. These zeros + # get fixed later in _irrad_post_proc + ghi[ghi_zeros] = 1500 + dni[dni_zeros] = 1500 - ghi = ghi_pqdm(ghi) - dni = dni_pqdm(dni) + ghi = ghi_qdm(ghi) + dni = dni_qdm(dni) ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, cos_sza) @@ -375,10 +433,10 @@ def pqdm_irrad(ghi, dni, dhi, return ghi, dni, dhi -def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min', - relative=True): - """Correct windspeed using parametric quantile delta mapping based on a - parametric implementation of the method from Cannon et al., 2015 +def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', + relative=True): + """Correct windspeed using quantile delta mapping based on the method from + Cannon et al., 2015 Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in @@ -389,30 +447,35 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min', ws : np.ndarray 2D array of windspeed values in shape (time, space) params_oh : np.ndarray | list - 2D array of probability distribution parameters created using a - function like ``scipy.stats.weibull_min.fit()`` where the shape is - (space, N) with N being the number of parameters required by the - specified distribution e.g., (shape, loc, scale) for weibull_min. This - input arg is for the **observed historical distribution**. + 2D array of **observed historical** distribution parameters created + from a multi-year set of data where the shape is (space, N). This + can be the output of a parametric distribution fit like + ``scipy.stats.weibull_min.fit()`` where N is the number of + parameters for that distribution, or this can define the x-values + of N points from an empirical CDF that will be linearly + interpolated between. If this is an empirical CDF, this must + include the 0th and 100th percentile values and have even + percentile spacing between values. params_mh : np.ndarray | list Same requirements as params_oh. This input arg is for the **modeled historical distribution**. params_mf : np.ndarray | list | None Same requirements as params_oh. This input arg is for the **modeled future distribution**. If this is None, this defaults to params_mh - (no future data). + (no future data, just corrected to modeled historical distribution) dist : str | np.ndarray - Parametric probability distribution name to use to model the windspeed. - This can be any distribution name from ``scipy.stats``, but - "weibull_min" is a common choice for windspeed distributions. Can also - be an array of dist inputs if being used from reV, but they must all be + Probability distribution name to use to model the data which + determines how the param args are used. This can "empirical" or any + continuous distribution name from ``scipy.stats``. Can also be a 1D + array of dist inputs if being used from reV, but they must all be the same option. relative : bool | np.ndarray - Flag to preserve relative rather than absolute changes in quantiles. - relative=False (default) will multiply by the change in quantiles while - relative=True will add. See Equations 4-6 from Cannon et al., 2015 for - more details. Can also be an array of dist inputs if being used from - reV, but they must all be the same option. + Flag to preserve relative rather than absolute changes in + quantiles. relative=False (default) will multiply by the change in + quantiles while relative=True will add. See Equations 4-6 from + Cannon et al., 2015 for more details. Can also be a 1D array of + dist inputs if being used from reV, but they must all be the same + option. Returns ------- @@ -420,7 +483,8 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min', 2D array of windspeed values in shape (time, space) """ - pqdm = _PQDM(params_oh, params_mh, params_mf, dist, relative) - ws = pqdm(ws) + qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist, + relative) + ws = qdm(ws) ws = np.maximum(ws, 0) return ws diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index 9eb5d1c3..17667219 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -2,6 +2,7 @@ """ pytests for sam_resource """ +import json import tempfile import numpy as np import shutil @@ -521,7 +522,7 @@ def test_bias_correct_wind_pqdm(): shape, loc, scale = weibull_min.fit(base_ws.mean(axis=1)) bc = pd.DataFrame({'gid': np.arange(n), - 'method': 'pqdm_ws', + 'method': 'qdm_ws', 'params_oh': f'[{shape}, {loc}, {scale}]', 'params_mh': f'[{shape}, {loc}, {scale}]', 'params_mf': f'[{shape}, {loc}, {scale}]', @@ -559,6 +560,88 @@ def test_bias_correct_wind_pqdm(): assert not (bc_ws < 0).any() +def test_bias_correct_wind_qdm(): + """Test empirical QDM bias correction function on windspeed""" + h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') + n = 20 + sites = slice(0, n) + hub_heights = 80 + base_res = WindResource.preload_SAM(h5, sites, hub_heights) + base_ws = base_res._res_arrays['windspeed'] + + base_params = list(np.linspace(0, 100, 10)) + + bc = pd.DataFrame({'gid': np.arange(n), + 'method': 'qdm_ws', + 'params_oh': json.dumps(base_params), + 'params_mh': json.dumps(base_params), + 'params_mf': json.dumps(base_params), + 'dist': 'empirical', + 'relative': True, + }) + + res = WindResource.preload_SAM(h5, sites, hub_heights) + res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + assert np.allclose(bc_ws, base_ws) + + res = WindResource.preload_SAM(h5, sites, hub_heights) + params = list(0.1*np.ones(10)) + bc['params_oh'] = json.dumps(params) + bc.loc[0, 'params_oh'] = json.dumps(base_params) + res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + assert np.allclose(bc_ws[:, 0], base_ws[:, 0]) + assert ((bc_ws[:, 1:] > base_ws[:, 1:]).sum() / bc_ws[:, 1:].size) < 1e-4 + + +def test_bias_correct_irrad_qdm(): + """Test empirical QDM bias correction function on irradiance""" + h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5') + n = 10 + sites = slice(0, n) + base_res = NSRDB.preload_SAM(h5, sites) + base_ghi = base_res._res_arrays['ghi'] + base_dni = base_res._res_arrays['dni'] + + base_params = list(np.linspace(0, 1300, 10)) + bc = pd.DataFrame({'gid': np.arange(n), + 'ghi_params_oh': json.dumps(base_params), + 'dni_params_oh': json.dumps(base_params), + 'ghi_params_mh': json.dumps(base_params), + 'dni_params_mh': json.dumps(base_params), + 'method': 'qdm_irrad'}, + ) + + res = NSRDB.preload_SAM(h5, sites) + res.bias_correct(bc) + assert np.allclose(base_res._res_arrays['ghi'], base_ghi) + assert np.allclose(base_res._res_arrays['dni'], base_dni) + + res = NSRDB.preload_SAM(h5, sites) + params = list(0.1*np.ones(10)) + bc['ghi_params_oh'] = json.dumps(params) + res.bias_correct(bc) + bc_ghi = res._res_arrays['ghi'] + bc_dni = res._res_arrays['dni'] + bc_dhi = res._res_arrays['dhi'] + assert (bc_ghi >= 0).all() & (bc_dni >= 0).all() & (bc_dhi >= 0).all() + assert (bc_ghi.mean(axis=0) < base_ghi.mean(axis=0)).all() + assert np.allclose(bc_dni, base_dni) + + res = NSRDB.preload_SAM(h5, sites) + params = list(0.1*np.ones(10)) + bc['ghi_params_oh'] = json.dumps(base_params) + bc['dni_params_oh'] = json.dumps(params) + res.bias_correct(bc) + bc_ghi = res._res_arrays['ghi'] + bc_dni = res._res_arrays['dni'] + bc_dhi = res._res_arrays['dhi'] + assert (bc_ghi >= 0).all() & (bc_dni >= 0).all() & (bc_dhi >= 0).all() + assert (bc_dni.mean(axis=0) < base_dni.mean(axis=0)).all() + assert np.allclose(bc_ghi, base_ghi) + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report. From 3eadb3bbf2513bc3512c02932b39bc88d77eb0ea Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 5 Jan 2024 15:00:22 -0700 Subject: [PATCH 09/19] added cdf function for temporal stats --- rex/temporal_stats/temporal_stats.py | 34 ++++++++++++++++++++++++++++ tests/test_temporal_stats.py | 16 ++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py index 18de50a4..a411ae01 100644 --- a/rex/temporal_stats/temporal_stats.py +++ b/rex/temporal_stats/temporal_stats.py @@ -83,6 +83,40 @@ def circular_mean(data, weights=None, degrees=True, axis=0, return mean +def cdf(data, n=50, decimals=None): + """Get a number of x-values that define a CDF for the input data. + + Parameters + ---------- + data : np.ndarray + 1D array of data to make a CDF for + n : int + Number of points to fit the CDF + decimals : int | None + Precision to round output to (see docstring for np.round). None will + not round outputs (default). + + Returns + ------- + x : np.ndarray + 1D array of values with shape (n,). Each value is in the same units as + the input data argument. The x[0] is the minimum value of data (0th + percentile) and x[-1] is the maximum (100th percentile). The values are + evenly spaced in quantile space on the y-axis of the CDF. + """ + + p = np.linspace(0, 1, n) + x = np.interp(p, np.linspace(0, 1, len(data)), sorted(data)) + + assert x[0] == data.min() + assert x[-1] == data.max() + + if decimals is not None: + x = np.round(x, decimals=decimals) + + return x + + class TemporalStats: """ Temporal Statistics from Resource Data diff --git a/tests/test_temporal_stats.py b/tests/test_temporal_stats.py index d5702d77..a55e260e 100644 --- a/tests/test_temporal_stats.py +++ b/tests/test_temporal_stats.py @@ -13,7 +13,7 @@ from rex.multi_year_resource import MultiYearWindResource from rex.renewable_resource import WindResource, NSRDB -from rex.temporal_stats.temporal_stats import TemporalStats, circular_mean +from rex.temporal_stats.temporal_stats import TemporalStats, circular_mean, cdf from rex.temporal_stats.temporal_stats_cli import main from rex.utilities.loggers import LOGGERS from rex import TESTDATADIR @@ -281,6 +281,20 @@ def test_weighted_circular_means(weights): assert np.allclose(truth, test_stats[name].values, rtol=0, atol=0.01), msg +def test_cdf(n=21): + """Test the CDF function which gets evenly spaced values (in quantile + space) defining the empirical CDF of a dataset""" + data = np.random.normal(0, 1, (1000,)) + x = cdf(data, n=n, decimals=None) + + quantiles = np.linspace(0, 1, n) + assert quantiles[0] == 0 + assert quantiles[-1] == 1 + + for i, q in enumerate(quantiles): + assert np.allclose(np.percentile(data, 100 * q), x[i]) + + def test_mask_zeros(): """Test the irradiance stats functionality with zeros masked out""" res_h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5') From 3962d9d2e826bff712d10676bbd51e1c7c119cdb Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 5 Jan 2024 15:22:24 -0700 Subject: [PATCH 10/19] handle nans for temporal means cdfs --- rex/bias_correction.py | 4 ++-- rex/temporal_stats/temporal_stats.py | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/rex/bias_correction.py b/rex/bias_correction.py index 649df17f..5479f9e1 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -421,8 +421,8 @@ def qdm_irrad(ghi, dni, dhi, # This will prevent inverse CDF functions from returning zero resulting in # a divide by zero error in the calculation of the QDM delta. These zeros # get fixed later in _irrad_post_proc - ghi[ghi_zeros] = 1500 - dni[dni_zeros] = 1500 + ghi[ghi_zeros] = 1 + dni[dni_zeros] = 1 ghi = ghi_qdm(ghi) dni = dni_qdm(dni) diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py index a411ae01..bed01bab 100644 --- a/rex/temporal_stats/temporal_stats.py +++ b/rex/temporal_stats/temporal_stats.py @@ -105,11 +105,19 @@ def cdf(data, n=50, decimals=None): evenly spaced in quantile space on the y-axis of the CDF. """ - p = np.linspace(0, 1, n) - x = np.interp(p, np.linspace(0, 1, len(data)), sorted(data)) + nan_mask = np.isnan(data) + if nan_mask.all(): + return np.zeros(n) - assert x[0] == data.min() - assert x[-1] == data.max() + p = np.linspace(0, 1, n) + x = np.interp(p, np.linspace(0, 1, len(data[~nan_mask])), + sorted(data[~nan_mask])) + + msg = (f'First and last points defining the CDF ({x[0]}, {x[-1]}) ' + f'were not the min and max data values ' + f'({np.nanmin(data)}, {np.nanmin(data)}).') + assert x[0] == np.nanmin(data), msg + assert x[-1] == np.nanmax(data), msg if decimals is not None: x = np.round(x, decimals=decimals) From 6d83f387673b52fd98e43bb325892bd486587cff Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 5 Jan 2024 15:22:24 -0700 Subject: [PATCH 11/19] handle nans for temporal means cdfs --- rex/temporal_stats/temporal_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py index bed01bab..7dd80f1b 100644 --- a/rex/temporal_stats/temporal_stats.py +++ b/rex/temporal_stats/temporal_stats.py @@ -83,7 +83,7 @@ def circular_mean(data, weights=None, degrees=True, axis=0, return mean -def cdf(data, n=50, decimals=None): +def cdf(data, n=10, decimals=None): """Get a number of x-values that define a CDF for the input data. Parameters From cffe740440f06cfc298d95c439b91b9bdee72850 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Sat, 6 Jan 2024 12:20:44 -0700 Subject: [PATCH 12/19] added nonlinear sampling for CDF --- rex/temporal_stats/temporal_stats.py | 57 ++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py index 7dd80f1b..c6510a36 100644 --- a/rex/temporal_stats/temporal_stats.py +++ b/rex/temporal_stats/temporal_stats.py @@ -83,46 +83,71 @@ def circular_mean(data, weights=None, degrees=True, axis=0, return mean -def cdf(data, n=10, decimals=None): +def cdf(data, n_samples=50, sampling='linear', log_base=10, decimals=None): """Get a number of x-values that define a CDF for the input data. Parameters ---------- data : np.ndarray 1D array of data to make a CDF for - n : int + n_samples : int Number of points to fit the CDF + sampling : str + Option for quantile sampling, e.g., how to sample the y-axis of the + distribution. "linear" will do even spacing, "log" will concentrate + samples near quantile=0, and "invlog" will concentrate samples near + quantile=1 + log_base : int | float + Log base value if sampling is "log" or "invlog". A higher value will + concentrate more samples at the extreme sides of the distribution. decimals : int | None Precision to round output to (see docstring for np.round). None will not round outputs (default). Returns ------- - x : np.ndarray - 1D array of values with shape (n,). Each value is in the same units as - the input data argument. The x[0] is the minimum value of data (0th - percentile) and x[-1] is the maximum (100th percentile). The values are - evenly spaced in quantile space on the y-axis of the CDF. + x_values : np.ndarray + 1D array of values with shape (n_samples,). Each value is in the same + units as the input data argument. The x_values[0] is the minimum value + of data (0th percentile) and x_values[-1] is the maximum + (100th percentile). The values are spaced in quantile space (y-axis of + the CDF) according to the sampling option (e.g., evenly spaced if + sampling='linear'). """ nan_mask = np.isnan(data) if nan_mask.all(): - return np.zeros(n) + return np.zeros(n_samples) + + if sampling == 'linear': + quantiles = np.linspace(0, 1, n_samples) + elif sampling == 'log': + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + elif sampling == 'invlog': + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + quantiles = np.array(sorted(1 - quantiles)) + else: + msg = ('sampling option must be linear, log, or invlog, but received: ' + '{}'.format(sampling)) + logger.error(msg) + raise KeyError(msg) - p = np.linspace(0, 1, n) - x = np.interp(p, np.linspace(0, 1, len(data[~nan_mask])), - sorted(data[~nan_mask])) + x_values = np.interp(quantiles, np.linspace(0, 1, len(data[~nan_mask])), + sorted(data[~nan_mask])) - msg = (f'First and last points defining the CDF ({x[0]}, {x[-1]}) ' + msg = (f'First and last x-value points defining the CDF ' + '({x_values[0]}, {x_values[-1]}) ' f'were not the min and max data values ' f'({np.nanmin(data)}, {np.nanmin(data)}).') - assert x[0] == np.nanmin(data), msg - assert x[-1] == np.nanmax(data), msg + assert x_values[0] == np.nanmin(data), msg + assert x_values[-1] == np.nanmax(data), msg if decimals is not None: - x = np.round(x, decimals=decimals) + x_values = np.round(x_values, decimals=decimals) - return x + return x_values class TemporalStats: From d97d05191dbf59215a9bad2e9d8129c982655fba Mon Sep 17 00:00:00 2001 From: grantbuster Date: Sat, 6 Jan 2024 12:42:10 -0700 Subject: [PATCH 13/19] added option for nonlinear quantile sampling when doing empirical CDF bias correction --- rex/bias_correction.py | 93 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/rex/bias_correction.py b/rex/bias_correction.py index 5479f9e1..59307591 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -169,7 +169,8 @@ class _QuantileDeltaMapping: Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). """ - def __init__(self, params_oh, params_mh, params_mf, dist, relative): + def __init__(self, params_oh, params_mh, params_mf, dist='empirical', + relative=True, sampling='linear', log_base=10): """ Parameters ---------- @@ -203,13 +204,28 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative): Cannon et al., 2015 for more details. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. + sampling : str | np.ndarray + If dist="empirical", this is an option for how the quantiles were + sampled to produce the params inputs, e.g., how to sample the + y-axis of the distribution. "linear" will do even spacing, "log" + will concentrate samples near quantile=0, and "invlog" will + concentrate samples near quantile=1. Can also be a 1D array of dist + inputs if being used from reV, but they must all be the same + option. + log_base : int | float | np.ndarray + Log base value if sampling is "log" or "invlog". A higher value + will concentrate more samples at the extreme sides of the + distribution. Can also be a 1D array of dist inputs if being used + from reV, but they must all be the same option. """ self.params_oh = params_oh self.params_mh = params_mh self.params_mf = params_mf if params_mf is not None else params_mh - self.relative = self._clean_kwarg(relative) - self.dist_name = self._clean_kwarg(dist) + self.relative = bool(self._clean_kwarg(relative)) + self.dist_name = str(self._clean_kwarg(dist)) + self.sampling = str(self._clean_kwarg(sampling)) + self.log_base = float(self._clean_kwarg(log_base)) self.scipy_dist = None if self.dist_name != 'empirical': @@ -273,6 +289,30 @@ def _clean_params(self, params, arr_shape): return params + def _get_quantiles(self, n_samples): + """If dist='empirical', this will get the quantile values for the CDF + x-values specified in the input params""" + + if self.sampling == 'linear': + quantiles = np.linspace(0, 1, n_samples) + + elif self.sampling == 'log': + quantiles = np.logspace(0, 1, n_samples, base=self.log_base) + quantiles = (quantiles - 1) / (self.log_base - 1) + + elif self.sampling == 'invlog': + quantiles = np.logspace(0, 1, n_samples, base=self.log_base) + quantiles = (quantiles - 1) / (self.log_base - 1) + quantiles = np.array(sorted(1 - quantiles)) + + else: + msg = ('sampling option must be linear, log, or invlog, but ' + 'received: {}'.format(self.sampling)) + logger.error(msg) + raise KeyError(msg) + + return quantiles + def cdf(self, x, params): """Run the CDF function e.g., convert physical variable to quantile""" @@ -280,9 +320,8 @@ def cdf(self, x, params): p = np.zeros_like(x) for idx in range(x.shape[1]): xp = params[idx, :] - fp = np.linspace(0, 1, len(xp)) + fp = self._get_quantiles(len(xp)) p[:, idx] = np.interp(x[:, idx], xp, fp) - else: p = self.scipy_dist.cdf(x, *params) @@ -296,7 +335,7 @@ def ppf(self, p, params): x = np.zeros_like(p) for idx in range(p.shape[1]): fp = params[idx, :] - xp = np.linspace(0, 1, len(fp)) + xp = self._get_quantiles(len(fp)) x[:, idx] = np.interp(p[:, idx], xp, fp) else: x = self.scipy_dist.ppf(p, *params) @@ -342,7 +381,8 @@ def qdm_irrad(ghi, dni, dhi, ghi_params_oh, dni_params_oh, ghi_params_mh, dni_params_mh, ghi_params_mf=None, dni_params_mf=None, - dist='empirical', relative=True): + dist='empirical', relative=True, + sampling='linear', log_base=10): """Correct irradiance using the quantile delta mapping based on the method from Cannon et al., 2015 @@ -400,6 +440,18 @@ def qdm_irrad(ghi, dni, dhi, Cannon et al., 2015 for more details. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. + sampling : str | np.ndarray + If dist="empirical", this is an option for how the quantiles were + sampled to produce the params inputs, e.g., how to sample the + y-axis of the distribution. "linear" will do even spacing, "log" + will concentrate samples near quantile=0, and "invlog" will + concentrate samples near quantile=1. Can also be a 1D array of dist + inputs if being used from reV, but they must all be the same option. + log_base : int | float | np.ndarray + Log base value if sampling is "log" or "invlog". A higher value + will concentrate more samples at the extreme sides of the + distribution. Can also be a 1D array of dist inputs if being used from + reV, but they must all be the same option. Returns ------- @@ -414,9 +466,13 @@ def qdm_irrad(ghi, dni, dhi, ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi) ghi_qdm = _QuantileDeltaMapping(ghi_params_oh, ghi_params_mh, - ghi_params_mf, dist, relative) + ghi_params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) dni_qdm = _QuantileDeltaMapping(dni_params_oh, dni_params_mh, - dni_params_mf, dist, relative) + dni_params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) # This will prevent inverse CDF functions from returning zero resulting in # a divide by zero error in the calculation of the QDM delta. These zeros @@ -434,7 +490,7 @@ def qdm_irrad(ghi, dni, dhi, def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', - relative=True): + relative=True, sampling='linear', log_base=10): """Correct windspeed using quantile delta mapping based on the method from Cannon et al., 2015 @@ -476,6 +532,18 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', Cannon et al., 2015 for more details. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. + sampling : str | np.ndarray + If dist="empirical", this is an option for how the quantiles were + sampled to produce the params inputs, e.g., how to sample the + y-axis of the distribution. "linear" will do even spacing, "log" + will concentrate samples near quantile=0, and "invlog" will + concentrate samples near quantile=1. Can also be a 1D array of dist + inputs if being used from reV, but they must all be the same option. + log_base : int | float | np.ndarray + Log base value if sampling is "log" or "invlog". A higher value + will concentrate more samples at the extreme sides of the + distribution. Can also be a 1D array of dist inputs if being used from + reV, but they must all be the same option. Returns ------- @@ -483,8 +551,9 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', 2D array of windspeed values in shape (time, space) """ - qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist, - relative) + qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) ws = qdm(ws) ws = np.maximum(ws, 0) return ws From db2d5ae9724cc033ad3c7685e68c02511cb645bc Mon Sep 17 00:00:00 2001 From: grantbuster Date: Sun, 7 Jan 2024 14:39:28 -0700 Subject: [PATCH 14/19] more flexible shapes and protect against div0 --- rex/bias_correction.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/rex/bias_correction.py b/rex/bias_correction.py index 59307591..6afe610a 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -279,6 +279,10 @@ def _clean_params(self, params, arr_shape): msg = f'params must be 2D array but received {type(params)}' assert isinstance(params, np.ndarray), msg + + if len(params.shape) == 1: + params = np.expand_dims(params, 0) + msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) ' f'but received shape {params.shape}') assert len(params.shape) == 2, msg @@ -356,18 +360,23 @@ def __call__(self, arr): Bias corrected copy of the input array with same shape. """ + if len(arr.shape) == 1: + arr = np.expand_dims(arr, 1) + params_oh = self._clean_params(self.params_oh, arr.shape) params_mh = self._clean_params(self.params_mh, arr.shape) params_mf = self._clean_params(self.params_mf, arr.shape) - p_mf = self.cdf(arr, params_mf) - x_oh = self.ppf(p_mf, params_oh) + q_mf = self.cdf(arr, params_mf) # Tau_m_p + x_oh = self.ppf(q_mf, params_oh) # x^_o:m_h:p + x_mh_mf = self.ppf(q_mf, params_mh) # F-1_mh[Tau_m_p] if self.relative: - delta = arr / self.ppf(p_mf, params_mh) + x_mh_mf[x_mh_mf == 0] = 0.001 # arbitrary limit to prevent div 0 + delta = arr / x_mh_mf arr_bc = x_oh * delta else: - delta = arr - self.ppf(p_mf, params_mh) + delta = arr - x_mh_mf arr_bc = x_oh + delta msg = ('Input shape {} does not match QDM bias corrected output ' @@ -554,6 +563,16 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist, relative=relative, sampling=sampling, log_base=log_base) + + # This will prevent inverse CDF functions from returning zero resulting in + # a divide by zero error in the calculation of the QDM delta. These zeros + # get fixed later + ws_zeros = ws == 0 + ws[ws_zeros] = 0.01 + ws = qdm(ws) + ws = np.maximum(ws, 0) + ws[ws_zeros] = 0 + return ws From 215f6094282f01ce6463903f9b92dff025fdba69 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Sun, 7 Jan 2024 14:59:56 -0700 Subject: [PATCH 15/19] more tests on log cdf --- tests/test_sam_resource.py | 4 ++-- tests/test_temporal_stats.py | 31 ++++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index 17667219..13e3e1b2 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -586,13 +586,13 @@ def test_bias_correct_wind_qdm(): assert np.allclose(bc_ws, base_ws) res = WindResource.preload_SAM(h5, sites, hub_heights) - params = list(0.1*np.ones(10)) + params = sorted(np.random.uniform(10, 20, 10)) bc['params_oh'] = json.dumps(params) bc.loc[0, 'params_oh'] = json.dumps(base_params) res.bias_correct(bc) bc_ws = res._res_arrays['windspeed'] assert np.allclose(bc_ws[:, 0], base_ws[:, 0]) - assert ((bc_ws[:, 1:] > base_ws[:, 1:]).sum() / bc_ws[:, 1:].size) < 1e-4 + assert ((bc_ws[:, 1:] != base_ws[:, 1:]).sum() / bc_ws[:, 1:].size) > 0.99 def test_bias_correct_irrad_qdm(): diff --git a/tests/test_temporal_stats.py b/tests/test_temporal_stats.py index a55e260e..1354b69e 100644 --- a/tests/test_temporal_stats.py +++ b/tests/test_temporal_stats.py @@ -281,13 +281,38 @@ def test_weighted_circular_means(weights): assert np.allclose(truth, test_stats[name].values, rtol=0, atol=0.01), msg -def test_cdf(n=21): +@pytest.mark.parametrize("n_samples", [21, 50]) +def test_cdf_linear(n_samples): """Test the CDF function which gets evenly spaced values (in quantile space) defining the empirical CDF of a dataset""" data = np.random.normal(0, 1, (1000,)) - x = cdf(data, n=n, decimals=None) + x = cdf(data, n_samples=n_samples, decimals=None) - quantiles = np.linspace(0, 1, n) + assert x[0] == data.min() + assert x[-1] == data.max() + + quantiles = np.linspace(0, 1, n_samples) + assert quantiles[0] == 0 + assert quantiles[-1] == 1 + + for i, q in enumerate(quantiles): + assert np.allclose(np.percentile(data, 100 * q), x[i]) + + +@pytest.mark.parametrize(("n_samples", "log_base"), [(21, 10), (50, 2)]) +def test_cdf_invlog(n_samples, log_base): + """Test the CDF function which gets inverse-log-spaced spaced values + (in quantile space) defining the empirical CDF of a dataset""" + data = np.random.normal(0, 1, (10000,)) + x = cdf(data, n_samples=n_samples, decimals=None, sampling='invlog', + log_base=log_base) + + assert x[0] == data.min() + assert x[-1] == data.max() + + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + quantiles = np.array(sorted(1 - quantiles)) assert quantiles[0] == 0 assert quantiles[-1] == 1 From c70378d5e606677ad39cef86cee5560f887bf45a Mon Sep 17 00:00:00 2001 From: grantbuster Date: Mon, 8 Jan 2024 16:25:36 -0700 Subject: [PATCH 16/19] protect against bias correction table with no relevant gids --- rex/sam_resource.py | 3 +++ rex/utilities/bc_utils.py | 8 ++++++-- tests/test_sam_resource.py | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/rex/sam_resource.py b/rex/sam_resource.py index a1119c5f..b504b9c9 100644 --- a/rex/sam_resource.py +++ b/rex/sam_resource.py @@ -721,6 +721,9 @@ def bias_correct(self, bc_df): bc_fun, bc_fun_kwargs, bool_bc = _parse_bc_table(bc_df, self.sites) + if not bool_bc.any(): + return + if 'ghi' in self._res_arrays and 'dni' in self._res_arrays: logger.debug('Bias correcting irradiance with function {} ' 'for sites {}'.format(bc_fun, self.sites)) diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py index 224f935a..45995c70 100644 --- a/rex/utilities/bc_utils.py +++ b/rex/utilities/bc_utils.py @@ -61,7 +61,11 @@ def _parse_bc_table(bc_df, gids): gid_arr = np.array(gids) bool_bc = np.isin(gid_arr, bc_df.index.values) - if not bool_bc.all(): + + if not bool_bc.any(): + return None, {}, bool_bc + + elif not bool_bc.all(): missing = gid_arr[~bool_bc] msg = ('{} sites were missing from the bias correction table, ' 'not bias correcting: {}'.format(len(missing), missing)) @@ -88,7 +92,7 @@ def _parse_bc_table(bc_df, gids): # load serialized lists from string columns in bc_df into nested lists sample = bc_df[col].values[0] if isinstance(sample, str) and '[' in sample and ']' in sample: - bc_df[col] = bc_df[col].apply(json.loads) + bc_df.loc[:, col] = bc_df[col].apply(json.loads) arr = bc_df.loc[gid_arr[bool_bc], col].values diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index 13e3e1b2..3dd604c9 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -619,7 +619,7 @@ def test_bias_correct_irrad_qdm(): assert np.allclose(base_res._res_arrays['dni'], base_dni) res = NSRDB.preload_SAM(h5, sites) - params = list(0.1*np.ones(10)) + params = list(0.1 * np.ones(10)) bc['ghi_params_oh'] = json.dumps(params) res.bias_correct(bc) bc_ghi = res._res_arrays['ghi'] @@ -630,7 +630,7 @@ def test_bias_correct_irrad_qdm(): assert np.allclose(bc_dni, base_dni) res = NSRDB.preload_SAM(h5, sites) - params = list(0.1*np.ones(10)) + params = list(0.1 * np.ones(10)) bc['ghi_params_oh'] = json.dumps(base_params) bc['dni_params_oh'] = json.dumps(params) res.bias_correct(bc) From bcd1f69cee6e947eaa80ec6cdb72da032d492a5f Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 12 Jan 2024 12:12:27 -0700 Subject: [PATCH 17/19] pr comments, moved things around including new bc utils module that will store actual utilities and avoid recursive import from parse table --- rex/bias_correction.py | 259 ++----------------- rex/sam_resource.py | 4 +- rex/temporal_stats/temporal_stats.py | 15 +- rex/utilities/bc_parse_table.py | 106 ++++++++ rex/utilities/bc_utils.py | 357 ++++++++++++++++++++------- 5 files changed, 405 insertions(+), 336 deletions(-) create mode 100644 rex/utilities/bc_parse_table.py diff --git a/rex/bias_correction.py b/rex/bias_correction.py index 6afe610a..cfb945c6 100644 --- a/rex/bias_correction.py +++ b/rex/bias_correction.py @@ -2,10 +2,10 @@ """ Module to perform bias correction of renewable energy resource data """ -import scipy import numpy as np import logging +from rex.utilities.bc_utils import QuantileDeltaMapping logger = logging.getLogger(__name__) @@ -157,235 +157,6 @@ def lin_ws(ws, scalar=1, adder=0): return ws -class _QuantileDeltaMapping: - """Class for quantile delta mapping based on the method from - Cannon et al., 2015 - - Note that this is a utility class for implementing QDM and should not be - requested directly as a method in the reV/rex bias correction table input - - Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM - Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in - Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). - """ - - def __init__(self, params_oh, params_mh, params_mf, dist='empirical', - relative=True, sampling='linear', log_base=10): - """ - Parameters - ---------- - params_oh : np.ndarray - 2D array of **observed historical** distribution parameters created - from a multi-year set of data where the shape is (space, N). This - can be the output of a parametric distribution fit like - ``scipy.stats.weibull_min.fit()`` where N is the number of - parameters for that distribution, or this can define the x-values - of N points from an empirical CDF that will be linearly - interpolated between. If this is an empirical CDF, this must - include the 0th and 100th percentile values and have even - percentile spacing between values. - params_mh : np.ndarray - Same requirements as params_oh. This input arg is for the **modeled - historical distribution**. - params_mf : np.ndarray | None - Same requirements as params_oh. This input arg is for the **modeled - future distribution**. If this is None, this defaults to params_mh - (no future data, just corrected to modeled historical distribution) - dist : str | np.ndarray - Probability distribution name to use to model the data which - determines how the param args are used. This can "empirical" or any - continuous distribution name from ``scipy.stats``. Can also be a 1D - array of dist inputs if being used from reV, but they must all be - the same option. - relative : bool | np.ndarray - Flag to preserve relative rather than absolute changes in - quantiles. relative=False (default) will multiply by the change in - quantiles while relative=True will add. See Equations 4-6 from - Cannon et al., 2015 for more details. Can also be a 1D array of - dist inputs if being used from reV, but they must all be the same - option. - sampling : str | np.ndarray - If dist="empirical", this is an option for how the quantiles were - sampled to produce the params inputs, e.g., how to sample the - y-axis of the distribution. "linear" will do even spacing, "log" - will concentrate samples near quantile=0, and "invlog" will - concentrate samples near quantile=1. Can also be a 1D array of dist - inputs if being used from reV, but they must all be the same - option. - log_base : int | float | np.ndarray - Log base value if sampling is "log" or "invlog". A higher value - will concentrate more samples at the extreme sides of the - distribution. Can also be a 1D array of dist inputs if being used - from reV, but they must all be the same option. - """ - - self.params_oh = params_oh - self.params_mh = params_mh - self.params_mf = params_mf if params_mf is not None else params_mh - self.relative = bool(self._clean_kwarg(relative)) - self.dist_name = str(self._clean_kwarg(dist)) - self.sampling = str(self._clean_kwarg(sampling)) - self.log_base = float(self._clean_kwarg(log_base)) - self.scipy_dist = None - - if self.dist_name != 'empirical': - self.scipy_dist = getattr(scipy.stats, self.dist_name, None) - if self.scipy_dist is None: - msg = ('Could not get requested distribution "{}" from ' - '``scipy.stats``. Please double check your spelling ' - 'and select "empirical" or one of the continuous ' - 'distribution options from here: ' - 'https://docs.scipy.org/doc/scipy/reference/stats.html' - .format(self.dist_name)) - logger.error(msg) - raise KeyError(msg) - - @staticmethod - def _clean_kwarg(inp): - """Clean any kwargs inputs (e.g., dist, relative) that might be - provided as an array and must be collapsed into a single string or - boolean value""" - unique = np.unique(inp) - msg = ('_QuantileDeltaMapping kwargs must have only one unique input ' - 'even if being called with arrays as part of reV but found: {}' - .format(unique)) - assert len(unique) == 1, msg - - while isinstance(inp, np.ndarray): - inp = inp[0] - - return inp - - def _clean_params(self, params, arr_shape): - """Verify and clean 2D parameter arrays for passing into empirical - distribution or scipy continuous distribution functions. - - Parameters - ---------- - params : np.ndarray - Input params shape should be (space, N) where N is the number of - parameters for the distribution. - arr_shape : tuple - Array shape should be (time, space). - - Returns - ------- - params : np.ndarray | list - If a scipy continuous dist is set, this output will be params - unpacked along axis=1 into a list so that the list entries - represent the scipy distribution parameters - (e.g., shape, scale, loc) and each list entry is of shape (space,) - """ - - msg = f'params must be 2D array but received {type(params)}' - assert isinstance(params, np.ndarray), msg - - if len(params.shape) == 1: - params = np.expand_dims(params, 0) - - msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) ' - f'but received shape {params.shape}') - assert len(params.shape) == 2, msg - assert params.shape[0] == arr_shape[1], msg - - if self.scipy_dist is not None: - params = [params[:, i] for i in range(params.shape[1])] - - return params - - def _get_quantiles(self, n_samples): - """If dist='empirical', this will get the quantile values for the CDF - x-values specified in the input params""" - - if self.sampling == 'linear': - quantiles = np.linspace(0, 1, n_samples) - - elif self.sampling == 'log': - quantiles = np.logspace(0, 1, n_samples, base=self.log_base) - quantiles = (quantiles - 1) / (self.log_base - 1) - - elif self.sampling == 'invlog': - quantiles = np.logspace(0, 1, n_samples, base=self.log_base) - quantiles = (quantiles - 1) / (self.log_base - 1) - quantiles = np.array(sorted(1 - quantiles)) - - else: - msg = ('sampling option must be linear, log, or invlog, but ' - 'received: {}'.format(self.sampling)) - logger.error(msg) - raise KeyError(msg) - - return quantiles - - def cdf(self, x, params): - """Run the CDF function e.g., convert physical variable to quantile""" - - if self.scipy_dist is None: - p = np.zeros_like(x) - for idx in range(x.shape[1]): - xp = params[idx, :] - fp = self._get_quantiles(len(xp)) - p[:, idx] = np.interp(x[:, idx], xp, fp) - else: - p = self.scipy_dist.cdf(x, *params) - - return p - - def ppf(self, p, params): - """Run the inverse CDF function (percent point function) e.g., convert - quantile to physical variable""" - - if self.scipy_dist is None: - x = np.zeros_like(p) - for idx in range(p.shape[1]): - fp = params[idx, :] - xp = self._get_quantiles(len(fp)) - x[:, idx] = np.interp(p[:, idx], xp, fp) - else: - x = self.scipy_dist.ppf(p, *params) - - return x - - def __call__(self, arr): - """Run the QDM function to bias correct an array - - Parameters - ---------- - arr : np.ndarray - 2D array of values in shape (time, space) - - Returns - ------- - arr : np.ndarray - Bias corrected copy of the input array with same shape. - """ - - if len(arr.shape) == 1: - arr = np.expand_dims(arr, 1) - - params_oh = self._clean_params(self.params_oh, arr.shape) - params_mh = self._clean_params(self.params_mh, arr.shape) - params_mf = self._clean_params(self.params_mf, arr.shape) - - q_mf = self.cdf(arr, params_mf) # Tau_m_p - x_oh = self.ppf(q_mf, params_oh) # x^_o:m_h:p - x_mh_mf = self.ppf(q_mf, params_mh) # F-1_mh[Tau_m_p] - - if self.relative: - x_mh_mf[x_mh_mf == 0] = 0.001 # arbitrary limit to prevent div 0 - delta = arr / x_mh_mf - arr_bc = x_oh * delta - else: - delta = arr - x_mh_mf - arr_bc = x_oh + delta - - msg = ('Input shape {} does not match QDM bias corrected output ' - 'shape {}!'.format(arr.shape, arr_bc.shape)) - assert arr.shape == arr_bc.shape, msg - - return arr_bc - - def qdm_irrad(ghi, dni, dhi, ghi_params_oh, dni_params_oh, ghi_params_mh, dni_params_mh, @@ -452,7 +223,8 @@ def qdm_irrad(ghi, dni, dhi, sampling : str | np.ndarray If dist="empirical", this is an option for how the quantiles were sampled to produce the params inputs, e.g., how to sample the - y-axis of the distribution. "linear" will do even spacing, "log" + y-axis of the distribution (see sampling functions in + ``rex.utilities.bc_utils``). "linear" will do even spacing, "log" will concentrate samples near quantile=0, and "invlog" will concentrate samples near quantile=1. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. @@ -474,14 +246,14 @@ def qdm_irrad(ghi, dni, dhi, ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi) - ghi_qdm = _QuantileDeltaMapping(ghi_params_oh, ghi_params_mh, - ghi_params_mf, dist=dist, - relative=relative, sampling=sampling, - log_base=log_base) - dni_qdm = _QuantileDeltaMapping(dni_params_oh, dni_params_mh, - dni_params_mf, dist=dist, - relative=relative, sampling=sampling, - log_base=log_base) + ghi_qdm = QuantileDeltaMapping(ghi_params_oh, ghi_params_mh, + ghi_params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) + dni_qdm = QuantileDeltaMapping(dni_params_oh, dni_params_mh, + dni_params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) # This will prevent inverse CDF functions from returning zero resulting in # a divide by zero error in the calculation of the QDM delta. These zeros @@ -544,7 +316,8 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', sampling : str | np.ndarray If dist="empirical", this is an option for how the quantiles were sampled to produce the params inputs, e.g., how to sample the - y-axis of the distribution. "linear" will do even spacing, "log" + y-axis of the distribution (see sampling functions in + ``rex.utilities.bc_utils``). "linear" will do even spacing, "log" will concentrate samples near quantile=0, and "invlog" will concentrate samples near quantile=1. Can also be a 1D array of dist inputs if being used from reV, but they must all be the same option. @@ -560,9 +333,9 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', 2D array of windspeed values in shape (time, space) """ - qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist, - relative=relative, sampling=sampling, - log_base=log_base) + qdm = QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) # This will prevent inverse CDF functions from returning zero resulting in # a divide by zero error in the calculation of the QDM delta. These zeros diff --git a/rex/sam_resource.py b/rex/sam_resource.py index b504b9c9..204a70c5 100644 --- a/rex/sam_resource.py +++ b/rex/sam_resource.py @@ -9,7 +9,7 @@ from warnings import warn import logging -from rex.utilities.bc_utils import _parse_bc_table +from rex.utilities.bc_parse_table import parse_bc_table from rex.utilities.exceptions import (ResourceKeyError, ResourceRuntimeError, ResourceValueError, SAMInputWarning) from rex.utilities.parse_keys import parse_keys @@ -719,7 +719,7 @@ def bias_correct(self, bc_df): bias correction methods. """ - bc_fun, bc_fun_kwargs, bool_bc = _parse_bc_table(bc_df, self.sites) + bc_fun, bc_fun_kwargs, bool_bc = parse_bc_table(bc_df, self.sites) if not bool_bc.any(): return diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py index c6510a36..59d520db 100644 --- a/rex/temporal_stats/temporal_stats.py +++ b/rex/temporal_stats/temporal_stats.py @@ -10,6 +10,8 @@ import pandas as pd from rex.resource import Resource +from rex.utilities.bc_utils import (sample_q_linear, sample_q_log, + sample_q_invlog) from rex.utilities.execution import SpawnProcessPool from rex.utilities.loggers import log_mem, log_versions, create_dirs from rex.utilities.utilities import get_lat_lon_cols, slice_sites @@ -93,7 +95,8 @@ def cdf(data, n_samples=50, sampling='linear', log_base=10, decimals=None): n_samples : int Number of points to fit the CDF sampling : str - Option for quantile sampling, e.g., how to sample the y-axis of the + Option for quantile sampling (see sampling functions in + ``rex.utilities.bc_utils``), e.g., how to sample the y-axis of the distribution. "linear" will do even spacing, "log" will concentrate samples near quantile=0, and "invlog" will concentrate samples near quantile=1 @@ -119,15 +122,13 @@ def cdf(data, n_samples=50, sampling='linear', log_base=10, decimals=None): if nan_mask.all(): return np.zeros(n_samples) + sampling = sampling.casefold() if sampling == 'linear': - quantiles = np.linspace(0, 1, n_samples) + quantiles = sample_q_linear(n_samples) elif sampling == 'log': - quantiles = np.logspace(0, 1, n_samples, base=log_base) - quantiles = (quantiles - 1) / (log_base - 1) + quantiles = sample_q_log(n_samples, log_base) elif sampling == 'invlog': - quantiles = np.logspace(0, 1, n_samples, base=log_base) - quantiles = (quantiles - 1) / (log_base - 1) - quantiles = np.array(sorted(1 - quantiles)) + quantiles = sample_q_invlog(n_samples, log_base) else: msg = ('sampling option must be linear, log, or invlog, but received: ' '{}'.format(sampling)) diff --git a/rex/utilities/bc_parse_table.py b/rex/utilities/bc_parse_table.py new file mode 100644 index 00000000..0f65f3df --- /dev/null +++ b/rex/utilities/bc_parse_table.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +""" +rex bias correction utilities. +""" +import json +import numpy as np +import logging +from warnings import warn +import rex.bias_correction + + +logger = logging.getLogger(__name__) + + +def parse_bc_table(bc_df, gids): + """Parse the bias correction table for required bc functions and kwargs + + Parameters + ---------- + bc_df : pd.DataFrame + DataFrame with wind or solar resource bias correction table. This + must have columns "gid" and "method", where "gid" is the resource + file indices, and "method" is a function name from the + ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are + corrected, depending on the technology. See the + ``rex.bias_correction`` module for more details on available + bias correction methods. + gids : list | np.ndarray + Array of integer gids (spatial indices) from the source h5 file. + This is used to get the correct bias correction parameters from + ``bias_correct`` table based on its ``gid`` column + + Returns + ------- + bc_fun : function + Function from ``rex.bias_correction`` to use. + bc_fun_kwargs : dict + Kwargs from ``bc_df`` to input to ``bc_fun``. This may include extra + kwargs that are not required by ``bc_fun`` and should be cleaned before + passing to the function. + bool_bc : np.ndarray + 1D Boolean array with length equal to the ``gids`` input with ``True`` + where data has available bias correction inputs in ``bc_df`` and + ``False`` where not + """ + + if 'method' not in bc_df: + msg = ('Bias correction table provided, but "method" column not ' + 'found! Only see columns: {}. Need to specify "method" which ' + 'is a function name from `rex.bias_correction`' + .format(list(bc_df.columns))) + logger.error(msg) + raise KeyError(msg) + + if bc_df.index.name != 'gid': + if 'gid' not in bc_df: + msg = ('Bias correction table must have "gid" column but only ' + 'found: {}'.format(list(bc_df.columns))) + logger.error(msg) + raise KeyError(msg) + bc_df = bc_df.set_index('gid') + + gid_arr = np.array(gids) + bool_bc = np.isin(gid_arr, bc_df.index.values) + + if not bool_bc.any(): + return None, {}, bool_bc + + if not bool_bc.all(): + missing = gid_arr[~bool_bc] + msg = ('{} sites were missing from the bias correction table, ' + 'not bias correcting: {}'.format(len(missing), missing)) + logger.warning(msg) + warn(msg) + + fun_name = bc_df['method'].unique() + msg = ('rex bias correction currently only supports a single unique ' + 'bias correction method per chunk of sites but received: {}' + .format(fun_name)) + assert len(fun_name) == 1, msg + bc_fun = getattr(rex.bias_correction, fun_name[0], None) + if bc_fun is None: + avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')] + msg = ('Could not find method name "{}" in ``rex.bias_correction`` ' + 'which has the available objects: {}' + .format(fun_name[0], avail)) + logger.error(msg) + raise KeyError(msg) + + bc_fun_kwargs = {} + for col in bc_df.columns: + + # load serialized lists from string columns in bc_df into nested lists + sample = bc_df[col].values[0] + if isinstance(sample, str) and '[' in sample and ']' in sample: + bc_df.loc[:, col] = bc_df[col].apply(json.loads) + + arr = bc_df.loc[gid_arr[bool_bc], col].values + + # nested lists in bc_df converted to arr of shape (space, N) + if isinstance(arr[0], (list, tuple)): + arr = np.array(arr.tolist()) + + bc_fun_kwargs[col] = arr + + return bc_fun, bc_fun_kwargs, bool_bc diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py index 45995c70..d74183d3 100644 --- a/rex/utilities/bc_utils.py +++ b/rex/utilities/bc_utils.py @@ -2,104 +2,293 @@ """ rex bias correction utilities. """ -import json +import scipy import numpy as np import logging -from warnings import warn -import rex.bias_correction logger = logging.getLogger(__name__) -def _parse_bc_table(bc_df, gids): - """Parse the bias correction table for required bc functions and kwargs +def sample_q_linear(n_samples): + """Sample quantiles from 0 to 1 inclusive linearly with even spacing Parameters ---------- - bc_df : pd.DataFrame - DataFrame with wind or solar resource bias correction table. This - must have columns "gid" and "method", where "gid" is the resource - file indices, and "method" is a function name from the - ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are - corrected, depending on the technology. See the - ``rex.bias_correction`` module for more details on available - bias correction methods. - gids : list | np.ndarray - Array of integer gids (spatial indices) from the source h5 file. - This is used to get the correct bias correction parameters from - ``bias_correct`` table based on its ``gid`` column + n_samples : int + Number of points to sample between 0 and 1 Returns ------- - bc_fun : function - Function from ``rex.bias_correction`` to use. - bc_fun_kwargs : dict - Kwargs from ``bc_df`` to input to ``bc_fun``. This may include extra - kwargs that are not required by ``bc_fun`` and should be cleaned before - passing to the function. - bool_bc : np.ndarray - 1D Boolean array with length equal to the ``gids`` input with ``True`` - where data has available bias correction inputs in ``bc_df`` and - ``False`` where not + quantiles : np.ndarray + 1D array of evenly spaced samples from 0 to 1 """ + quantiles = np.linspace(0, 1, n_samples) + return quantiles - if 'method' not in bc_df: - msg = ('Bias correction table provided, but "method" column not ' - 'found! Only see columns: {}. Need to specify "method" which ' - 'is a function name from `rex.bias_correction`' - .format(list(bc_df.columns))) - raise KeyError(msg) - - if bc_df.index.name != 'gid': - if 'gid' not in bc_df: - msg = ('Bias correction table must have "gid" column but only ' - 'found: {}'.format(list(bc_df.columns))) + +def sample_q_log(n_samples, log_base): + """Sample quantiles from 0 to 1 while concentrating samples near quantile=0 + + Parameters + ---------- + n_samples : int + Number of points to sample between 0 and 1 + log_base : int | float + Log base value. A higher value will concentrate more samples at the + extreme sides of the distribution. + + Returns + ------- + quantiles : np.ndarray + 1D array of log-spaced samples from 0 to 1 + """ + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + return quantiles + + +def sample_q_invlog(n_samples, log_base): + """Sample quantiles from 0 to 1 while concentrating samples near quantile=1 + + Parameters + ---------- + n_samples : int + Number of points to sample between 0 and 1 + log_base : int | float + Log base value. A higher value will concentrate more samples at the + extreme sides of the distribution. + + Returns + ------- + quantiles : np.ndarray + 1D array of log-spaced samples from 0 to 1 + """ + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + quantiles = np.array(sorted(1 - quantiles)) + return quantiles + + +class QuantileDeltaMapping: + """Class for quantile delta mapping based on the method from + Cannon et al., 2015 + + Note that this is a utility class for implementing QDM and should not be + requested directly as a method in the reV/rex bias correction table input + + Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in + Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). + """ + + def __init__(self, params_oh, params_mh, params_mf, dist='empirical', + relative=True, sampling='linear', log_base=10): + """ + Parameters + ---------- + params_oh : np.ndarray + 2D array of **observed historical** distribution parameters created + from a multi-year set of data where the shape is (space, N). This + can be the output of a parametric distribution fit like + ``scipy.stats.weibull_min.fit()`` where N is the number of + parameters for that distribution, or this can define the x-values + of N points from an empirical CDF that will be linearly + interpolated between. If this is an empirical CDF, this must + include the 0th and 100th percentile values and have even + percentile spacing between values. + params_mh : np.ndarray + Same requirements as params_oh. This input arg is for the **modeled + historical distribution**. + params_mf : np.ndarray | None + Same requirements as params_oh. This input arg is for the **modeled + future distribution**. If this is None, this defaults to params_mh + (no future data, just corrected to modeled historical distribution) + dist : str | np.ndarray + Probability distribution name to use to model the data which + determines how the param args are used. This can "empirical" or any + continuous distribution name from ``scipy.stats``. Can also be a 1D + array of dist inputs if being used from reV, but they must all be + the same option. + relative : bool | np.ndarray + Flag to preserve relative rather than absolute changes in + quantiles. relative=False (default) will multiply by the change in + quantiles while relative=True will add. See Equations 4-6 from + Cannon et al., 2015 for more details. Can also be a 1D array of + dist inputs if being used from reV, but they must all be the same + option. + sampling : str | np.ndarray + If dist="empirical", this is an option for how the quantiles were + sampled to produce the params inputs, e.g., how to sample the + y-axis of the distribution (see sampling functions in + ``rex.utilities.bc_utils``). "linear" will do even spacing, "log" + will concentrate samples near quantile=0, and "invlog" will + concentrate samples near quantile=1. Can also be a 1D array of dist + inputs if being used from reV, but they must all be the same + option. + log_base : int | float | np.ndarray + Log base value if sampling is "log" or "invlog". A higher value + will concentrate more samples at the extreme sides of the + distribution. Can also be a 1D array of dist inputs if being used + from reV, but they must all be the same option. + """ + + self.params_oh = params_oh + self.params_mh = params_mh + self.params_mf = params_mf if params_mf is not None else params_mh + self.relative = bool(self._clean_kwarg(relative)) + self.dist_name = str(self._clean_kwarg(dist)).casefold() + self.sampling = str(self._clean_kwarg(sampling)).casefold() + self.log_base = float(self._clean_kwarg(log_base)) + self.scipy_dist = None + + if self.dist_name != 'empirical': + self.scipy_dist = getattr(scipy.stats, self.dist_name, None) + if self.scipy_dist is None: + msg = ('Could not get requested distribution "{}" from ' + '``scipy.stats``. Please double check your spelling ' + 'and select "empirical" or one of the continuous ' + 'distribution options from here: ' + 'https://docs.scipy.org/doc/scipy/reference/stats.html' + .format(self.dist_name)) + logger.error(msg) + raise KeyError(msg) + + @staticmethod + def _clean_kwarg(inp): + """Clean any kwargs inputs (e.g., dist, relative) that might be + provided as an array and must be collapsed into a single string or + boolean value""" + unique = np.unique(inp) + msg = ('_QuantileDeltaMapping kwargs must have only one unique input ' + 'even if being called with arrays as part of reV but found: {}' + .format(unique)) + assert len(unique) == 1, msg + + while isinstance(inp, np.ndarray): + inp = inp[0] + + return inp + + def _clean_params(self, params, arr_shape): + """Verify and clean 2D parameter arrays for passing into empirical + distribution or scipy continuous distribution functions. + + Parameters + ---------- + params : np.ndarray + Input params shape should be (space, N) where N is the number of + parameters for the distribution. + arr_shape : tuple + Array shape should be (time, space). + + Returns + ------- + params : np.ndarray | list + If a scipy continuous dist is set, this output will be params + unpacked along axis=1 into a list so that the list entries + represent the scipy distribution parameters + (e.g., shape, scale, loc) and each list entry is of shape (space,) + """ + + msg = f'params must be 2D array but received {type(params)}' + assert isinstance(params, np.ndarray), msg + + if len(params.shape) == 1: + params = np.expand_dims(params, 0) + + msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) ' + f'but received shape {params.shape}') + assert len(params.shape) == 2, msg + assert params.shape[0] == arr_shape[1], msg + + if self.scipy_dist is not None: + params = [params[:, i] for i in range(params.shape[1])] + + return params + + def _get_quantiles(self, n_samples): + """If dist='empirical', this will get the quantile values for the CDF + x-values specified in the input params""" + + if self.sampling == 'linear': + quantiles = sample_q_linear(n_samples) + elif self.sampling == 'log': + quantiles = sample_q_log(n_samples, self.log_base) + elif self.sampling == 'invlog': + quantiles = sample_q_invlog(n_samples, self.log_base) + else: + msg = ('sampling option must be linear, log, or invlog, but ' + 'received: {}'.format(self.sampling)) logger.error(msg) raise KeyError(msg) - bc_df = bc_df.set_index('gid') - - gid_arr = np.array(gids) - bool_bc = np.isin(gid_arr, bc_df.index.values) - - if not bool_bc.any(): - return None, {}, bool_bc - - elif not bool_bc.all(): - missing = gid_arr[~bool_bc] - msg = ('{} sites were missing from the bias correction table, ' - 'not bias correcting: {}'.format(len(missing), missing)) - logger.warning(msg) - warn(msg) - - fun_name = bc_df['method'].unique() - msg = ('rex bias correction currently only supports a single unique ' - 'bias correction method per chunk of sites but received: {}' - .format(fun_name)) - assert len(fun_name) == 1, msg - bc_fun = getattr(rex.bias_correction, fun_name[0], None) - if bc_fun is None: - avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')] - msg = ('Could not find method name "{}" in ``rex.bias_correction`` ' - 'which has the available objects: {}' - .format(fun_name[0], avail)) - logger.error(msg) - raise KeyError(msg) - - bc_fun_kwargs = {} - for col in bc_df.columns: - - # load serialized lists from string columns in bc_df into nested lists - sample = bc_df[col].values[0] - if isinstance(sample, str) and '[' in sample and ']' in sample: - bc_df.loc[:, col] = bc_df[col].apply(json.loads) - - arr = bc_df.loc[gid_arr[bool_bc], col].values - - # nested lists in bc_df converted to arr of shape (space, N) - if isinstance(arr[0], (list, tuple)): - arr = np.array(arr.tolist()) - - bc_fun_kwargs[col] = arr - - return bc_fun, bc_fun_kwargs, bool_bc + + return quantiles + + def cdf(self, x, params): + """Run the CDF function e.g., convert physical variable to quantile""" + + if self.scipy_dist is None: + p = np.zeros_like(x) + for idx in range(x.shape[1]): + xp = params[idx, :] + fp = self._get_quantiles(len(xp)) + p[:, idx] = np.interp(x[:, idx], xp, fp) + else: + p = self.scipy_dist.cdf(x, *params) + + return p + + def ppf(self, p, params): + """Run the inverse CDF function (percent point function) e.g., convert + quantile to physical variable""" + + if self.scipy_dist is None: + x = np.zeros_like(p) + for idx in range(p.shape[1]): + fp = params[idx, :] + xp = self._get_quantiles(len(fp)) + x[:, idx] = np.interp(p[:, idx], xp, fp) + else: + x = self.scipy_dist.ppf(p, *params) + + return x + + def __call__(self, arr): + """Run the QDM function to bias correct an array + + Parameters + ---------- + arr : np.ndarray + 2D array of values in shape (time, space) + + Returns + ------- + arr : np.ndarray + Bias corrected copy of the input array with same shape. + """ + + if len(arr.shape) == 1: + arr = np.expand_dims(arr, 1) + + params_oh = self._clean_params(self.params_oh, arr.shape) + params_mh = self._clean_params(self.params_mh, arr.shape) + params_mf = self._clean_params(self.params_mf, arr.shape) + + q_mf = self.cdf(arr, params_mf) # Tau_m_p + x_oh = self.ppf(q_mf, params_oh) # x^_o:m_h:p + x_mh_mf = self.ppf(q_mf, params_mh) # F-1_mh[Tau_m_p] + + if self.relative: + x_mh_mf[x_mh_mf == 0] = 0.001 # arbitrary limit to prevent div 0 + delta = arr / x_mh_mf + arr_bc = x_oh * delta + else: + delta = arr - x_mh_mf + arr_bc = x_oh + delta + + msg = ('Input shape {} does not match QDM bias corrected output ' + 'shape {}!'.format(arr.shape, arr_bc.shape)) + assert arr.shape == arr_bc.shape, msg + + return arr_bc From f58c64662771f2aedce98942d6adca9b427893f9 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Fri, 12 Jan 2024 12:13:01 -0700 Subject: [PATCH 18/19] increment rex version to support reV bias correction dependency --- rex/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rex/version.py b/rex/version.py index 091abf93..dd92f834 100644 --- a/rex/version.py +++ b/rex/version.py @@ -1,3 +1,3 @@ """rex Version number""" -__version__ = "0.2.84" +__version__ = "0.2.85" From 634ec22543e130e9d019c2154521349c8104aa72 Mon Sep 17 00:00:00 2001 From: grantbuster Date: Tue, 16 Jan 2024 10:45:16 -0700 Subject: [PATCH 19/19] added more explicit QDM equation references to Cannon --- rex/utilities/bc_utils.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py index d74183d3..6cb415ea 100644 --- a/rex/utilities/bc_utils.py +++ b/rex/utilities/bc_utils.py @@ -275,17 +275,23 @@ def __call__(self, arr): params_mh = self._clean_params(self.params_mh, arr.shape) params_mf = self._clean_params(self.params_mf, arr.shape) - q_mf = self.cdf(arr, params_mf) # Tau_m_p - x_oh = self.ppf(q_mf, params_oh) # x^_o:m_h:p - x_mh_mf = self.ppf(q_mf, params_mh) # F-1_mh[Tau_m_p] + # Equation references are from Section 3 of Cannon et al 2015: + # Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + # Precipitation by Quantile Mapping: How Well Do Methods Preserve + # Changes in Quantiles and Extremes? Journal of Climate 28, 6938–6959 + # (2015). + + q_mf = self.cdf(arr, params_mf) # Eq.3: Tau_m_p = F_m_p(x_m_p) + x_oh = self.ppf(q_mf, params_oh) # Eq.5: x^_o:m_h:p = F-1_o_h(Tau_m_p) + x_mh_mf = self.ppf(q_mf, params_mh) # Eq.4 denom: F-1_m_h(Tau_m_p) if self.relative: x_mh_mf[x_mh_mf == 0] = 0.001 # arbitrary limit to prevent div 0 - delta = arr / x_mh_mf - arr_bc = x_oh * delta + delta = arr / x_mh_mf # Eq.4: x_m_p / F-1_m_h(Tau_m_p) + arr_bc = x_oh * delta # Eq.6: x^_m_p = x^_o:m_h:p * delta else: - delta = arr - x_mh_mf - arr_bc = x_oh + delta + delta = arr - x_mh_mf # Eq.4: x_m_p - F-1_m_h(Tau_m_p) + arr_bc = x_oh + delta # Eq.6: x^_m_p = x^_o:m_h:p + delta msg = ('Input shape {} does not match QDM bias corrected output ' 'shape {}!'.format(arr.shape, arr_bc.shape))