diff --git a/rex/bias_correction.py b/rex/bias_correction.py new file mode 100644 index 00000000..cfb945c6 --- /dev/null +++ b/rex/bias_correction.py @@ -0,0 +1,351 @@ +# -*- coding: utf-8 -*- +""" +Module to perform bias correction of renewable energy resource data +""" +import numpy as np +import logging + +from rex.utilities.bc_utils import QuantileDeltaMapping + +logger = logging.getLogger(__name__) + + +def _irrad_pre_proc(ghi, dni, dhi): + """Irradiance data pre processing to get ancillary variables + (run before bias correction). + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + + Returns + ------- + ghi_zeros : np.ndarray + 2D boolean array, True where ghi==0, same shape as ghi input + dni_zeros : np.ndarray + 2D boolean array, True where dni==0, same shape as dni input + dhi_zeros : np.ndarray + 2D boolean array, True where dhi==0, same shape as dhi input + cos_sza : np.ndarray + 2D array for cos(solar_zenith_angle) calculated from the basic + relationship ``cos_sza = (ghi - dhi) / dni`` + """ + + ghi_zeros = ghi == 0 + dni_zeros = dni == 0 + dhi_zeros = dhi == 0 + with np.errstate(divide='ignore', invalid='ignore'): + cos_sza = (ghi - dhi) / dni + + return ghi_zeros, dni_zeros, dhi_zeros, cos_sza + + +def _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, cos_sza): + """Irradiance data post processing to calculate DHI and set limits on + irradiance variables (run after bias correction). + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + ghi_zeros : np.ndarray + 2D boolean array, True where ghi==0, same shape as ghi input + dni_zeros : np.ndarray + 2D boolean array, True where dni==0, same shape as dni input + dhi_zeros : np.ndarray + 2D boolean array, True where dhi==0, same shape as dhi input + cos_sza : np.ndarray + 2D array for cos(solar_zenith_angle) calculated from the basic + relationship ``cos_sza = (ghi - dhi) / dni`` + + Returns + ------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + """ + + ghi = np.maximum(0, ghi) + dni = np.maximum(0, dni) + + ghi[ghi_zeros] = 0 + dni[dni_zeros] = 0 + + with np.errstate(divide='ignore', invalid='ignore'): + dhi = ghi - (dni * cos_sza) + + dhi[dni_zeros] = ghi[dni_zeros] + dhi[dhi_zeros] = 0 + dhi = np.maximum(0, dhi) + + assert not np.isnan(dhi).any() + + return ghi, dni, dhi + + +def lin_irrad(ghi, dni, dhi, scalar=1, adder=0): + """Correct GHI and DNI using linear correction factors. Both irradiance + variables are corrected as ``irradiance * scalar + adder``. DHI is + preserved based on the relationship ``dhi = ghi - (dni * cos(sza))``. Times + when GHI and DNI are zero are preserved and negative values are protected + against. + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + scalar : np.ndarray + 1D array of linear scalar values in the shape (space,) + adder : np.ndarray + 1D array of linear adder values in the shape (space,) + + Returns + ------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + """ + + ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi) + + ghi = ghi * scalar + adder + dni = dni * scalar + adder + + ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, + cos_sza) + + return ghi, dni, dhi + + +def lin_ws(ws, scalar=1, adder=0): + """Correct windspeed using linear correction factors. Windspeed is + corrected as ``windspeed * scalar + adder`` with a minimum of zero. + + Parameters + ---------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + scalar : np.ndarray + 1D array of linear scalar values in the shape (space,) + adder : np.ndarray + 1D array of linear adder values in the shape (space,) + + Returns + ------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + """ + ws = ws * scalar + adder + ws = np.maximum(ws, 0) + return ws + + +def qdm_irrad(ghi, dni, dhi, + ghi_params_oh, dni_params_oh, + ghi_params_mh, dni_params_mh, + ghi_params_mf=None, dni_params_mf=None, + dist='empirical', relative=True, + sampling='linear', log_base=10): + """Correct irradiance using the quantile delta mapping based on the method + from Cannon et al., 2015 + + Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in + Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). + + Parameters + ---------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + ghi_params_oh : np.ndarray | list + 2D array of **observed historical** distribution parameters created + from a multi-year set of data where the shape is (space, N). This + can be the output of a parametric distribution fit like + ``scipy.stats.weibull_min.fit()`` where N is the number of + parameters for that distribution, or this can define the x-values + of N points from an empirical CDF that will be linearly + interpolated between. If this is an empirical CDF, this must + include the 0th and 100th percentile values and have even + percentile spacing between values. + dni_params_oh : np.ndarray | list + Same requirements as ghi_params_oh. This input arg is for the + **observed historical distribution** for DNI. + ghi_params_mh : np.ndarray | list + Same requirements as ghi_params_oh. This input arg is for the **modeled + historical distribution** for GHI. + dni_params_mh : np.ndarray | list + Same requirements as ghi_params_oh. This input arg is for the **modeled + historical distribution** for DNI. + ghi_params_mf : np.ndarray | list | None + Same requirements as ghi_params_oh. This input arg is for the **modeled + future distribution** for GHI. If this is None, this defaults to + ghi_params_mh (no future data, just corrected to modeled historical + distribution) + dni_params_mf : np.ndarray | list | None + Same requirements as ghi_params_oh. This input arg is for the **modeled + future distribution** for DNI. If this is None, this defaults to + dni_params_mh. (no future data, just corrected to modeled historical + distribution) + dist : str | np.ndarray + Probability distribution name to use to model the data which + determines how the param args are used. This can "empirical" or any + continuous distribution name from ``scipy.stats``. Can also be a 1D + array of dist inputs if being used from reV, but they must all be + the same option. + relative : bool | np.ndarray + Flag to preserve relative rather than absolute changes in + quantiles. relative=False (default) will multiply by the change in + quantiles while relative=True will add. See Equations 4-6 from + Cannon et al., 2015 for more details. Can also be a 1D array of + dist inputs if being used from reV, but they must all be the same + option. + sampling : str | np.ndarray + If dist="empirical", this is an option for how the quantiles were + sampled to produce the params inputs, e.g., how to sample the + y-axis of the distribution (see sampling functions in + ``rex.utilities.bc_utils``). "linear" will do even spacing, "log" + will concentrate samples near quantile=0, and "invlog" will + concentrate samples near quantile=1. Can also be a 1D array of dist + inputs if being used from reV, but they must all be the same option. + log_base : int | float | np.ndarray + Log base value if sampling is "log" or "invlog". A higher value + will concentrate more samples at the extreme sides of the + distribution. Can also be a 1D array of dist inputs if being used from + reV, but they must all be the same option. + + Returns + ------- + ghi : np.ndarray + 2D array of global horizontal irradiance values in shape (time, space) + dni : np.ndarray + 2D array of direct normal irradiance values in shape (time, space) + dhi : np.ndarray + 2D array of diffuse horizontal irradiance values in shape (time, space) + """ + + ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi) + + ghi_qdm = QuantileDeltaMapping(ghi_params_oh, ghi_params_mh, + ghi_params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) + dni_qdm = QuantileDeltaMapping(dni_params_oh, dni_params_mh, + dni_params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) + + # This will prevent inverse CDF functions from returning zero resulting in + # a divide by zero error in the calculation of the QDM delta. These zeros + # get fixed later in _irrad_post_proc + ghi[ghi_zeros] = 1 + dni[dni_zeros] = 1 + + ghi = ghi_qdm(ghi) + dni = dni_qdm(dni) + + ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, + cos_sza) + + return ghi, dni, dhi + + +def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical', + relative=True, sampling='linear', log_base=10): + """Correct windspeed using quantile delta mapping based on the method from + Cannon et al., 2015 + + Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in + Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). + + Parameters + ---------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + params_oh : np.ndarray | list + 2D array of **observed historical** distribution parameters created + from a multi-year set of data where the shape is (space, N). This + can be the output of a parametric distribution fit like + ``scipy.stats.weibull_min.fit()`` where N is the number of + parameters for that distribution, or this can define the x-values + of N points from an empirical CDF that will be linearly + interpolated between. If this is an empirical CDF, this must + include the 0th and 100th percentile values and have even + percentile spacing between values. + params_mh : np.ndarray | list + Same requirements as params_oh. This input arg is for the **modeled + historical distribution**. + params_mf : np.ndarray | list | None + Same requirements as params_oh. This input arg is for the **modeled + future distribution**. If this is None, this defaults to params_mh + (no future data, just corrected to modeled historical distribution) + dist : str | np.ndarray + Probability distribution name to use to model the data which + determines how the param args are used. This can "empirical" or any + continuous distribution name from ``scipy.stats``. Can also be a 1D + array of dist inputs if being used from reV, but they must all be + the same option. + relative : bool | np.ndarray + Flag to preserve relative rather than absolute changes in + quantiles. relative=False (default) will multiply by the change in + quantiles while relative=True will add. See Equations 4-6 from + Cannon et al., 2015 for more details. Can also be a 1D array of + dist inputs if being used from reV, but they must all be the same + option. + sampling : str | np.ndarray + If dist="empirical", this is an option for how the quantiles were + sampled to produce the params inputs, e.g., how to sample the + y-axis of the distribution (see sampling functions in + ``rex.utilities.bc_utils``). "linear" will do even spacing, "log" + will concentrate samples near quantile=0, and "invlog" will + concentrate samples near quantile=1. Can also be a 1D array of dist + inputs if being used from reV, but they must all be the same option. + log_base : int | float | np.ndarray + Log base value if sampling is "log" or "invlog". A higher value + will concentrate more samples at the extreme sides of the + distribution. Can also be a 1D array of dist inputs if being used from + reV, but they must all be the same option. + + Returns + ------- + ws : np.ndarray + 2D array of windspeed values in shape (time, space) + """ + + qdm = QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist, + relative=relative, sampling=sampling, + log_base=log_base) + + # This will prevent inverse CDF functions from returning zero resulting in + # a divide by zero error in the calculation of the QDM delta. These zeros + # get fixed later + ws_zeros = ws == 0 + ws[ws_zeros] = 0.01 + + ws = qdm(ws) + + ws = np.maximum(ws, 0) + ws[ws_zeros] = 0 + + return ws diff --git a/rex/sam_resource.py b/rex/sam_resource.py index ff59c786..204a70c5 100644 --- a/rex/sam_resource.py +++ b/rex/sam_resource.py @@ -3,11 +3,13 @@ Module to handle SAM Resource iterator to create site by site resource DataFrames """ +from inspect import signature import numpy as np import pandas as pd from warnings import warn import logging +from rex.utilities.bc_parse_table import parse_bc_table from rex.utilities.exceptions import (ResourceKeyError, ResourceRuntimeError, ResourceValueError, SAMInputWarning) from rex.utilities.parse_keys import parse_keys @@ -709,73 +711,53 @@ def bias_correct(self, bc_df): ---------- bc_df : pd.DataFrame DataFrame with wind or solar resource bias correction table. This - has columns: gid, adder, scalar. If either adder or scalar is not - present, scalar defaults to 1 and adder to 0. Only windspeed or - GHI+DNI are corrected depending on the technology. GHI and DNI are - corrected with the same correction factors. + must have columns "gid" and "method", where "gid" is the resource + file indices, and "method" is a function name from the + ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are + corrected, depending on the technology. See the + ``rex.bias_correction`` module for more details on available + bias correction methods. """ - if 'adder' not in bc_df: - logger.info('Bias correction table provided, but "adder" not ' - 'found, defaulting to 0.') - bc_df['adder'] = 0 - - if 'scalar' not in bc_df: - logger.info('Bias correction table provided, but "scalar" not ' - 'found, defaulting to 1.') - bc_df['scalar'] = 1 - - if bc_df.index.name != 'gid': - msg = ('Bias correction table must have "gid" column but only ' - 'found: {}'.format(list(bc_df.columns))) - assert 'gid' in bc_df, msg - bc_df = bc_df.set_index('gid') - - site_arr = np.array(self.sites) - isin = np.isin(self.sites, bc_df.index.values) - if not isin.all(): - missing = site_arr[~isin] - msg = ('{} sites were missing from the bias correction table, ' - 'not bias correcting: {}'.format(len(missing), missing)) - logger.warning(msg) - warn(msg) - - scalar = np.expand_dims(bc_df.loc[site_arr[isin], 'scalar'].values, 0) - adder = np.expand_dims(bc_df.loc[site_arr[isin], 'adder'].values, 0) + bc_fun, bc_fun_kwargs, bool_bc = parse_bc_table(bc_df, self.sites) + + if not bool_bc.any(): + return if 'ghi' in self._res_arrays and 'dni' in self._res_arrays: + logger.debug('Bias correcting irradiance with function {} ' + 'for sites {}'.format(bc_fun, self.sites)) ghi = self._res_arrays['ghi'] dni = self._res_arrays['dni'] dhi = self._res_arrays['dhi'] - ghi_zeros = ghi == 0 - dni_zeros = dni == 0 - dhi_zeros = dhi == 0 - with np.errstate(divide='ignore', invalid='ignore'): - cos_sza = (ghi - dhi) / dni - - ghi[:, isin] = ghi[:, isin] * scalar + adder - dni[:, isin] = dni[:, isin] * scalar + adder - ghi = np.maximum(0, ghi) - dni = np.maximum(0, dni) - ghi[ghi_zeros] = 0 - dni[dni_zeros] = 0 - with np.errstate(divide='ignore', invalid='ignore'): - dhi[:, isin] = ghi[:, isin] - (dni[:, isin] * cos_sza[:, isin]) - - dhi[dni_zeros] = ghi[dni_zeros] - dhi = np.maximum(0, dhi) - dhi[dhi_zeros] = 0 - assert not np.isnan(dhi).any() + + bc_fun_kwargs['ghi'] = ghi[:, bool_bc] + bc_fun_kwargs['dni'] = dni[:, bool_bc] + bc_fun_kwargs['dhi'] = dhi[:, bool_bc] + + sig = signature(bc_fun) + bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() + if k in sig.parameters} + out = bc_fun(**bc_fun_kwargs) + + ghi[:, bool_bc] = out[0][:, bool_bc] + dni[:, bool_bc] = out[1][:, bool_bc] + dhi[:, bool_bc] = out[2][:, bool_bc] + self._res_arrays['ghi'] = ghi self._res_arrays['dni'] = dni self._res_arrays['dhi'] = dhi elif 'windspeed' in self._res_arrays: - logger.debug('Bias correcting windspeeds.') - arr = self._res_arrays['windspeed'] - arr[:, isin] = arr[:, isin] * scalar + adder - arr = np.maximum(arr, 0) - self._res_arrays['windspeed'] = arr + logger.debug('Bias correcting windspeed with function {} ' + 'for sites {}'.format(bc_fun, self.sites)) + ws = self._res_arrays['windspeed'] + bc_fun_kwargs['ws'] = ws[:, bool_bc] + sig = signature(bc_fun) + bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items() + if k in sig.parameters} + ws[:, bool_bc] = bc_fun(**bc_fun_kwargs) + self._res_arrays['windspeed'] = ws if self._mean_arrays is not None: # pylint: disable=consider-iterating-dictionary diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py index 3b17b934..59d520db 100644 --- a/rex/temporal_stats/temporal_stats.py +++ b/rex/temporal_stats/temporal_stats.py @@ -10,6 +10,8 @@ import pandas as pd from rex.resource import Resource +from rex.utilities.bc_utils import (sample_q_linear, sample_q_log, + sample_q_invlog) from rex.utilities.execution import SpawnProcessPool from rex.utilities.loggers import log_mem, log_versions, create_dirs from rex.utilities.utilities import get_lat_lon_cols, slice_sites @@ -83,6 +85,72 @@ def circular_mean(data, weights=None, degrees=True, axis=0, return mean +def cdf(data, n_samples=50, sampling='linear', log_base=10, decimals=None): + """Get a number of x-values that define a CDF for the input data. + + Parameters + ---------- + data : np.ndarray + 1D array of data to make a CDF for + n_samples : int + Number of points to fit the CDF + sampling : str + Option for quantile sampling (see sampling functions in + ``rex.utilities.bc_utils``), e.g., how to sample the y-axis of the + distribution. "linear" will do even spacing, "log" will concentrate + samples near quantile=0, and "invlog" will concentrate samples near + quantile=1 + log_base : int | float + Log base value if sampling is "log" or "invlog". A higher value will + concentrate more samples at the extreme sides of the distribution. + decimals : int | None + Precision to round output to (see docstring for np.round). None will + not round outputs (default). + + Returns + ------- + x_values : np.ndarray + 1D array of values with shape (n_samples,). Each value is in the same + units as the input data argument. The x_values[0] is the minimum value + of data (0th percentile) and x_values[-1] is the maximum + (100th percentile). The values are spaced in quantile space (y-axis of + the CDF) according to the sampling option (e.g., evenly spaced if + sampling='linear'). + """ + + nan_mask = np.isnan(data) + if nan_mask.all(): + return np.zeros(n_samples) + + sampling = sampling.casefold() + if sampling == 'linear': + quantiles = sample_q_linear(n_samples) + elif sampling == 'log': + quantiles = sample_q_log(n_samples, log_base) + elif sampling == 'invlog': + quantiles = sample_q_invlog(n_samples, log_base) + else: + msg = ('sampling option must be linear, log, or invlog, but received: ' + '{}'.format(sampling)) + logger.error(msg) + raise KeyError(msg) + + x_values = np.interp(quantiles, np.linspace(0, 1, len(data[~nan_mask])), + sorted(data[~nan_mask])) + + msg = (f'First and last x-value points defining the CDF ' + '({x_values[0]}, {x_values[-1]}) ' + f'were not the min and max data values ' + f'({np.nanmin(data)}, {np.nanmin(data)}).') + assert x_values[0] == np.nanmin(data), msg + assert x_values[-1] == np.nanmax(data), msg + + if decimals is not None: + x_values = np.round(x_values, decimals=decimals) + + return x_values + + class TemporalStats: """ Temporal Statistics from Resource Data @@ -372,8 +440,13 @@ def _compute_stats(cls, res_data, statistics, diurnal=False, month=False): columns = column_names[name] s_data = s_data.T s_data.columns = columns - else: + elif not isinstance(s_data, pd.DataFrame): s_data = s_data.to_frame(name=name) + elif isinstance(s_data, pd.DataFrame) and len(s_data) > 1: + # e.g., if func is scipy.stats.beta.fit(), this collapses + # multiple output parameters into list + s_data['name'] = name + s_data = s_data.groupby('name').agg(list).T res_stats.append(s_data) diff --git a/rex/utilities/bc_parse_table.py b/rex/utilities/bc_parse_table.py new file mode 100644 index 00000000..0f65f3df --- /dev/null +++ b/rex/utilities/bc_parse_table.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +""" +rex bias correction utilities. +""" +import json +import numpy as np +import logging +from warnings import warn +import rex.bias_correction + + +logger = logging.getLogger(__name__) + + +def parse_bc_table(bc_df, gids): + """Parse the bias correction table for required bc functions and kwargs + + Parameters + ---------- + bc_df : pd.DataFrame + DataFrame with wind or solar resource bias correction table. This + must have columns "gid" and "method", where "gid" is the resource + file indices, and "method" is a function name from the + ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are + corrected, depending on the technology. See the + ``rex.bias_correction`` module for more details on available + bias correction methods. + gids : list | np.ndarray + Array of integer gids (spatial indices) from the source h5 file. + This is used to get the correct bias correction parameters from + ``bias_correct`` table based on its ``gid`` column + + Returns + ------- + bc_fun : function + Function from ``rex.bias_correction`` to use. + bc_fun_kwargs : dict + Kwargs from ``bc_df`` to input to ``bc_fun``. This may include extra + kwargs that are not required by ``bc_fun`` and should be cleaned before + passing to the function. + bool_bc : np.ndarray + 1D Boolean array with length equal to the ``gids`` input with ``True`` + where data has available bias correction inputs in ``bc_df`` and + ``False`` where not + """ + + if 'method' not in bc_df: + msg = ('Bias correction table provided, but "method" column not ' + 'found! Only see columns: {}. Need to specify "method" which ' + 'is a function name from `rex.bias_correction`' + .format(list(bc_df.columns))) + logger.error(msg) + raise KeyError(msg) + + if bc_df.index.name != 'gid': + if 'gid' not in bc_df: + msg = ('Bias correction table must have "gid" column but only ' + 'found: {}'.format(list(bc_df.columns))) + logger.error(msg) + raise KeyError(msg) + bc_df = bc_df.set_index('gid') + + gid_arr = np.array(gids) + bool_bc = np.isin(gid_arr, bc_df.index.values) + + if not bool_bc.any(): + return None, {}, bool_bc + + if not bool_bc.all(): + missing = gid_arr[~bool_bc] + msg = ('{} sites were missing from the bias correction table, ' + 'not bias correcting: {}'.format(len(missing), missing)) + logger.warning(msg) + warn(msg) + + fun_name = bc_df['method'].unique() + msg = ('rex bias correction currently only supports a single unique ' + 'bias correction method per chunk of sites but received: {}' + .format(fun_name)) + assert len(fun_name) == 1, msg + bc_fun = getattr(rex.bias_correction, fun_name[0], None) + if bc_fun is None: + avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')] + msg = ('Could not find method name "{}" in ``rex.bias_correction`` ' + 'which has the available objects: {}' + .format(fun_name[0], avail)) + logger.error(msg) + raise KeyError(msg) + + bc_fun_kwargs = {} + for col in bc_df.columns: + + # load serialized lists from string columns in bc_df into nested lists + sample = bc_df[col].values[0] + if isinstance(sample, str) and '[' in sample and ']' in sample: + bc_df.loc[:, col] = bc_df[col].apply(json.loads) + + arr = bc_df.loc[gid_arr[bool_bc], col].values + + # nested lists in bc_df converted to arr of shape (space, N) + if isinstance(arr[0], (list, tuple)): + arr = np.array(arr.tolist()) + + bc_fun_kwargs[col] = arr + + return bc_fun, bc_fun_kwargs, bool_bc diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py new file mode 100644 index 00000000..6cb415ea --- /dev/null +++ b/rex/utilities/bc_utils.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- +""" +rex bias correction utilities. +""" +import scipy +import numpy as np +import logging + + +logger = logging.getLogger(__name__) + + +def sample_q_linear(n_samples): + """Sample quantiles from 0 to 1 inclusive linearly with even spacing + + Parameters + ---------- + n_samples : int + Number of points to sample between 0 and 1 + + Returns + ------- + quantiles : np.ndarray + 1D array of evenly spaced samples from 0 to 1 + """ + quantiles = np.linspace(0, 1, n_samples) + return quantiles + + +def sample_q_log(n_samples, log_base): + """Sample quantiles from 0 to 1 while concentrating samples near quantile=0 + + Parameters + ---------- + n_samples : int + Number of points to sample between 0 and 1 + log_base : int | float + Log base value. A higher value will concentrate more samples at the + extreme sides of the distribution. + + Returns + ------- + quantiles : np.ndarray + 1D array of log-spaced samples from 0 to 1 + """ + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + return quantiles + + +def sample_q_invlog(n_samples, log_base): + """Sample quantiles from 0 to 1 while concentrating samples near quantile=1 + + Parameters + ---------- + n_samples : int + Number of points to sample between 0 and 1 + log_base : int | float + Log base value. A higher value will concentrate more samples at the + extreme sides of the distribution. + + Returns + ------- + quantiles : np.ndarray + 1D array of log-spaced samples from 0 to 1 + """ + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + quantiles = np.array(sorted(1 - quantiles)) + return quantiles + + +class QuantileDeltaMapping: + """Class for quantile delta mapping based on the method from + Cannon et al., 2015 + + Note that this is a utility class for implementing QDM and should not be + requested directly as a method in the reV/rex bias correction table input + + Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in + Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015). + """ + + def __init__(self, params_oh, params_mh, params_mf, dist='empirical', + relative=True, sampling='linear', log_base=10): + """ + Parameters + ---------- + params_oh : np.ndarray + 2D array of **observed historical** distribution parameters created + from a multi-year set of data where the shape is (space, N). This + can be the output of a parametric distribution fit like + ``scipy.stats.weibull_min.fit()`` where N is the number of + parameters for that distribution, or this can define the x-values + of N points from an empirical CDF that will be linearly + interpolated between. If this is an empirical CDF, this must + include the 0th and 100th percentile values and have even + percentile spacing between values. + params_mh : np.ndarray + Same requirements as params_oh. This input arg is for the **modeled + historical distribution**. + params_mf : np.ndarray | None + Same requirements as params_oh. This input arg is for the **modeled + future distribution**. If this is None, this defaults to params_mh + (no future data, just corrected to modeled historical distribution) + dist : str | np.ndarray + Probability distribution name to use to model the data which + determines how the param args are used. This can "empirical" or any + continuous distribution name from ``scipy.stats``. Can also be a 1D + array of dist inputs if being used from reV, but they must all be + the same option. + relative : bool | np.ndarray + Flag to preserve relative rather than absolute changes in + quantiles. relative=False (default) will multiply by the change in + quantiles while relative=True will add. See Equations 4-6 from + Cannon et al., 2015 for more details. Can also be a 1D array of + dist inputs if being used from reV, but they must all be the same + option. + sampling : str | np.ndarray + If dist="empirical", this is an option for how the quantiles were + sampled to produce the params inputs, e.g., how to sample the + y-axis of the distribution (see sampling functions in + ``rex.utilities.bc_utils``). "linear" will do even spacing, "log" + will concentrate samples near quantile=0, and "invlog" will + concentrate samples near quantile=1. Can also be a 1D array of dist + inputs if being used from reV, but they must all be the same + option. + log_base : int | float | np.ndarray + Log base value if sampling is "log" or "invlog". A higher value + will concentrate more samples at the extreme sides of the + distribution. Can also be a 1D array of dist inputs if being used + from reV, but they must all be the same option. + """ + + self.params_oh = params_oh + self.params_mh = params_mh + self.params_mf = params_mf if params_mf is not None else params_mh + self.relative = bool(self._clean_kwarg(relative)) + self.dist_name = str(self._clean_kwarg(dist)).casefold() + self.sampling = str(self._clean_kwarg(sampling)).casefold() + self.log_base = float(self._clean_kwarg(log_base)) + self.scipy_dist = None + + if self.dist_name != 'empirical': + self.scipy_dist = getattr(scipy.stats, self.dist_name, None) + if self.scipy_dist is None: + msg = ('Could not get requested distribution "{}" from ' + '``scipy.stats``. Please double check your spelling ' + 'and select "empirical" or one of the continuous ' + 'distribution options from here: ' + 'https://docs.scipy.org/doc/scipy/reference/stats.html' + .format(self.dist_name)) + logger.error(msg) + raise KeyError(msg) + + @staticmethod + def _clean_kwarg(inp): + """Clean any kwargs inputs (e.g., dist, relative) that might be + provided as an array and must be collapsed into a single string or + boolean value""" + unique = np.unique(inp) + msg = ('_QuantileDeltaMapping kwargs must have only one unique input ' + 'even if being called with arrays as part of reV but found: {}' + .format(unique)) + assert len(unique) == 1, msg + + while isinstance(inp, np.ndarray): + inp = inp[0] + + return inp + + def _clean_params(self, params, arr_shape): + """Verify and clean 2D parameter arrays for passing into empirical + distribution or scipy continuous distribution functions. + + Parameters + ---------- + params : np.ndarray + Input params shape should be (space, N) where N is the number of + parameters for the distribution. + arr_shape : tuple + Array shape should be (time, space). + + Returns + ------- + params : np.ndarray | list + If a scipy continuous dist is set, this output will be params + unpacked along axis=1 into a list so that the list entries + represent the scipy distribution parameters + (e.g., shape, scale, loc) and each list entry is of shape (space,) + """ + + msg = f'params must be 2D array but received {type(params)}' + assert isinstance(params, np.ndarray), msg + + if len(params.shape) == 1: + params = np.expand_dims(params, 0) + + msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) ' + f'but received shape {params.shape}') + assert len(params.shape) == 2, msg + assert params.shape[0] == arr_shape[1], msg + + if self.scipy_dist is not None: + params = [params[:, i] for i in range(params.shape[1])] + + return params + + def _get_quantiles(self, n_samples): + """If dist='empirical', this will get the quantile values for the CDF + x-values specified in the input params""" + + if self.sampling == 'linear': + quantiles = sample_q_linear(n_samples) + elif self.sampling == 'log': + quantiles = sample_q_log(n_samples, self.log_base) + elif self.sampling == 'invlog': + quantiles = sample_q_invlog(n_samples, self.log_base) + else: + msg = ('sampling option must be linear, log, or invlog, but ' + 'received: {}'.format(self.sampling)) + logger.error(msg) + raise KeyError(msg) + + return quantiles + + def cdf(self, x, params): + """Run the CDF function e.g., convert physical variable to quantile""" + + if self.scipy_dist is None: + p = np.zeros_like(x) + for idx in range(x.shape[1]): + xp = params[idx, :] + fp = self._get_quantiles(len(xp)) + p[:, idx] = np.interp(x[:, idx], xp, fp) + else: + p = self.scipy_dist.cdf(x, *params) + + return p + + def ppf(self, p, params): + """Run the inverse CDF function (percent point function) e.g., convert + quantile to physical variable""" + + if self.scipy_dist is None: + x = np.zeros_like(p) + for idx in range(p.shape[1]): + fp = params[idx, :] + xp = self._get_quantiles(len(fp)) + x[:, idx] = np.interp(p[:, idx], xp, fp) + else: + x = self.scipy_dist.ppf(p, *params) + + return x + + def __call__(self, arr): + """Run the QDM function to bias correct an array + + Parameters + ---------- + arr : np.ndarray + 2D array of values in shape (time, space) + + Returns + ------- + arr : np.ndarray + Bias corrected copy of the input array with same shape. + """ + + if len(arr.shape) == 1: + arr = np.expand_dims(arr, 1) + + params_oh = self._clean_params(self.params_oh, arr.shape) + params_mh = self._clean_params(self.params_mh, arr.shape) + params_mf = self._clean_params(self.params_mf, arr.shape) + + # Equation references are from Section 3 of Cannon et al 2015: + # Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM + # Precipitation by Quantile Mapping: How Well Do Methods Preserve + # Changes in Quantiles and Extremes? Journal of Climate 28, 6938–6959 + # (2015). + + q_mf = self.cdf(arr, params_mf) # Eq.3: Tau_m_p = F_m_p(x_m_p) + x_oh = self.ppf(q_mf, params_oh) # Eq.5: x^_o:m_h:p = F-1_o_h(Tau_m_p) + x_mh_mf = self.ppf(q_mf, params_mh) # Eq.4 denom: F-1_m_h(Tau_m_p) + + if self.relative: + x_mh_mf[x_mh_mf == 0] = 0.001 # arbitrary limit to prevent div 0 + delta = arr / x_mh_mf # Eq.4: x_m_p / F-1_m_h(Tau_m_p) + arr_bc = x_oh * delta # Eq.6: x^_m_p = x^_o:m_h:p * delta + else: + delta = arr - x_mh_mf # Eq.4: x_m_p - F-1_m_h(Tau_m_p) + arr_bc = x_oh + delta # Eq.6: x^_m_p = x^_o:m_h:p + delta + + msg = ('Input shape {} does not match QDM bias corrected output ' + 'shape {}!'.format(arr.shape, arr_bc.shape)) + assert arr.shape == arr_bc.shape, msg + + return arr_bc diff --git a/rex/version.py b/rex/version.py index 091abf93..dd92f834 100644 --- a/rex/version.py +++ b/rex/version.py @@ -1,3 +1,3 @@ """rex Version number""" -__version__ = "0.2.84" +__version__ = "0.2.85" diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py index fa757ede..3dd604c9 100644 --- a/tests/test_sam_resource.py +++ b/tests/test_sam_resource.py @@ -2,6 +2,7 @@ """ pytests for sam_resource """ +import json import tempfile import numpy as np import shutil @@ -10,6 +11,7 @@ import pandas as pd from pandas.testing import assert_series_equal import pytest +from scipy.stats import weibull_min from rex.renewable_resource import WindResource, NSRDB, WaveResource from rex.multi_file_resource import MultiFileNSRDB @@ -394,24 +396,41 @@ def test_nsrdb_and_wtk(): _ = res._get_res_df(0) -def execute_pytest(capture='all', flags='-rapP'): - """Execute module as pytest with detailed summary report. +def test_bias_correct_errors(): + """Negative tests for bad bias correction inputs""" + h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') + sites = slice(0, 20) + hub_heights = 80 - Parameters - ---------- - capture : str - Log or stdout/stderr capture option. ex: log (only logger), - all (includes stdout/stderr) - flags : str - Which tests to show logs and results for. - """ + n = 10 + res = WindResource.preload_SAM(h5, sites, hub_heights) - fname = os.path.basename(__file__) - pytest.main(['-q', '--show-capture={}'.format(capture), fname, flags]) + bc = pd.DataFrame({'gid': np.arange(n), + 'adder': np.random.uniform(-1, 1, n), + 'scalar': np.random.uniform(0.9, 1.1, n)}) + with pytest.raises(KeyError) as record: + res.bias_correct(bc) + assert '"method" column not found!' in str(record.value) + bc = pd.DataFrame({'gidasdfasf': np.arange(n), + 'adder': np.random.uniform(-1, 1, n), + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'lin_ws'}) + with pytest.raises(KeyError) as record: + res.bias_correct(bc) + assert 'must have "gid" column' in str(record.value) + + bc = pd.DataFrame({'gid': np.arange(n), + 'adder': np.random.uniform(-1, 1, n), + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'testfasdfasdf'}) + with pytest.raises(KeyError) as record: + res.bias_correct(bc) + assert 'Could not find method name "test' in str(record.value) -def test_bias_correct_wind(): - """Test linear bias correction functionality on windspeed""" + +def test_bias_correct_wind_lin(): + """Test linear bias correction function on windspeed""" h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') sites = slice(0, 20) hub_heights = 80 @@ -420,37 +439,40 @@ def test_bias_correct_wind(): n = 10 bc = pd.DataFrame({'gid': np.arange(n), 'adder': np.random.uniform(-1, 1, n), - 'scalar': np.random.uniform(0.9, 1.1, n)}) + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'lin_ws'}) + + res = WindResource.preload_SAM(h5, sites, hub_heights) with pytest.warns() as record: - res = WindResource.preload_SAM(h5, sites, hub_heights) res.bias_correct(bc) assert len(record) == 1 assert 'missing from the bias correction' in str(record[0].message) assert np.allclose(res._res_arrays['windspeed'][:, 10:], base_res._res_arrays['windspeed'][:, 10:]) - assert not (res._res_arrays['windspeed'][:, :10] == - base_res._res_arrays['windspeed'][:, :10]).any() + assert not (res._res_arrays['windspeed'][:, :10] + == base_res._res_arrays['windspeed'][:, :10]).any() assert (res._res_arrays['windspeed'] >= 0).all() n = 200 bc = pd.DataFrame({'gid': np.arange(n), 'adder': np.random.uniform(-1, 1, n), - 'scalar': np.random.uniform(0.9, 1.1, n)}) + 'scalar': np.random.uniform(0.9, 1.1, n), + 'method': 'lin_ws'}) with pytest.warns(None) as record: res = WindResource.preload_SAM(h5, sites, hub_heights) res.bias_correct(bc) assert not any(record) - assert not (res._res_arrays['windspeed'] == - base_res._res_arrays['windspeed']).any() + assert not (res._res_arrays['windspeed'] + == base_res._res_arrays['windspeed']).any() assert (res._res_arrays['windspeed'] >= 0).all() -def test_bias_correct_solar(): - """Test adder bias correction functionality on irradiance""" +def test_bias_correct_solar_lin(): + """Test adder bias correction function on irradiance""" h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5') sites = slice(0, 10) base_res = NSRDB.preload_SAM(h5, sites) @@ -458,7 +480,8 @@ def test_bias_correct_solar(): n = 10 bc = pd.DataFrame({'gid': np.arange(n), 'adder': np.random.uniform(-100, 100, n), - 'scalar': np.random.uniform(1, 1, n)}) + 'scalar': np.random.uniform(1, 1, n), + 'method': 'lin_irrad'}) res = NSRDB.preload_SAM(h5, sites) res.bias_correct(bc) @@ -488,5 +511,152 @@ def test_bias_correct_solar(): assert np.allclose(cos_sza, base_cos_sza, atol=0.005) +def test_bias_correct_wind_pqdm(): + """Test parametric QDM bias correction function on windspeed""" + h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') + n = 20 + sites = slice(0, n) + hub_heights = 80 + base_res = WindResource.preload_SAM(h5, sites, hub_heights) + base_ws = base_res._res_arrays['windspeed'] + shape, loc, scale = weibull_min.fit(base_ws.mean(axis=1)) + + bc = pd.DataFrame({'gid': np.arange(n), + 'method': 'qdm_ws', + 'params_oh': f'[{shape}, {loc}, {scale}]', + 'params_mh': f'[{shape}, {loc}, {scale}]', + 'params_mf': f'[{shape}, {loc}, {scale}]', + 'dist': 'weibull_min', + 'relative': True, + }) + + res = WindResource.preload_SAM(h5, sites, hub_heights) + res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + assert np.allclose(bc_ws, base_ws) + + scale_mod = 0.9 + bc['params_mh'] = f'[{shape}, {loc}, {scale * scale_mod}]' + bc = bc.drop('params_mf', axis=1) + res = WindResource.preload_SAM(h5, sites, hub_heights) + res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + + # spot check using manual calc + idys = np.random.choice(base_ws.shape[0], size=10) + idxs = np.random.choice(base_ws.shape[1], size=10) + for idy, idx in zip(idys, idxs): + p_mf = weibull_min.cdf(base_ws[idy, idx], shape, loc, + scale * scale_mod) + x_oh = weibull_min.ppf(p_mf, shape, loc, scale) + delta = base_ws[idy, idx] / weibull_min.ppf(p_mf, shape, loc, + scale * scale_mod) + single_ws_bc = x_oh * delta + assert np.allclose(single_ws_bc, bc_ws[idy, idx]) + + # almost none of the values should be the same as the input, and none + # should be negative + assert ((bc_ws == base_ws).sum() / bc_ws.size) < 0.001 + assert not (bc_ws < 0).any() + + +def test_bias_correct_wind_qdm(): + """Test empirical QDM bias correction function on windspeed""" + h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5') + n = 20 + sites = slice(0, n) + hub_heights = 80 + base_res = WindResource.preload_SAM(h5, sites, hub_heights) + base_ws = base_res._res_arrays['windspeed'] + + base_params = list(np.linspace(0, 100, 10)) + + bc = pd.DataFrame({'gid': np.arange(n), + 'method': 'qdm_ws', + 'params_oh': json.dumps(base_params), + 'params_mh': json.dumps(base_params), + 'params_mf': json.dumps(base_params), + 'dist': 'empirical', + 'relative': True, + }) + + res = WindResource.preload_SAM(h5, sites, hub_heights) + res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + assert np.allclose(bc_ws, base_ws) + + res = WindResource.preload_SAM(h5, sites, hub_heights) + params = sorted(np.random.uniform(10, 20, 10)) + bc['params_oh'] = json.dumps(params) + bc.loc[0, 'params_oh'] = json.dumps(base_params) + res.bias_correct(bc) + bc_ws = res._res_arrays['windspeed'] + assert np.allclose(bc_ws[:, 0], base_ws[:, 0]) + assert ((bc_ws[:, 1:] != base_ws[:, 1:]).sum() / bc_ws[:, 1:].size) > 0.99 + + +def test_bias_correct_irrad_qdm(): + """Test empirical QDM bias correction function on irradiance""" + h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5') + n = 10 + sites = slice(0, n) + base_res = NSRDB.preload_SAM(h5, sites) + base_ghi = base_res._res_arrays['ghi'] + base_dni = base_res._res_arrays['dni'] + + base_params = list(np.linspace(0, 1300, 10)) + bc = pd.DataFrame({'gid': np.arange(n), + 'ghi_params_oh': json.dumps(base_params), + 'dni_params_oh': json.dumps(base_params), + 'ghi_params_mh': json.dumps(base_params), + 'dni_params_mh': json.dumps(base_params), + 'method': 'qdm_irrad'}, + ) + + res = NSRDB.preload_SAM(h5, sites) + res.bias_correct(bc) + assert np.allclose(base_res._res_arrays['ghi'], base_ghi) + assert np.allclose(base_res._res_arrays['dni'], base_dni) + + res = NSRDB.preload_SAM(h5, sites) + params = list(0.1 * np.ones(10)) + bc['ghi_params_oh'] = json.dumps(params) + res.bias_correct(bc) + bc_ghi = res._res_arrays['ghi'] + bc_dni = res._res_arrays['dni'] + bc_dhi = res._res_arrays['dhi'] + assert (bc_ghi >= 0).all() & (bc_dni >= 0).all() & (bc_dhi >= 0).all() + assert (bc_ghi.mean(axis=0) < base_ghi.mean(axis=0)).all() + assert np.allclose(bc_dni, base_dni) + + res = NSRDB.preload_SAM(h5, sites) + params = list(0.1 * np.ones(10)) + bc['ghi_params_oh'] = json.dumps(base_params) + bc['dni_params_oh'] = json.dumps(params) + res.bias_correct(bc) + bc_ghi = res._res_arrays['ghi'] + bc_dni = res._res_arrays['dni'] + bc_dhi = res._res_arrays['dhi'] + assert (bc_ghi >= 0).all() & (bc_dni >= 0).all() & (bc_dhi >= 0).all() + assert (bc_dni.mean(axis=0) < base_dni.mean(axis=0)).all() + assert np.allclose(bc_ghi, base_ghi) + + +def execute_pytest(capture='all', flags='-rapP'): + """Execute module as pytest with detailed summary report. + + Parameters + ---------- + capture : str + Log or stdout/stderr capture option. ex: log (only logger), + all (includes stdout/stderr) + flags : str + Which tests to show logs and results for. + """ + + fname = os.path.basename(__file__) + pytest.main(['-q', '--show-capture={}'.format(capture), fname, flags]) + + if __name__ == '__main__': execute_pytest() diff --git a/tests/test_temporal_stats.py b/tests/test_temporal_stats.py index 670c0345..1354b69e 100644 --- a/tests/test_temporal_stats.py +++ b/tests/test_temporal_stats.py @@ -7,13 +7,13 @@ import os import pandas as pd import pytest -from scipy.stats import mode +import scipy import tempfile import traceback from rex.multi_year_resource import MultiYearWindResource from rex.renewable_resource import WindResource, NSRDB -from rex.temporal_stats.temporal_stats import TemporalStats, circular_mean +from rex.temporal_stats.temporal_stats import TemporalStats, circular_mean, cdf from rex.temporal_stats.temporal_stats_cli import main from rex.utilities.loggers import LOGGERS from rex import TESTDATADIR @@ -39,7 +39,7 @@ def mode_func(arr, axis=0): """ custom mode stats """ - return mode(arr, axis=axis, keepdims=True).mode[0] + return scipy.stats.mode(arr, axis=axis, keepdims=True).mode[0] @pytest.mark.parametrize(("max_workers", "sites"), @@ -185,7 +185,7 @@ def test_custom_stats(max_workers): msg = 'Mins do not match!' assert np.allclose(truth, test_stats['min'].values), msg - truth = mode(res_data, axis=0, keepdims=True).mode[0] + truth = scipy.stats.mode(res_data, axis=0, keepdims=True).mode[0] msg = 'Modes do not match!' assert np.allclose(truth, test_stats['mode'].values), msg @@ -194,7 +194,7 @@ def test_custom_stats(max_workers): msg = 'January mins do not match!' assert np.allclose(truth, test_stats['Jan_min'].values), msg - truth = mode(res_data[mask], axis=0, keepdims=True).mode[0] + truth = scipy.stats.mode(res_data[mask], axis=0, keepdims=True).mode[0] msg = 'January modes do not match!' assert np.allclose(truth, test_stats['Jan_mode'].values), msg @@ -281,6 +281,45 @@ def test_weighted_circular_means(weights): assert np.allclose(truth, test_stats[name].values, rtol=0, atol=0.01), msg +@pytest.mark.parametrize("n_samples", [21, 50]) +def test_cdf_linear(n_samples): + """Test the CDF function which gets evenly spaced values (in quantile + space) defining the empirical CDF of a dataset""" + data = np.random.normal(0, 1, (1000,)) + x = cdf(data, n_samples=n_samples, decimals=None) + + assert x[0] == data.min() + assert x[-1] == data.max() + + quantiles = np.linspace(0, 1, n_samples) + assert quantiles[0] == 0 + assert quantiles[-1] == 1 + + for i, q in enumerate(quantiles): + assert np.allclose(np.percentile(data, 100 * q), x[i]) + + +@pytest.mark.parametrize(("n_samples", "log_base"), [(21, 10), (50, 2)]) +def test_cdf_invlog(n_samples, log_base): + """Test the CDF function which gets inverse-log-spaced spaced values + (in quantile space) defining the empirical CDF of a dataset""" + data = np.random.normal(0, 1, (10000,)) + x = cdf(data, n_samples=n_samples, decimals=None, sampling='invlog', + log_base=log_base) + + assert x[0] == data.min() + assert x[-1] == data.max() + + quantiles = np.logspace(0, 1, n_samples, base=log_base) + quantiles = (quantiles - 1) / (log_base - 1) + quantiles = np.array(sorted(1 - quantiles)) + assert quantiles[0] == 0 + assert quantiles[-1] == 1 + + for i, q in enumerate(quantiles): + assert np.allclose(np.percentile(data, 100 * q), x[i]) + + def test_mask_zeros(): """Test the irradiance stats functionality with zeros masked out""" res_h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5') @@ -344,6 +383,34 @@ def test_cli(runner): LOGGERS.clear() +def test_scipy_dist_fit(): + """Test rex temporal stats with a scipy distribution fit function""" + scipy_stats = {'weibull': {'func': scipy.stats.weibull_min.fit}} + + res_h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_*.h5') + dataset = 'windspeed_100m' + sites = slice(0, 10) + stats1 = TemporalStats.run(res_h5, dataset, sites=sites, + statistics=scipy_stats, + res_cls=MultiYearWindResource, + max_workers=1) + assert isinstance(stats1.loc[0, 'weibull'], list) + assert len(stats1.loc[0, 'weibull']) == 3 + + scipy_stats = {'weibull': {'func': scipy.stats.weibull_min.fit, + 'kwargs': {'floc': 0}}} + stats2 = TemporalStats.run(res_h5, dataset, sites=sites, + statistics=scipy_stats, + res_cls=MultiYearWindResource, + max_workers=1) + assert isinstance(stats2.loc[0, 'weibull'], list) + assert len(stats2.loc[0, 'weibull']) == 3 + for gid in range(10): + assert np.allclose(stats2.loc[gid, 'weibull'][0], 2.3, atol=1) + assert np.allclose(stats2.loc[gid, 'weibull'][2], 7.9, atol=1) + assert stats2.loc[gid, 'weibull'][1] == 0 + + def execute_pytest(capture='all', flags='-rapP'): """Execute module as pytest with detailed summary report.