From c7ea82e64385497c04a2f4ba8f290a55816cf776 Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Wed, 3 Jan 2024 15:51:02 -0700
Subject: [PATCH 01/19] added handling for temporal stats function that outputs
 multiple values with test using scipy.stats.weibull_min.fit()

---
 rex/temporal_stats/temporal_stats.py |  7 +++++-
 tests/test_temporal_stats.py         | 36 ++++++++++++++++++++++++----
 2 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py
index 3b17b934..18de50a4 100644
--- a/rex/temporal_stats/temporal_stats.py
+++ b/rex/temporal_stats/temporal_stats.py
@@ -372,8 +372,13 @@ def _compute_stats(cls, res_data, statistics, diurnal=False, month=False):
                     columns = column_names[name]
                     s_data = s_data.T
                     s_data.columns = columns
-                else:
+                elif not isinstance(s_data, pd.DataFrame):
                     s_data = s_data.to_frame(name=name)
+                elif isinstance(s_data, pd.DataFrame) and len(s_data) > 1:
+                    # e.g., if func is scipy.stats.beta.fit(), this collapses
+                    # multiple output parameters into list
+                    s_data['name'] = name
+                    s_data = s_data.groupby('name').agg(list).T
 
             res_stats.append(s_data)
 
diff --git a/tests/test_temporal_stats.py b/tests/test_temporal_stats.py
index 670c0345..d5702d77 100644
--- a/tests/test_temporal_stats.py
+++ b/tests/test_temporal_stats.py
@@ -7,7 +7,7 @@
 import os
 import pandas as pd
 import pytest
-from scipy.stats import mode
+import scipy
 import tempfile
 import traceback
 
@@ -39,7 +39,7 @@ def mode_func(arr, axis=0):
     """
     custom mode stats
     """
-    return mode(arr, axis=axis, keepdims=True).mode[0]
+    return scipy.stats.mode(arr, axis=axis, keepdims=True).mode[0]
 
 
 @pytest.mark.parametrize(("max_workers", "sites"),
@@ -185,7 +185,7 @@ def test_custom_stats(max_workers):
     msg = 'Mins do not match!'
     assert np.allclose(truth, test_stats['min'].values), msg
 
-    truth = mode(res_data, axis=0, keepdims=True).mode[0]
+    truth = scipy.stats.mode(res_data, axis=0, keepdims=True).mode[0]
     msg = 'Modes do not match!'
     assert np.allclose(truth, test_stats['mode'].values), msg
 
@@ -194,7 +194,7 @@ def test_custom_stats(max_workers):
     msg = 'January mins do not match!'
     assert np.allclose(truth, test_stats['Jan_min'].values), msg
 
-    truth = mode(res_data[mask], axis=0, keepdims=True).mode[0]
+    truth = scipy.stats.mode(res_data[mask], axis=0, keepdims=True).mode[0]
     msg = 'January modes do not match!'
     assert np.allclose(truth, test_stats['Jan_mode'].values), msg
 
@@ -344,6 +344,34 @@ def test_cli(runner):
     LOGGERS.clear()
 
 
+def test_scipy_dist_fit():
+    """Test rex temporal stats with a scipy distribution fit function"""
+    scipy_stats = {'weibull': {'func': scipy.stats.weibull_min.fit}}
+
+    res_h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_*.h5')
+    dataset = 'windspeed_100m'
+    sites = slice(0, 10)
+    stats1 = TemporalStats.run(res_h5, dataset, sites=sites,
+                               statistics=scipy_stats,
+                               res_cls=MultiYearWindResource,
+                               max_workers=1)
+    assert isinstance(stats1.loc[0, 'weibull'], list)
+    assert len(stats1.loc[0, 'weibull']) == 3
+
+    scipy_stats = {'weibull': {'func': scipy.stats.weibull_min.fit,
+                               'kwargs': {'floc': 0}}}
+    stats2 = TemporalStats.run(res_h5, dataset, sites=sites,
+                               statistics=scipy_stats,
+                               res_cls=MultiYearWindResource,
+                               max_workers=1)
+    assert isinstance(stats2.loc[0, 'weibull'], list)
+    assert len(stats2.loc[0, 'weibull']) == 3
+    for gid in range(10):
+        assert np.allclose(stats2.loc[gid, 'weibull'][0], 2.3, atol=1)
+        assert np.allclose(stats2.loc[gid, 'weibull'][2], 7.9, atol=1)
+        assert stats2.loc[gid, 'weibull'][1] == 0
+
+
 def execute_pytest(capture='all', flags='-rapP'):
     """Execute module as pytest with detailed summary report.
 

From 083f17a6c4597702760fd934d661f4b09a7c4da3 Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Wed, 3 Jan 2024 16:26:41 -0700
Subject: [PATCH 02/19] started new bias_correction file and dynamically linked
 up functions with the sam_resource bias_correct() method

---
 rex/bias_correction.py     | 88 ++++++++++++++++++++++++++++++++++++++
 rex/sam_resource.py        | 88 +++++++++++++++++++++-----------------
 tests/test_sam_resource.py | 41 ++++++++++--------
 3 files changed, 159 insertions(+), 58 deletions(-)
 create mode 100644 rex/bias_correction.py

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
new file mode 100644
index 00000000..b6de7ac8
--- /dev/null
+++ b/rex/bias_correction.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+"""
+Module to perform bias correction of renewable energy resource data
+"""
+import numpy as np
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+def lin_irrad(ghi, dni, dhi, scalar, adder):
+    """Correct GHI and DNI using linear correction factors. Both irradiance
+    variables are corrected as `(irradiance*scalar)+adder`. DHI is preserved
+    based on the relationship `dhi = ghi - (dni * cos(sza))`. Times when GHI
+    and DNI are zero are preserved and negative values are protected against.
+
+    Parameters
+    ----------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    dhi : np.ndarray
+        2D array of diffuse horizontal irradiance values in shape (time, space)
+    scalar : np.ndarray
+        1D array of linear scalar values in the shape (space,)
+    adder : np.ndarray
+        1D array of linear adder values in the shape (space,)
+
+    Returns
+    -------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    dhi : np.ndarray
+        2D array of diffuse horizontal irradiance values in shape (time, space)
+    """
+
+    ghi_zeros = ghi == 0
+    dni_zeros = dni == 0
+    dhi_zeros = dhi == 0
+    with np.errstate(divide='ignore', invalid='ignore'):
+        cos_sza = (ghi - dhi) / dni
+
+    ghi = ghi * scalar + adder
+    dni = dni * scalar + adder
+
+    ghi = np.maximum(0, ghi)
+    dni = np.maximum(0, dni)
+
+    ghi[ghi_zeros] = 0
+    dni[dni_zeros] = 0
+
+    with np.errstate(divide='ignore', invalid='ignore'):
+        dhi = ghi - (dni * cos_sza)
+
+    dhi[dni_zeros] = ghi[dni_zeros]
+    dhi = np.maximum(0, dhi)
+    dhi[dhi_zeros] = 0
+
+    assert not np.isnan(dhi).any()
+
+    return ghi, dni, dhi
+
+
+def lin_ws(ws, scalar, adder):
+    """Correct windspeed using linear correction factors. Windspeed is
+    corrected as `(windspeed*scalar)+adder` with a minimum of zero.
+
+    Parameters
+    ----------
+    ws : np.ndarray
+        2D array of windspeed values in shape (time, space)
+    scalar : np.ndarray
+        1D array of linear scalar values in the shape (space,)
+    adder : np.ndarray
+        1D array of linear adder values in the shape (space,)
+
+    Returns
+    -------
+    ws : np.ndarray
+        2D array of windspeed values in shape (time, space)
+    """
+    ws = ws * scalar + adder
+    ws = np.maximum(ws, 0)
+    return ws
diff --git a/rex/sam_resource.py b/rex/sam_resource.py
index ff59c786..69a4931c 100644
--- a/rex/sam_resource.py
+++ b/rex/sam_resource.py
@@ -3,11 +3,13 @@
 Module to handle SAM Resource iterator to create site by site resource
 DataFrames
 """
+from inspect import signature
 import numpy as np
 import pandas as pd
 from warnings import warn
 import logging
 
+import rex.bias_correction
 from rex.utilities.exceptions import (ResourceKeyError, ResourceRuntimeError,
                                       ResourceValueError, SAMInputWarning)
 from rex.utilities.parse_keys import parse_keys
@@ -709,21 +711,19 @@ def bias_correct(self, bc_df):
         ----------
         bc_df : pd.DataFrame
             DataFrame with wind or solar resource bias correction table. This
-            has columns: gid, adder, scalar. If either adder or scalar is not
-            present, scalar defaults to 1 and adder to 0. Only windspeed or
-            GHI+DNI are corrected depending on the technology. GHI and DNI are
-            corrected with the same correction factors.
+            must have columns "gid" and "method", where "gid" is the resource
+            file indices, and "method" is a function name from the
+            `rex.bias_correction` module. Only windspeed or GHI+DNI are
+            corrected depending on the technology. GHI and DNI are corrected
+            with the same correction factors.
         """
 
-        if 'adder' not in bc_df:
-            logger.info('Bias correction table provided, but "adder" not '
-                        'found, defaulting to 0.')
-            bc_df['adder'] = 0
-
-        if 'scalar' not in bc_df:
-            logger.info('Bias correction table provided, but "scalar" not '
-                        'found, defaulting to 1.')
-            bc_df['scalar'] = 1
+        if 'method' not in bc_df:
+            msg = ('Bias correction table provided, but "method" not '
+                   'found! Need to specify "method" as a function name '
+                   'from `rex.bias_correction`')
+            logger.error(msg)
+            raise KeyError(msg)
 
         if bc_df.index.name != 'gid':
             msg = ('Bias correction table must have "gid" column but only '
@@ -740,42 +740,52 @@ def bias_correct(self, bc_df):
             logger.warning(msg)
             warn(msg)
 
-        scalar = np.expand_dims(bc_df.loc[site_arr[isin], 'scalar'].values, 0)
-        adder = np.expand_dims(bc_df.loc[site_arr[isin], 'adder'].values, 0)
+        fun_name = bc_df['method'].unique()
+        msg = ('rex bias correction currently only supports a single unique '
+               'bias correction method per chunk of sites but received: {}'
+               .format(fun_name))
+        assert len(fun_name) == 1, msg
+        bc_fun = getattr(rex.bias_correction, fun_name[0])
+
+        bc_fun_kwargs = {}
+        for col in bc_df.columns:
+            arr = bc_df.loc[site_arr[isin], col].values
+            bc_fun_kwargs[col] = np.expand_dims(arr, 0)
 
         if 'ghi' in self._res_arrays and 'dni' in self._res_arrays:
+            logger.debug('Bias correcting irradiance with function {} '
+                         'for sites {}'.format(bc_fun, self.sites))
             ghi = self._res_arrays['ghi']
             dni = self._res_arrays['dni']
             dhi = self._res_arrays['dhi']
-            ghi_zeros = ghi == 0
-            dni_zeros = dni == 0
-            dhi_zeros = dhi == 0
-            with np.errstate(divide='ignore', invalid='ignore'):
-                cos_sza = (ghi - dhi) / dni
-
-            ghi[:, isin] = ghi[:, isin] * scalar + adder
-            dni[:, isin] = dni[:, isin] * scalar + adder
-            ghi = np.maximum(0, ghi)
-            dni = np.maximum(0, dni)
-            ghi[ghi_zeros] = 0
-            dni[dni_zeros] = 0
-            with np.errstate(divide='ignore', invalid='ignore'):
-                dhi[:, isin] = ghi[:, isin] - (dni[:, isin] * cos_sza[:, isin])
-
-            dhi[dni_zeros] = ghi[dni_zeros]
-            dhi = np.maximum(0, dhi)
-            dhi[dhi_zeros] = 0
-            assert not np.isnan(dhi).any()
+
+            bc_fun_kwargs['ghi'] = ghi[:, isin]
+            bc_fun_kwargs['dni'] = dni[:, isin]
+            bc_fun_kwargs['dhi'] = dhi[:, isin]
+
+            sig = signature(bc_fun)
+            bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items()
+                             if k in sig.parameters}
+            out = bc_fun(**bc_fun_kwargs)
+
+            ghi[:, isin] = out[0][:, isin]
+            dni[:, isin] = out[1][:, isin]
+            dhi[:, isin] = out[2][:, isin]
+
             self._res_arrays['ghi'] = ghi
             self._res_arrays['dni'] = dni
             self._res_arrays['dhi'] = dhi
 
         elif 'windspeed' in self._res_arrays:
-            logger.debug('Bias correcting windspeeds.')
-            arr = self._res_arrays['windspeed']
-            arr[:, isin] = arr[:, isin] * scalar + adder
-            arr = np.maximum(arr, 0)
-            self._res_arrays['windspeed'] = arr
+            logger.debug('Bias correcting windspeed with function {} '
+                         'for sites {}'.format(bc_fun, self.sites))
+            ws = self._res_arrays['windspeed']
+            bc_fun_kwargs['ws'] = ws[:, isin]
+            sig = signature(bc_fun)
+            bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items()
+                             if k in sig.parameters}
+            ws[:, isin] = bc_fun(**bc_fun_kwargs)
+            self._res_arrays['windspeed'] = ws
 
         if self._mean_arrays is not None:
             # pylint: disable=consider-iterating-dictionary
diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py
index fa757ede..b814edb5 100644
--- a/tests/test_sam_resource.py
+++ b/tests/test_sam_resource.py
@@ -394,22 +394,6 @@ def test_nsrdb_and_wtk():
         _ = res._get_res_df(0)
 
 
-def execute_pytest(capture='all', flags='-rapP'):
-    """Execute module as pytest with detailed summary report.
-
-    Parameters
-    ----------
-    capture : str
-        Log or stdout/stderr capture option. ex: log (only logger),
-        all (includes stdout/stderr)
-    flags : str
-        Which tests to show logs and results for.
-    """
-
-    fname = os.path.basename(__file__)
-    pytest.main(['-q', '--show-capture={}'.format(capture), fname, flags])
-
-
 def test_bias_correct_wind():
     """Test linear bias correction functionality on windspeed"""
     h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
@@ -420,7 +404,8 @@ def test_bias_correct_wind():
     n = 10
     bc = pd.DataFrame({'gid': np.arange(n),
                        'adder': np.random.uniform(-1, 1, n),
-                       'scalar': np.random.uniform(0.9, 1.1, n)})
+                       'scalar': np.random.uniform(0.9, 1.1, n),
+                       'method': 'lin_ws'})
 
     with pytest.warns() as record:
         res = WindResource.preload_SAM(h5, sites, hub_heights)
@@ -437,7 +422,8 @@ def test_bias_correct_wind():
     n = 200
     bc = pd.DataFrame({'gid': np.arange(n),
                        'adder': np.random.uniform(-1, 1, n),
-                       'scalar': np.random.uniform(0.9, 1.1, n)})
+                       'scalar': np.random.uniform(0.9, 1.1, n),
+                       'method': 'lin_ws'})
 
     with pytest.warns(None) as record:
         res = WindResource.preload_SAM(h5, sites, hub_heights)
@@ -458,7 +444,8 @@ def test_bias_correct_solar():
     n = 10
     bc = pd.DataFrame({'gid': np.arange(n),
                        'adder': np.random.uniform(-100, 100, n),
-                       'scalar': np.random.uniform(1, 1, n)})
+                       'scalar': np.random.uniform(1, 1, n),
+                       'method': 'lin_irrad'})
 
     res = NSRDB.preload_SAM(h5, sites)
     res.bias_correct(bc)
@@ -488,5 +475,21 @@ def test_bias_correct_solar():
         assert np.allclose(cos_sza, base_cos_sza, atol=0.005)
 
 
+def execute_pytest(capture='all', flags='-rapP'):
+    """Execute module as pytest with detailed summary report.
+
+    Parameters
+    ----------
+    capture : str
+        Log or stdout/stderr capture option. ex: log (only logger),
+        all (includes stdout/stderr)
+    flags : str
+        Which tests to show logs and results for.
+    """
+
+    fname = os.path.basename(__file__)
+    pytest.main(['-q', '--show-capture={}'.format(capture), fname, flags])
+
+
 if __name__ == '__main__':
     execute_pytest()

From 5a2711d17bfcb7d62fc941552cd30ab1434a648b Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Thu, 4 Jan 2024 10:44:41 -0700
Subject: [PATCH 03/19] moved bias correct table and function parsing to
 separate module for use in reV

---
 rex/bias_correction.py    | 13 +++---
 rex/sam_resource.py       | 59 +++++++-------------------
 rex/utilities/bc_utils.py | 87 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 51 deletions(-)
 create mode 100644 rex/utilities/bc_utils.py

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index b6de7ac8..854577ff 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -9,11 +9,12 @@
 logger = logging.getLogger(__name__)
 
 
-def lin_irrad(ghi, dni, dhi, scalar, adder):
+def lin_irrad(ghi, dni, dhi, scalar=1, adder=0):
     """Correct GHI and DNI using linear correction factors. Both irradiance
-    variables are corrected as `(irradiance*scalar)+adder`. DHI is preserved
-    based on the relationship `dhi = ghi - (dni * cos(sza))`. Times when GHI
-    and DNI are zero are preserved and negative values are protected against.
+    variables are corrected as ``irradiance * scalar + adder``. DHI is
+    preserved based on the relationship ``dhi = ghi - (dni * cos(sza))``. Times
+    when GHI and DNI are zero are preserved and negative values are protected
+    against.
 
     Parameters
     ----------
@@ -65,9 +66,9 @@ def lin_irrad(ghi, dni, dhi, scalar, adder):
     return ghi, dni, dhi
 
 
-def lin_ws(ws, scalar, adder):
+def lin_ws(ws, scalar=1, adder=0):
     """Correct windspeed using linear correction factors. Windspeed is
-    corrected as `(windspeed*scalar)+adder` with a minimum of zero.
+    corrected as ``windspeed * scalar + adder`` with a minimum of zero.
 
     Parameters
     ----------
diff --git a/rex/sam_resource.py b/rex/sam_resource.py
index 69a4931c..a1119c5f 100644
--- a/rex/sam_resource.py
+++ b/rex/sam_resource.py
@@ -9,7 +9,7 @@
 from warnings import warn
 import logging
 
-import rex.bias_correction
+from rex.utilities.bc_utils import _parse_bc_table
 from rex.utilities.exceptions import (ResourceKeyError, ResourceRuntimeError,
                                       ResourceValueError, SAMInputWarning)
 from rex.utilities.parse_keys import parse_keys
@@ -713,44 +713,13 @@ def bias_correct(self, bc_df):
             DataFrame with wind or solar resource bias correction table. This
             must have columns "gid" and "method", where "gid" is the resource
             file indices, and "method" is a function name from the
-            `rex.bias_correction` module. Only windspeed or GHI+DNI are
-            corrected depending on the technology. GHI and DNI are corrected
-            with the same correction factors.
+            ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are
+            corrected, depending on the technology. See the
+            ``rex.bias_correction`` module for more details on available
+            bias correction methods.
         """
 
-        if 'method' not in bc_df:
-            msg = ('Bias correction table provided, but "method" not '
-                   'found! Need to specify "method" as a function name '
-                   'from `rex.bias_correction`')
-            logger.error(msg)
-            raise KeyError(msg)
-
-        if bc_df.index.name != 'gid':
-            msg = ('Bias correction table must have "gid" column but only '
-                   'found: {}'.format(list(bc_df.columns)))
-            assert 'gid' in bc_df, msg
-            bc_df = bc_df.set_index('gid')
-
-        site_arr = np.array(self.sites)
-        isin = np.isin(self.sites, bc_df.index.values)
-        if not isin.all():
-            missing = site_arr[~isin]
-            msg = ('{} sites were missing from the bias correction table, '
-                   'not bias correcting: {}'.format(len(missing), missing))
-            logger.warning(msg)
-            warn(msg)
-
-        fun_name = bc_df['method'].unique()
-        msg = ('rex bias correction currently only supports a single unique '
-               'bias correction method per chunk of sites but received: {}'
-               .format(fun_name))
-        assert len(fun_name) == 1, msg
-        bc_fun = getattr(rex.bias_correction, fun_name[0])
-
-        bc_fun_kwargs = {}
-        for col in bc_df.columns:
-            arr = bc_df.loc[site_arr[isin], col].values
-            bc_fun_kwargs[col] = np.expand_dims(arr, 0)
+        bc_fun, bc_fun_kwargs, bool_bc = _parse_bc_table(bc_df, self.sites)
 
         if 'ghi' in self._res_arrays and 'dni' in self._res_arrays:
             logger.debug('Bias correcting irradiance with function {} '
@@ -759,18 +728,18 @@ def bias_correct(self, bc_df):
             dni = self._res_arrays['dni']
             dhi = self._res_arrays['dhi']
 
-            bc_fun_kwargs['ghi'] = ghi[:, isin]
-            bc_fun_kwargs['dni'] = dni[:, isin]
-            bc_fun_kwargs['dhi'] = dhi[:, isin]
+            bc_fun_kwargs['ghi'] = ghi[:, bool_bc]
+            bc_fun_kwargs['dni'] = dni[:, bool_bc]
+            bc_fun_kwargs['dhi'] = dhi[:, bool_bc]
 
             sig = signature(bc_fun)
             bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items()
                              if k in sig.parameters}
             out = bc_fun(**bc_fun_kwargs)
 
-            ghi[:, isin] = out[0][:, isin]
-            dni[:, isin] = out[1][:, isin]
-            dhi[:, isin] = out[2][:, isin]
+            ghi[:, bool_bc] = out[0][:, bool_bc]
+            dni[:, bool_bc] = out[1][:, bool_bc]
+            dhi[:, bool_bc] = out[2][:, bool_bc]
 
             self._res_arrays['ghi'] = ghi
             self._res_arrays['dni'] = dni
@@ -780,11 +749,11 @@ def bias_correct(self, bc_df):
             logger.debug('Bias correcting windspeed with function {} '
                          'for sites {}'.format(bc_fun, self.sites))
             ws = self._res_arrays['windspeed']
-            bc_fun_kwargs['ws'] = ws[:, isin]
+            bc_fun_kwargs['ws'] = ws[:, bool_bc]
             sig = signature(bc_fun)
             bc_fun_kwargs = {k: v for k, v in bc_fun_kwargs.items()
                              if k in sig.parameters}
-            ws[:, isin] = bc_fun(**bc_fun_kwargs)
+            ws[:, bool_bc] = bc_fun(**bc_fun_kwargs)
             self._res_arrays['windspeed'] = ws
 
         if self._mean_arrays is not None:
diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py
new file mode 100644
index 00000000..2fc0b331
--- /dev/null
+++ b/rex/utilities/bc_utils.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+"""
+rex bias correction utilities.
+"""
+import numpy as np
+import logging
+from warnings import warn
+import rex.bias_correction
+
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_bc_table(bc_df, gids):
+    """Parse the bias correction table for required bc functions and kwargs
+
+    Parameters
+    ----------
+    bc_df : pd.DataFrame
+        DataFrame with wind or solar resource bias correction table. This
+        must have columns "gid" and "method", where "gid" is the resource
+        file indices, and "method" is a function name from the
+        ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are
+        corrected, depending on the technology. See the
+        ``rex.bias_correction`` module for more details on available
+        bias correction methods.
+    gids : list | np.ndarray
+        Array of integer gids (spatial indices) from the source h5 file.
+        This is used to get the correct bias correction parameters from
+        ``bias_correct`` table based on its ``gid`` column
+
+    Returns
+    -------
+    bc_fun : function
+        Function from ``rex.bias_correction`` to use.
+    bc_fun_kwargs : dict
+        Kwargs from ``bc_df`` to input to ``bc_fun``. This may include extra
+        kwargs that are not required by ``bc_fun`` and should be cleaned before
+        passing to the function.
+    bool_bc : np.ndarray
+        1D Boolean array with length equal to the ``gids`` input with ``True``
+        where data has available bias correction inputs in ``bc_df`` and
+        ``False`` where not
+    """
+
+    if 'method' not in bc_df:
+        msg = ('Bias correction table provided, but "method" not '
+               'found! Need to specify "method" as a function name '
+               'from `rex.bias_correction`')
+        logger.error(msg)
+        raise KeyError(msg)
+
+    if bc_df.index.name != 'gid':
+        msg = ('Bias correction table must have "gid" column but only '
+               'found: {}'.format(list(bc_df.columns)))
+        assert 'gid' in bc_df, msg
+        bc_df = bc_df.set_index('gid')
+
+    gid_arr = np.array(gids)
+    bool_bc = np.isin(gid_arr, bc_df.index.values)
+    if not bool_bc.all():
+        missing = gid_arr[~bool_bc]
+        msg = ('{} sites were missing from the bias correction table, '
+               'not bias correcting: {}'.format(len(missing), missing))
+        logger.warning(msg)
+        warn(msg)
+
+    fun_name = bc_df['method'].unique()
+    msg = ('rex bias correction currently only supports a single unique '
+           'bias correction method per chunk of sites but received: {}'
+           .format(fun_name))
+    assert len(fun_name) == 1, msg
+    bc_fun = getattr(rex.bias_correction, fun_name[0], None)
+    if bc_fun is None:
+        avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')]
+        msg = ('Could not find method name "{}" in ``rex.bias_correction`` '
+               'which has the available objects: {}'
+               .format(fun_name[0], avail))
+        logger.error(msg)
+        raise KeyError(msg)
+
+    bc_fun_kwargs = {}
+    for col in bc_df.columns:
+        arr = bc_df.loc[gid_arr[bool_bc], col].values
+        bc_fun_kwargs[col] = np.expand_dims(arr, 0)
+
+    return bc_fun, bc_fun_kwargs, bool_bc

From 29ea3570c65d29ff99857d21ed8c9e99db2be714 Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Thu, 4 Jan 2024 10:57:55 -0700
Subject: [PATCH 04/19] negative tests for bad bc inputs

---
 rex/utilities/bc_utils.py  | 16 ++++++++------
 tests/test_sam_resource.py | 44 +++++++++++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py
index 2fc0b331..184abe65 100644
--- a/rex/utilities/bc_utils.py
+++ b/rex/utilities/bc_utils.py
@@ -44,16 +44,18 @@ def _parse_bc_table(bc_df, gids):
     """
 
     if 'method' not in bc_df:
-        msg = ('Bias correction table provided, but "method" not '
-               'found! Need to specify "method" as a function name '
-               'from `rex.bias_correction`')
-        logger.error(msg)
+        msg = ('Bias correction table provided, but "method" column not '
+               'found! Only see columns: {}. Need to specify "method" which '
+               'is a function name from `rex.bias_correction`'
+               .format(list(bc_df.columns)))
         raise KeyError(msg)
 
     if bc_df.index.name != 'gid':
-        msg = ('Bias correction table must have "gid" column but only '
-               'found: {}'.format(list(bc_df.columns)))
-        assert 'gid' in bc_df, msg
+        if 'gid' not in bc_df:
+            msg = ('Bias correction table must have "gid" column but only '
+                   'found: {}'.format(list(bc_df.columns)))
+            logger.error(msg)
+            raise KeyError(msg)
         bc_df = bc_df.set_index('gid')
 
     gid_arr = np.array(gids)
diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py
index b814edb5..aa75290a 100644
--- a/tests/test_sam_resource.py
+++ b/tests/test_sam_resource.py
@@ -394,6 +394,39 @@ def test_nsrdb_and_wtk():
         _ = res._get_res_df(0)
 
 
+def test_bias_correct_errors():
+    """Negative tests for bad bias correction inputs"""
+    h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
+    sites = slice(0, 20)
+    hub_heights = 80
+
+    n = 10
+    res = WindResource.preload_SAM(h5, sites, hub_heights)
+
+    bc = pd.DataFrame({'gid': np.arange(n),
+                       'adder': np.random.uniform(-1, 1, n),
+                       'scalar': np.random.uniform(0.9, 1.1, n)})
+    with pytest.raises(KeyError) as record:
+        res.bias_correct(bc)
+    assert '"method" column not found!' in str(record.value)
+
+    bc = pd.DataFrame({'gidasdfasf': np.arange(n),
+                       'adder': np.random.uniform(-1, 1, n),
+                       'scalar': np.random.uniform(0.9, 1.1, n),
+                       'method': 'lin_ws'})
+    with pytest.raises(KeyError) as record:
+        res.bias_correct(bc)
+    assert 'must have "gid" column' in str(record.value)
+
+    bc = pd.DataFrame({'gid': np.arange(n),
+                       'adder': np.random.uniform(-1, 1, n),
+                       'scalar': np.random.uniform(0.9, 1.1, n),
+                       'method': 'testfasdfasdf'})
+    with pytest.raises(KeyError) as record:
+        res.bias_correct(bc)
+    assert 'Could not find method name "test' in str(record.value)
+
+
 def test_bias_correct_wind():
     """Test linear bias correction functionality on windspeed"""
     h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
@@ -407,16 +440,17 @@ def test_bias_correct_wind():
                        'scalar': np.random.uniform(0.9, 1.1, n),
                        'method': 'lin_ws'})
 
+    res = WindResource.preload_SAM(h5, sites, hub_heights)
+
     with pytest.warns() as record:
-        res = WindResource.preload_SAM(h5, sites, hub_heights)
         res.bias_correct(bc)
 
         assert len(record) == 1
         assert 'missing from the bias correction' in str(record[0].message)
         assert np.allclose(res._res_arrays['windspeed'][:, 10:],
                            base_res._res_arrays['windspeed'][:, 10:])
-        assert not (res._res_arrays['windspeed'][:, :10] ==
-                    base_res._res_arrays['windspeed'][:, :10]).any()
+        assert not (res._res_arrays['windspeed'][:, :10]
+                    == base_res._res_arrays['windspeed'][:, :10]).any()
         assert (res._res_arrays['windspeed'] >= 0).all()
 
     n = 200
@@ -430,8 +464,8 @@ def test_bias_correct_wind():
         res.bias_correct(bc)
 
         assert not any(record)
-        assert not (res._res_arrays['windspeed'] ==
-                    base_res._res_arrays['windspeed']).any()
+        assert not (res._res_arrays['windspeed']
+                    == base_res._res_arrays['windspeed']).any()
         assert (res._res_arrays['windspeed'] >= 0).all()
 
 

From 13e8a4607b21059d535fe05b85c86200c8ad713c Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Thu, 4 Jan 2024 14:42:04 -0700
Subject: [PATCH 05/19] started parametric quantile delta mapping bias
 correction method

---
 rex/bias_correction.py     | 150 +++++++++++++++++++++++++++++++++++++
 rex/utilities/bc_utils.py  |  14 +++-
 tests/test_sam_resource.py |  30 +++++++-
 3 files changed, 189 insertions(+), 5 deletions(-)

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index 854577ff..cd00f04f 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -2,6 +2,7 @@
 """
 Module to perform bias correction of renewable energy resource data
 """
+import scipy
 import numpy as np
 import logging
 
@@ -87,3 +88,152 @@ def lin_ws(ws, scalar=1, adder=0):
     ws = ws * scalar + adder
     ws = np.maximum(ws, 0)
     return ws
+
+
+class PQDM:
+    """Correct windspeed using parametric quantile delta mapping based on a
+    parametric implementation of the method from Cannon et al., 2015
+
+    Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
+    Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
+    Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015).
+    """
+
+    def __init__(self, params_oh, params_mh, params_mf, dist, relative):
+        """
+        Parameters
+        ----------
+        ws : np.ndarray
+            2D array of windspeed values in shape (time, space)
+        params_oh : np.ndarray | list
+            2D array of probability distribution parameters created using a
+            function like ``scipy.stats.weibull_min.fit()`` where the shape is
+            (space, N) with N being the number of parameters required by the
+            specified distribution. This input arg is for the **observed
+            historical distribution**.
+        params_mh : np.ndarray | list
+            Same requirements as params_oh. This input arg is for the **modeled
+            historical distribution**.
+        params_mf : np.ndarray | list | None
+            Same requirements as params_oh. This input arg is for the **modeled
+            future distribution**. If this is None, this defaults to params_mh
+            (no future data).
+        dist : str | np.ndarray
+            Parametric probability distribution name to use to model the
+            windspeed. This can be any distribution name from ``scipy.stats``,
+            but "weibull_min" is a common choice for windspeed distributions.
+            Can also be an array of dist inputs if being used from reV, but
+            they must all be the same option.
+        relative : bool | np.ndarray
+            Flag to preserve relative rather than absolute changes in
+            quantiles. relative=False (default) will multiply by the change in
+            quantiles while relative=True will add. See Equations 4-6 from
+            Cannon et al., 2015 for more details. Can also be an array of dist
+            inputs if being used from reV, but they must all be the same
+            option.
+        """
+        self.params_oh = params_oh
+        self.params_mh = params_mh
+        self.params_mf = params_mf
+        self.dist = self._clean_kwarg(dist)
+        self.relative = self._clean_kwarg(relative)
+
+        self.dist_obj = getattr(scipy.stats, self.dist, None)
+        if self.dist_obj is None:
+            msg = ('Could not get requested distribution "{}" from '
+                   '``scipy.stats``. Please double check your spelling and '
+                   'select one of the options from here: '
+                   'https://docs.scipy.org/doc/scipy/reference/stats.html'
+                   .format(self.dist))
+            logger.error(msg)
+            raise KeyError(msg)
+
+    @staticmethod
+    def _clean_kwarg(inp):
+        unique = np.unique(inp)
+        msg = ('PQDM kwargs must have only one unique input even if being '
+               'called with arrays as part of reV but found: {}'
+               .format(unique))
+        assert len(unique) == 1, msg
+
+        while isinstance(inp, np.ndarray):
+            inp = inp[0]
+
+        return inp
+
+    @staticmethod
+    def _clean_params(params, arr_shape):
+        """Params shape should be (space, N) where N is the number of
+        parameters for the distribution. Array shape should be
+        (time, space)."""
+        if arr_shape[1] == params.shape[0]:
+            params = [params[:, i] for i in range(params.shape[1])]
+        return params
+
+    def __call__(self, arr):
+        params_oh = self._clean_params(self.params_oh, arr.shape)
+        params_mh = self._clean_params(self.params_mh, arr.shape)
+        params_mf = self._clean_params(self.params_mf, arr.shape)
+
+        p_mf = self.dist_obj.cdf(arr, *params_mf)
+        x_oh = self.dist_obj.ppf(p_mf, *params_oh)
+
+        if self.relative:
+            delta = arr / self.dist_obj.ppf(p_mf, *params_mh)
+            arr_bc = x_oh * delta
+        else:
+            delta = arr - self.dist_obj.ppf(p_mf, *params_mh)
+            arr_bc = x_oh + delta
+
+        return arr_bc
+
+
+def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min',
+            relative=True):
+    """Correct windspeed using parametric quantile delta mapping based on a
+    parametric implementation of the method from Cannon et al., 2015
+
+    Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
+    Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
+    Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015).
+
+    Parameters
+    ----------
+    ws : np.ndarray
+        2D array of windspeed values in shape (time, space)
+    params_oh : np.ndarray | list
+        2D array of probability distribution parameters created using a
+        function like ``scipy.stats.weibull_min.fit()`` where the shape is
+        (space, N) with N being the number of parameters required by the
+        specified distribution. This input arg is for the **observed historical
+        distribution**.
+    params_mh : np.ndarray | list
+        Same requirements as params_oh. This input arg is for the **modeled
+        historical distribution**.
+    params_mf : np.ndarray | list | None
+        Same requirements as params_oh. This input arg is for the **modeled
+        future distribution**. If this is None, this defaults to params_mh
+        (no future data).
+    dist : str | np.ndarray
+        Parametric probability distribution name to use to model the windspeed.
+        This can be any distribution name from ``scipy.stats``, but
+        "weibull_min" is a common choice for windspeed distributions. Can also
+        be an array of dist inputs if being used from reV, but they must all be
+        the same option.
+    relative : bool | np.ndarray
+        Flag to preserve relative rather than absolute changes in quantiles.
+        relative=False (default) will multiply by the change in quantiles while
+        relative=True will add. See Equations 4-6 from Cannon et al., 2015 for
+        more details. Can also be an array of dist inputs if being used from
+        reV, but they must all be the same option.
+
+    Returns
+    -------
+    ws : np.ndarray
+        2D array of windspeed values in shape (time, space)
+    """
+
+    pqdm = PQDM(params_oh, params_mh, params_mf, dist, relative)
+    ws = pqdm(ws)
+    ws = np.maximum(ws, 0)
+    return ws
diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py
index 184abe65..224f935a 100644
--- a/rex/utilities/bc_utils.py
+++ b/rex/utilities/bc_utils.py
@@ -2,6 +2,7 @@
 """
 rex bias correction utilities.
 """
+import json
 import numpy as np
 import logging
 from warnings import warn
@@ -83,7 +84,18 @@ def _parse_bc_table(bc_df, gids):
 
     bc_fun_kwargs = {}
     for col in bc_df.columns:
+
+        # load serialized lists from string columns in bc_df into nested lists
+        sample = bc_df[col].values[0]
+        if isinstance(sample, str) and '[' in sample and ']' in sample:
+            bc_df[col] = bc_df[col].apply(json.loads)
+
         arr = bc_df.loc[gid_arr[bool_bc], col].values
-        bc_fun_kwargs[col] = np.expand_dims(arr, 0)
+
+        # nested lists in bc_df converted to arr of shape (space, N)
+        if isinstance(arr[0], (list, tuple)):
+            arr = np.array(arr.tolist())
+
+        bc_fun_kwargs[col] = arr
 
     return bc_fun, bc_fun_kwargs, bool_bc
diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py
index aa75290a..44fa0675 100644
--- a/tests/test_sam_resource.py
+++ b/tests/test_sam_resource.py
@@ -427,8 +427,8 @@ def test_bias_correct_errors():
     assert 'Could not find method name "test' in str(record.value)
 
 
-def test_bias_correct_wind():
-    """Test linear bias correction functionality on windspeed"""
+def test_bias_correct_wind_lin():
+    """Test linear bias correction function on windspeed"""
     h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
     sites = slice(0, 20)
     hub_heights = 80
@@ -469,8 +469,8 @@ def test_bias_correct_wind():
         assert (res._res_arrays['windspeed'] >= 0).all()
 
 
-def test_bias_correct_solar():
-    """Test adder bias correction functionality on irradiance"""
+def test_bias_correct_solar_lin():
+    """Test adder bias correction function on irradiance"""
     h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5')
     sites = slice(0, 10)
     base_res = NSRDB.preload_SAM(h5, sites)
@@ -509,6 +509,28 @@ def test_bias_correct_solar():
         assert np.allclose(cos_sza, base_cos_sza, atol=0.005)
 
 
+def test_bias_correct_wind_pqdm():
+    """Test parametric QDM bias correction function on windspeed"""
+    h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
+    sites = slice(0, 20)
+    hub_heights = 80
+    base_res = WindResource.preload_SAM(h5, sites, hub_heights)
+
+    n = 10
+    bc = pd.DataFrame({'gid': np.arange(n),
+                       'method': 'pqdm_ws',
+                       'params_oh': '[2.6, -0.8, 8.9]',
+                       'params_mh': '[2.6, -0.8, 8.9]',
+                       'params_mf': '[2.6, -0.8, 8.9]',
+                       'dist': 'weibull_min',
+                       'relative': True,
+                       })
+
+    res = WindResource.preload_SAM(h5, sites, hub_heights)
+    res.bias_correct(bc)
+
+
+
 def execute_pytest(capture='all', flags='-rapP'):
     """Execute module as pytest with detailed summary report.
 

From d36793acff737d37cf59a3974cb57ec097cf953f Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Thu, 4 Jan 2024 16:04:50 -0700
Subject: [PATCH 06/19] finished PQDM for wind with thorough test

---
 rex/bias_correction.py     | 81 ++++++++++++++++++++++++++++----------
 tests/test_sam_resource.py | 38 +++++++++++++++---
 2 files changed, 93 insertions(+), 26 deletions(-)

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index cd00f04f..0e6d22cb 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -91,8 +91,11 @@ def lin_ws(ws, scalar=1, adder=0):
 
 
 class PQDM:
-    """Correct windspeed using parametric quantile delta mapping based on a
-    parametric implementation of the method from Cannon et al., 2015
+    """Class for parametric quantile delta mapping based on a parametric
+    implementation of the method from Cannon et al., 2015
+
+    Note that this is a utility class for implementing PQDM and should not be
+    requested directly as a method in the reV/rex bias correction table input
 
     Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
     Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
@@ -105,16 +108,16 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative):
         ----------
         ws : np.ndarray
             2D array of windspeed values in shape (time, space)
-        params_oh : np.ndarray | list
+        params_oh : np.ndarray
             2D array of probability distribution parameters created using a
             function like ``scipy.stats.weibull_min.fit()`` where the shape is
             (space, N) with N being the number of parameters required by the
-            specified distribution. This input arg is for the **observed
-            historical distribution**.
-        params_mh : np.ndarray | list
+            specified distribution e.g., (shape, loc, scale) for weibull_min.
+            This input arg is for the **observed historical distribution**.
+        params_mh : np.ndarray
             Same requirements as params_oh. This input arg is for the **modeled
             historical distribution**.
-        params_mf : np.ndarray | list | None
+        params_mf : np.ndarray | None
             Same requirements as params_oh. This input arg is for the **modeled
             future distribution**. If this is None, this defaults to params_mh
             (no future data).
@@ -134,22 +137,25 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative):
         """
         self.params_oh = params_oh
         self.params_mh = params_mh
-        self.params_mf = params_mf
-        self.dist = self._clean_kwarg(dist)
+        self.params_mf = params_mf if params_mf is not None else params_mh
+        self.dist_name = self._clean_kwarg(dist)
         self.relative = self._clean_kwarg(relative)
 
-        self.dist_obj = getattr(scipy.stats, self.dist, None)
-        if self.dist_obj is None:
+        self.scipy_dist = getattr(scipy.stats, self.dist_name, None)
+        if self.scipy_dist is None:
             msg = ('Could not get requested distribution "{}" from '
                    '``scipy.stats``. Please double check your spelling and '
                    'select one of the options from here: '
                    'https://docs.scipy.org/doc/scipy/reference/stats.html'
-                   .format(self.dist))
+                   .format(self.dist_name))
             logger.error(msg)
             raise KeyError(msg)
 
     @staticmethod
     def _clean_kwarg(inp):
+        """Clean any kwargs inputs (e.g., dist, relative) that might be
+        provided as an array and must be collapsed into a single string or
+        boolean value"""
         unique = np.unique(inp)
         msg = ('PQDM kwargs must have only one unique input even if being '
                'called with arrays as part of reV but found: {}'
@@ -163,28 +169,61 @@ def _clean_kwarg(inp):
 
     @staticmethod
     def _clean_params(params, arr_shape):
-        """Params shape should be (space, N) where N is the number of
-        parameters for the distribution. Array shape should be
-        (time, space)."""
+        """Re-organize 2D parameter arrays for passing into scipy distribution
+        functions.
+
+        Parameters
+        ----------
+        params : np.ndarray
+            Input params shape should be (space, N) where N is the number of
+            parameters for the distribution.
+        arr_shape : tuple
+            Array shape should be (time, space).
+
+        Returns
+        -------
+        params : np.ndarray | list
+            If the two inputs are of the expected shape, this output will be
+            params unpacked along axis=1 into a list so that the list entries
+            represent the scipy distribution parameters (e.g., shape, scale,
+            loc) and each list entry is of shape (space,)
+        """
         if arr_shape[1] == params.shape[0]:
             params = [params[:, i] for i in range(params.shape[1])]
         return params
 
     def __call__(self, arr):
+        """Run the PQDM function to bias correct an array
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            2D array of values in shape (time, space)
+
+        Returns
+        -------
+        arr : np.ndarray
+            Bias corrected copy of the input array with same shape.
+        """
+
         params_oh = self._clean_params(self.params_oh, arr.shape)
         params_mh = self._clean_params(self.params_mh, arr.shape)
         params_mf = self._clean_params(self.params_mf, arr.shape)
 
-        p_mf = self.dist_obj.cdf(arr, *params_mf)
-        x_oh = self.dist_obj.ppf(p_mf, *params_oh)
+        p_mf = self.scipy_dist.cdf(arr, *params_mf)
+        x_oh = self.scipy_dist.ppf(p_mf, *params_oh)
 
         if self.relative:
-            delta = arr / self.dist_obj.ppf(p_mf, *params_mh)
+            delta = arr / self.scipy_dist.ppf(p_mf, *params_mh)
             arr_bc = x_oh * delta
         else:
-            delta = arr - self.dist_obj.ppf(p_mf, *params_mh)
+            delta = arr - self.scipy_dist.ppf(p_mf, *params_mh)
             arr_bc = x_oh + delta
 
+        msg = ('Input shape {} does not match PQDM bias corrected output '
+               'shape {}!'.format(arr.shape, arr_bc.shape))
+        assert arr.shape == arr_bc.shape, msg
+
         return arr_bc
 
 
@@ -205,8 +244,8 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min',
         2D array of probability distribution parameters created using a
         function like ``scipy.stats.weibull_min.fit()`` where the shape is
         (space, N) with N being the number of parameters required by the
-        specified distribution. This input arg is for the **observed historical
-        distribution**.
+        specified distribution e.g., (shape, loc, scale) for weibull_min. This
+        input arg is for the **observed historical distribution**.
     params_mh : np.ndarray | list
         Same requirements as params_oh. This input arg is for the **modeled
         historical distribution**.
diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py
index 44fa0675..9eb5d1c3 100644
--- a/tests/test_sam_resource.py
+++ b/tests/test_sam_resource.py
@@ -10,6 +10,7 @@
 import pandas as pd
 from pandas.testing import assert_series_equal
 import pytest
+from scipy.stats import weibull_min
 
 from rex.renewable_resource import WindResource, NSRDB, WaveResource
 from rex.multi_file_resource import MultiFileNSRDB
@@ -512,23 +513,50 @@ def test_bias_correct_solar_lin():
 def test_bias_correct_wind_pqdm():
     """Test parametric QDM bias correction function on windspeed"""
     h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
-    sites = slice(0, 20)
+    n = 20
+    sites = slice(0, n)
     hub_heights = 80
     base_res = WindResource.preload_SAM(h5, sites, hub_heights)
+    base_ws = base_res._res_arrays['windspeed']
+    shape, loc, scale = weibull_min.fit(base_ws.mean(axis=1))
 
-    n = 10
     bc = pd.DataFrame({'gid': np.arange(n),
                        'method': 'pqdm_ws',
-                       'params_oh': '[2.6, -0.8, 8.9]',
-                       'params_mh': '[2.6, -0.8, 8.9]',
-                       'params_mf': '[2.6, -0.8, 8.9]',
+                       'params_oh': f'[{shape}, {loc}, {scale}]',
+                       'params_mh': f'[{shape}, {loc}, {scale}]',
+                       'params_mf': f'[{shape}, {loc}, {scale}]',
                        'dist': 'weibull_min',
                        'relative': True,
                        })
 
     res = WindResource.preload_SAM(h5, sites, hub_heights)
     res.bias_correct(bc)
+    bc_ws = res._res_arrays['windspeed']
+    assert np.allclose(bc_ws, base_ws)
 
+    scale_mod = 0.9
+    bc['params_mh'] = f'[{shape}, {loc}, {scale * scale_mod}]'
+    bc = bc.drop('params_mf', axis=1)
+    res = WindResource.preload_SAM(h5, sites, hub_heights)
+    res.bias_correct(bc)
+    bc_ws = res._res_arrays['windspeed']
+
+    # spot check using manual calc
+    idys = np.random.choice(base_ws.shape[0], size=10)
+    idxs = np.random.choice(base_ws.shape[1], size=10)
+    for idy, idx in zip(idys, idxs):
+        p_mf = weibull_min.cdf(base_ws[idy, idx], shape, loc,
+                               scale * scale_mod)
+        x_oh = weibull_min.ppf(p_mf, shape, loc, scale)
+        delta = base_ws[idy, idx] / weibull_min.ppf(p_mf, shape, loc,
+                                                    scale * scale_mod)
+        single_ws_bc = x_oh * delta
+        assert np.allclose(single_ws_bc, bc_ws[idy, idx])
+
+    # almost none of the values should be the same as the input, and none
+    # should be negative
+    assert ((bc_ws == base_ws).sum() / bc_ws.size) < 0.001
+    assert not (bc_ws < 0).any()
 
 
 def execute_pytest(capture='all', flags='-rapP'):

From bd8e7d832e26519dad76d9cbd61b6151476a68ce Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Fri, 5 Jan 2024 10:50:05 -0700
Subject: [PATCH 07/19] added PQDM for irradiance

---
 rex/bias_correction.py | 190 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 169 insertions(+), 21 deletions(-)

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index 0e6d22cb..c0b07c73 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -10,12 +10,9 @@
 logger = logging.getLogger(__name__)
 
 
-def lin_irrad(ghi, dni, dhi, scalar=1, adder=0):
-    """Correct GHI and DNI using linear correction factors. Both irradiance
-    variables are corrected as ``irradiance * scalar + adder``. DHI is
-    preserved based on the relationship ``dhi = ghi - (dni * cos(sza))``. Times
-    when GHI and DNI are zero are preserved and negative values are protected
-    against.
+def _irrad_pre_proc(ghi, dni, dhi):
+    """Irradiance data pre processing to get ancillary variables
+    (run before bias correction).
 
     Parameters
     ----------
@@ -25,19 +22,18 @@ def lin_irrad(ghi, dni, dhi, scalar=1, adder=0):
         2D array of direct normal irradiance values in shape (time, space)
     dhi : np.ndarray
         2D array of diffuse horizontal irradiance values in shape (time, space)
-    scalar : np.ndarray
-        1D array of linear scalar values in the shape (space,)
-    adder : np.ndarray
-        1D array of linear adder values in the shape (space,)
 
     Returns
     -------
-    ghi : np.ndarray
-        2D array of global horizontal irradiance values in shape (time, space)
-    dni : np.ndarray
-        2D array of direct normal irradiance values in shape (time, space)
-    dhi : np.ndarray
-        2D array of diffuse horizontal irradiance values in shape (time, space)
+    ghi_zeros : np.ndarray
+        2D boolean array, True where ghi==0, same shape as ghi input
+    dni_zeros : np.ndarray
+        2D boolean array, True where dni==0, same shape as dni input
+    dhi_zeros : np.ndarray
+        2D boolean array, True where dhi==0, same shape as dhi input
+    cos_sza : np.ndarray
+        2D array for cos(solar_zenith_angle) calculated from the basic
+        relationship ``cos_sza = (ghi - dhi) / dni``
     """
 
     ghi_zeros = ghi == 0
@@ -46,8 +42,38 @@ def lin_irrad(ghi, dni, dhi, scalar=1, adder=0):
     with np.errstate(divide='ignore', invalid='ignore'):
         cos_sza = (ghi - dhi) / dni
 
-    ghi = ghi * scalar + adder
-    dni = dni * scalar + adder
+    return ghi_zeros, dni_zeros, dhi_zeros, cos_sza
+
+
+def _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, cos_sza):
+    """Irradiance data post processing to calculate DHI and set limits on
+    irradiance variables (run after bias correction).
+
+    Parameters
+    ----------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    ghi_zeros : np.ndarray
+        2D boolean array, True where ghi==0, same shape as ghi input
+    dni_zeros : np.ndarray
+        2D boolean array, True where dni==0, same shape as dni input
+    dhi_zeros : np.ndarray
+        2D boolean array, True where dhi==0, same shape as dhi input
+    cos_sza : np.ndarray
+        2D array for cos(solar_zenith_angle) calculated from the basic
+        relationship ``cos_sza = (ghi - dhi) / dni``
+
+    Returns
+    -------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    dhi : np.ndarray
+        2D array of diffuse horizontal irradiance values in shape (time, space)
+    """
 
     ghi = np.maximum(0, ghi)
     dni = np.maximum(0, dni)
@@ -67,6 +93,47 @@ def lin_irrad(ghi, dni, dhi, scalar=1, adder=0):
     return ghi, dni, dhi
 
 
+def lin_irrad(ghi, dni, dhi, scalar=1, adder=0):
+    """Correct GHI and DNI using linear correction factors. Both irradiance
+    variables are corrected as ``irradiance * scalar + adder``. DHI is
+    preserved based on the relationship ``dhi = ghi - (dni * cos(sza))``. Times
+    when GHI and DNI are zero are preserved and negative values are protected
+    against.
+
+    Parameters
+    ----------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    dhi : np.ndarray
+        2D array of diffuse horizontal irradiance values in shape (time, space)
+    scalar : np.ndarray
+        1D array of linear scalar values in the shape (space,)
+    adder : np.ndarray
+        1D array of linear adder values in the shape (space,)
+
+    Returns
+    -------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    dhi : np.ndarray
+        2D array of diffuse horizontal irradiance values in shape (time, space)
+    """
+
+    ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi)
+
+    ghi = ghi * scalar + adder
+    dni = dni * scalar + adder
+
+    ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros,
+                                     cos_sza)
+
+    return ghi, dni, dhi
+
+
 def lin_ws(ws, scalar=1, adder=0):
     """Correct windspeed using linear correction factors. Windspeed is
     corrected as ``windspeed * scalar + adder`` with a minimum of zero.
@@ -90,7 +157,7 @@ def lin_ws(ws, scalar=1, adder=0):
     return ws
 
 
-class PQDM:
+class _PQDM:
     """Class for parametric quantile delta mapping based on a parametric
     implementation of the method from Cannon et al., 2015
 
@@ -157,7 +224,7 @@ def _clean_kwarg(inp):
         provided as an array and must be collapsed into a single string or
         boolean value"""
         unique = np.unique(inp)
-        msg = ('PQDM kwargs must have only one unique input even if being '
+        msg = ('_PQDM kwargs must have only one unique input even if being '
                'called with arrays as part of reV but found: {}'
                .format(unique))
         assert len(unique) == 1, msg
@@ -227,6 +294,87 @@ def __call__(self, arr):
         return arr_bc
 
 
+def pqdm_irrad(ghi, dni, dhi,
+               ghi_params_oh, dni_params_oh,
+               ghi_params_mh, dni_params_mh,
+               ghi_params_mf=None, dni_params_mf=None,
+               dist='weibull_min', relative=True):
+    """Correct irradiance using parametric quantile delta mapping based on a
+    parametric implementation of the method from Cannon et al., 2015
+
+    Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
+    Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
+    Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015).
+
+    Parameters
+    ----------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    dhi : np.ndarray
+        2D array of diffuse horizontal irradiance values in shape (time, space)
+    ghi_params_oh : np.ndarray | list
+        2D array of probability distribution parameters for GHI created using a
+        function like ``scipy.stats.weibull_min.fit()`` where the shape is
+        (space, N) with N being the number of parameters required by the
+        specified distribution e.g., (shape, loc, scale) for weibull_min. This
+        input arg is for the **observed historical distribution**.
+    dni_params_oh : np.ndarray | list
+        Same requirements as ghi_params_oh. This input arg is for the
+        **observed historical distribution** for DNI.
+    ghi_params_mh : np.ndarray | list
+        Same requirements as ghi_params_oh. This input arg is for the **modeled
+        historical distribution**.
+    dni_params_mh : np.ndarray | list
+        Same requirements as ghi_params_oh. This input arg is for the **modeled
+        historical distribution** for DNI.
+    ghi_params_mf : np.ndarray | list | None
+        Same requirements as ghi_params_oh. This input arg is for the **modeled
+        future distribution**. If this is None, this defaults to ghi_params_mh
+        (no future data).
+    dni_params_mf : np.ndarray | list | None
+        Same requirements as ghi_params_oh. This input arg is for the **modeled
+        future distribution** for DNI. If this is None, this defaults to
+        dni_params_mh (no future data).
+    dist : str | np.ndarray
+        Parametric probability distribution name to use to model the windspeed.
+        This can be any distribution name from ``scipy.stats``. Can also
+        be an array of dist inputs if being used from reV, but they must all be
+        the same option.
+    relative : bool | np.ndarray
+        Flag to preserve relative rather than absolute changes in quantiles.
+        relative=False (default) will multiply by the change in quantiles while
+        relative=True will add. See Equations 4-6 from Cannon et al., 2015 for
+        more details. Can also be an array of dist inputs if being used from
+        reV, but they must all be the same option.
+
+    Returns
+    -------
+    ghi : np.ndarray
+        2D array of global horizontal irradiance values in shape (time, space)
+    dni : np.ndarray
+        2D array of direct normal irradiance values in shape (time, space)
+    dhi : np.ndarray
+        2D array of diffuse horizontal irradiance values in shape (time, space)
+    """
+
+    ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi)
+
+    ghi_pqdm = _PQDM(ghi_params_oh, ghi_params_mh, ghi_params_mf,
+                     dist, relative)
+    dni_pqdm = _PQDM(dni_params_oh, dni_params_mh, dni_params_mf,
+                     dist, relative)
+
+    ghi = ghi_pqdm(ghi)
+    dni = dni_pqdm(dni)
+
+    ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros,
+                                     cos_sza)
+
+    return ghi, dni, dhi
+
+
 def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min',
             relative=True):
     """Correct windspeed using parametric quantile delta mapping based on a
@@ -272,7 +420,7 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min',
         2D array of windspeed values in shape (time, space)
     """
 
-    pqdm = PQDM(params_oh, params_mh, params_mf, dist, relative)
+    pqdm = _PQDM(params_oh, params_mh, params_mf, dist, relative)
     ws = pqdm(ws)
     ws = np.maximum(ws, 0)
     return ws

From f5f12c6d16f152aed5781b0a0e5ba097d1fec39d Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Fri, 5 Jan 2024 14:22:47 -0700
Subject: [PATCH 08/19] implemented empirical QDM with tests

---
 rex/bias_correction.py     | 262 +++++++++++++++++++++++--------------
 tests/test_sam_resource.py |  85 +++++++++++-
 2 files changed, 247 insertions(+), 100 deletions(-)

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index c0b07c73..649df17f 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -85,8 +85,8 @@ def _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros, cos_sza):
         dhi = ghi - (dni * cos_sza)
 
     dhi[dni_zeros] = ghi[dni_zeros]
-    dhi = np.maximum(0, dhi)
     dhi[dhi_zeros] = 0
+    dhi = np.maximum(0, dhi)
 
     assert not np.isnan(dhi).any()
 
@@ -157,11 +157,11 @@ def lin_ws(ws, scalar=1, adder=0):
     return ws
 
 
-class _PQDM:
-    """Class for parametric quantile delta mapping based on a parametric
-    implementation of the method from Cannon et al., 2015
+class _QuantileDeltaMapping:
+    """Class for quantile delta mapping based on the method from
+    Cannon et al., 2015
 
-    Note that this is a utility class for implementing PQDM and should not be
+    Note that this is a utility class for implementing QDM and should not be
     requested directly as a method in the reV/rex bias correction table input
 
     Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
@@ -173,50 +173,56 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative):
         """
         Parameters
         ----------
-        ws : np.ndarray
-            2D array of windspeed values in shape (time, space)
         params_oh : np.ndarray
-            2D array of probability distribution parameters created using a
-            function like ``scipy.stats.weibull_min.fit()`` where the shape is
-            (space, N) with N being the number of parameters required by the
-            specified distribution e.g., (shape, loc, scale) for weibull_min.
-            This input arg is for the **observed historical distribution**.
+            2D array of **observed historical** distribution parameters created
+            from a multi-year set of data where the shape is (space, N). This
+            can be the output of a parametric distribution fit like
+            ``scipy.stats.weibull_min.fit()`` where N is the number of
+            parameters for that distribution, or this can define the x-values
+            of N points from an empirical CDF that will be linearly
+            interpolated between. If this is an empirical CDF, this must
+            include the 0th and 100th percentile values and have even
+            percentile spacing between values.
         params_mh : np.ndarray
             Same requirements as params_oh. This input arg is for the **modeled
             historical distribution**.
         params_mf : np.ndarray | None
             Same requirements as params_oh. This input arg is for the **modeled
             future distribution**. If this is None, this defaults to params_mh
-            (no future data).
+            (no future data, just corrected to modeled historical distribution)
         dist : str | np.ndarray
-            Parametric probability distribution name to use to model the
-            windspeed. This can be any distribution name from ``scipy.stats``,
-            but "weibull_min" is a common choice for windspeed distributions.
-            Can also be an array of dist inputs if being used from reV, but
-            they must all be the same option.
+            Probability distribution name to use to model the data which
+            determines how the param args are used. This can "empirical" or any
+            continuous distribution name from ``scipy.stats``. Can also be a 1D
+            array of dist inputs if being used from reV, but they must all be
+            the same option.
         relative : bool | np.ndarray
             Flag to preserve relative rather than absolute changes in
             quantiles. relative=False (default) will multiply by the change in
             quantiles while relative=True will add. See Equations 4-6 from
-            Cannon et al., 2015 for more details. Can also be an array of dist
-            inputs if being used from reV, but they must all be the same
+            Cannon et al., 2015 for more details. Can also be a 1D array of
+            dist inputs if being used from reV, but they must all be the same
             option.
         """
+
         self.params_oh = params_oh
         self.params_mh = params_mh
         self.params_mf = params_mf if params_mf is not None else params_mh
-        self.dist_name = self._clean_kwarg(dist)
         self.relative = self._clean_kwarg(relative)
-
-        self.scipy_dist = getattr(scipy.stats, self.dist_name, None)
-        if self.scipy_dist is None:
-            msg = ('Could not get requested distribution "{}" from '
-                   '``scipy.stats``. Please double check your spelling and '
-                   'select one of the options from here: '
-                   'https://docs.scipy.org/doc/scipy/reference/stats.html'
-                   .format(self.dist_name))
-            logger.error(msg)
-            raise KeyError(msg)
+        self.dist_name = self._clean_kwarg(dist)
+        self.scipy_dist = None
+
+        if self.dist_name != 'empirical':
+            self.scipy_dist = getattr(scipy.stats, self.dist_name, None)
+            if self.scipy_dist is None:
+                msg = ('Could not get requested distribution "{}" from '
+                       '``scipy.stats``. Please double check your spelling '
+                       'and select "empirical" or one of the continuous '
+                       'distribution options from here: '
+                       'https://docs.scipy.org/doc/scipy/reference/stats.html'
+                       .format(self.dist_name))
+                logger.error(msg)
+                raise KeyError(msg)
 
     @staticmethod
     def _clean_kwarg(inp):
@@ -224,8 +230,8 @@ def _clean_kwarg(inp):
         provided as an array and must be collapsed into a single string or
         boolean value"""
         unique = np.unique(inp)
-        msg = ('_PQDM kwargs must have only one unique input even if being '
-               'called with arrays as part of reV but found: {}'
+        msg = ('_QuantileDeltaMapping kwargs must have only one unique input '
+               'even if being called with arrays as part of reV but found: {}'
                .format(unique))
         assert len(unique) == 1, msg
 
@@ -234,10 +240,9 @@ def _clean_kwarg(inp):
 
         return inp
 
-    @staticmethod
-    def _clean_params(params, arr_shape):
-        """Re-organize 2D parameter arrays for passing into scipy distribution
-        functions.
+    def _clean_params(self, params, arr_shape):
+        """Verify and clean 2D parameter arrays for passing into empirical
+        distribution or scipy continuous distribution functions.
 
         Parameters
         ----------
@@ -250,17 +255,56 @@ def _clean_params(params, arr_shape):
         Returns
         -------
         params : np.ndarray | list
-            If the two inputs are of the expected shape, this output will be
-            params unpacked along axis=1 into a list so that the list entries
-            represent the scipy distribution parameters (e.g., shape, scale,
-            loc) and each list entry is of shape (space,)
+            If a scipy continuous dist is set, this output will be params
+            unpacked along axis=1 into a list so that the list entries
+            represent the scipy distribution parameters
+            (e.g., shape, scale, loc) and each list entry is of shape (space,)
         """
-        if arr_shape[1] == params.shape[0]:
+
+        msg = f'params must be 2D array but received {type(params)}'
+        assert isinstance(params, np.ndarray), msg
+        msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) '
+               f'but received shape {params.shape}')
+        assert len(params.shape) == 2, msg
+        assert params.shape[0] == arr_shape[1], msg
+
+        if self.scipy_dist is not None:
             params = [params[:, i] for i in range(params.shape[1])]
+
         return params
 
+    def cdf(self, x, params):
+        """Run the CDF function e.g., convert physical variable to quantile"""
+
+        if self.scipy_dist is None:
+            p = np.zeros_like(x)
+            for idx in range(x.shape[1]):
+                xp = params[idx, :]
+                fp = np.linspace(0, 1, len(xp))
+                p[:, idx] = np.interp(x[:, idx], xp, fp)
+
+        else:
+            p = self.scipy_dist.cdf(x, *params)
+
+        return p
+
+    def ppf(self, p, params):
+        """Run the inverse CDF function (percent point function) e.g., convert
+        quantile to physical variable"""
+
+        if self.scipy_dist is None:
+            x = np.zeros_like(p)
+            for idx in range(p.shape[1]):
+                fp = params[idx, :]
+                xp = np.linspace(0, 1, len(fp))
+                x[:, idx] = np.interp(p[:, idx], xp, fp)
+        else:
+            x = self.scipy_dist.ppf(p, *params)
+
+        return x
+
     def __call__(self, arr):
-        """Run the PQDM function to bias correct an array
+        """Run the QDM function to bias correct an array
 
         Parameters
         ----------
@@ -277,30 +321,30 @@ def __call__(self, arr):
         params_mh = self._clean_params(self.params_mh, arr.shape)
         params_mf = self._clean_params(self.params_mf, arr.shape)
 
-        p_mf = self.scipy_dist.cdf(arr, *params_mf)
-        x_oh = self.scipy_dist.ppf(p_mf, *params_oh)
+        p_mf = self.cdf(arr, params_mf)
+        x_oh = self.ppf(p_mf, params_oh)
 
         if self.relative:
-            delta = arr / self.scipy_dist.ppf(p_mf, *params_mh)
+            delta = arr / self.ppf(p_mf, params_mh)
             arr_bc = x_oh * delta
         else:
-            delta = arr - self.scipy_dist.ppf(p_mf, *params_mh)
+            delta = arr - self.ppf(p_mf, params_mh)
             arr_bc = x_oh + delta
 
-        msg = ('Input shape {} does not match PQDM bias corrected output '
+        msg = ('Input shape {} does not match QDM bias corrected output '
                'shape {}!'.format(arr.shape, arr_bc.shape))
         assert arr.shape == arr_bc.shape, msg
 
         return arr_bc
 
 
-def pqdm_irrad(ghi, dni, dhi,
-               ghi_params_oh, dni_params_oh,
-               ghi_params_mh, dni_params_mh,
-               ghi_params_mf=None, dni_params_mf=None,
-               dist='weibull_min', relative=True):
-    """Correct irradiance using parametric quantile delta mapping based on a
-    parametric implementation of the method from Cannon et al., 2015
+def qdm_irrad(ghi, dni, dhi,
+              ghi_params_oh, dni_params_oh,
+              ghi_params_mh, dni_params_mh,
+              ghi_params_mf=None, dni_params_mf=None,
+              dist='empirical', relative=True):
+    """Correct irradiance using the quantile delta mapping based on the method
+    from Cannon et al., 2015
 
     Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
     Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
@@ -315,39 +359,47 @@ def pqdm_irrad(ghi, dni, dhi,
     dhi : np.ndarray
         2D array of diffuse horizontal irradiance values in shape (time, space)
     ghi_params_oh : np.ndarray | list
-        2D array of probability distribution parameters for GHI created using a
-        function like ``scipy.stats.weibull_min.fit()`` where the shape is
-        (space, N) with N being the number of parameters required by the
-        specified distribution e.g., (shape, loc, scale) for weibull_min. This
-        input arg is for the **observed historical distribution**.
+        2D array of **observed historical** distribution parameters created
+        from a multi-year set of data where the shape is (space, N). This
+        can be the output of a parametric distribution fit like
+        ``scipy.stats.weibull_min.fit()`` where N is the number of
+        parameters for that distribution, or this can define the x-values
+        of N points from an empirical CDF that will be linearly
+        interpolated between. If this is an empirical CDF, this must
+        include the 0th and 100th percentile values and have even
+        percentile spacing between values.
     dni_params_oh : np.ndarray | list
         Same requirements as ghi_params_oh. This input arg is for the
         **observed historical distribution** for DNI.
     ghi_params_mh : np.ndarray | list
         Same requirements as ghi_params_oh. This input arg is for the **modeled
-        historical distribution**.
+        historical distribution** for GHI.
     dni_params_mh : np.ndarray | list
         Same requirements as ghi_params_oh. This input arg is for the **modeled
         historical distribution** for DNI.
     ghi_params_mf : np.ndarray | list | None
         Same requirements as ghi_params_oh. This input arg is for the **modeled
-        future distribution**. If this is None, this defaults to ghi_params_mh
-        (no future data).
+        future distribution** for GHI. If this is None, this defaults to
+        ghi_params_mh (no future data, just corrected to modeled historical
+        distribution)
     dni_params_mf : np.ndarray | list | None
         Same requirements as ghi_params_oh. This input arg is for the **modeled
         future distribution** for DNI. If this is None, this defaults to
-        dni_params_mh (no future data).
+        dni_params_mh. (no future data, just corrected to modeled historical
+        distribution)
     dist : str | np.ndarray
-        Parametric probability distribution name to use to model the windspeed.
-        This can be any distribution name from ``scipy.stats``. Can also
-        be an array of dist inputs if being used from reV, but they must all be
+        Probability distribution name to use to model the data which
+        determines how the param args are used. This can "empirical" or any
+        continuous distribution name from ``scipy.stats``. Can also be a 1D
+        array of dist inputs if being used from reV, but they must all be
         the same option.
     relative : bool | np.ndarray
-        Flag to preserve relative rather than absolute changes in quantiles.
-        relative=False (default) will multiply by the change in quantiles while
-        relative=True will add. See Equations 4-6 from Cannon et al., 2015 for
-        more details. Can also be an array of dist inputs if being used from
-        reV, but they must all be the same option.
+        Flag to preserve relative rather than absolute changes in
+        quantiles. relative=False (default) will multiply by the change in
+        quantiles while relative=True will add. See Equations 4-6 from
+        Cannon et al., 2015 for more details. Can also be a 1D array of
+        dist inputs if being used from reV, but they must all be the same
+        option.
 
     Returns
     -------
@@ -361,13 +413,19 @@ def pqdm_irrad(ghi, dni, dhi,
 
     ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi)
 
-    ghi_pqdm = _PQDM(ghi_params_oh, ghi_params_mh, ghi_params_mf,
-                     dist, relative)
-    dni_pqdm = _PQDM(dni_params_oh, dni_params_mh, dni_params_mf,
-                     dist, relative)
+    ghi_qdm = _QuantileDeltaMapping(ghi_params_oh, ghi_params_mh,
+                                    ghi_params_mf, dist, relative)
+    dni_qdm = _QuantileDeltaMapping(dni_params_oh, dni_params_mh,
+                                    dni_params_mf, dist, relative)
+
+    # This will prevent inverse CDF functions from returning zero resulting in
+    # a divide by zero error in the calculation of the QDM delta. These zeros
+    # get fixed later in _irrad_post_proc
+    ghi[ghi_zeros] = 1500
+    dni[dni_zeros] = 1500
 
-    ghi = ghi_pqdm(ghi)
-    dni = dni_pqdm(dni)
+    ghi = ghi_qdm(ghi)
+    dni = dni_qdm(dni)
 
     ghi, dni, dhi = _irrad_post_proc(ghi, dni, ghi_zeros, dni_zeros, dhi_zeros,
                                      cos_sza)
@@ -375,10 +433,10 @@ def pqdm_irrad(ghi, dni, dhi,
     return ghi, dni, dhi
 
 
-def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min',
-            relative=True):
-    """Correct windspeed using parametric quantile delta mapping based on a
-    parametric implementation of the method from Cannon et al., 2015
+def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical',
+           relative=True):
+    """Correct windspeed using quantile delta mapping based on the method from
+    Cannon et al., 2015
 
     Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
     Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
@@ -389,30 +447,35 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min',
     ws : np.ndarray
         2D array of windspeed values in shape (time, space)
     params_oh : np.ndarray | list
-        2D array of probability distribution parameters created using a
-        function like ``scipy.stats.weibull_min.fit()`` where the shape is
-        (space, N) with N being the number of parameters required by the
-        specified distribution e.g., (shape, loc, scale) for weibull_min. This
-        input arg is for the **observed historical distribution**.
+        2D array of **observed historical** distribution parameters created
+        from a multi-year set of data where the shape is (space, N). This
+        can be the output of a parametric distribution fit like
+        ``scipy.stats.weibull_min.fit()`` where N is the number of
+        parameters for that distribution, or this can define the x-values
+        of N points from an empirical CDF that will be linearly
+        interpolated between. If this is an empirical CDF, this must
+        include the 0th and 100th percentile values and have even
+        percentile spacing between values.
     params_mh : np.ndarray | list
         Same requirements as params_oh. This input arg is for the **modeled
         historical distribution**.
     params_mf : np.ndarray | list | None
         Same requirements as params_oh. This input arg is for the **modeled
         future distribution**. If this is None, this defaults to params_mh
-        (no future data).
+        (no future data, just corrected to modeled historical distribution)
     dist : str | np.ndarray
-        Parametric probability distribution name to use to model the windspeed.
-        This can be any distribution name from ``scipy.stats``, but
-        "weibull_min" is a common choice for windspeed distributions. Can also
-        be an array of dist inputs if being used from reV, but they must all be
+        Probability distribution name to use to model the data which
+        determines how the param args are used. This can "empirical" or any
+        continuous distribution name from ``scipy.stats``. Can also be a 1D
+        array of dist inputs if being used from reV, but they must all be
         the same option.
     relative : bool | np.ndarray
-        Flag to preserve relative rather than absolute changes in quantiles.
-        relative=False (default) will multiply by the change in quantiles while
-        relative=True will add. See Equations 4-6 from Cannon et al., 2015 for
-        more details. Can also be an array of dist inputs if being used from
-        reV, but they must all be the same option.
+        Flag to preserve relative rather than absolute changes in
+        quantiles. relative=False (default) will multiply by the change in
+        quantiles while relative=True will add. See Equations 4-6 from
+        Cannon et al., 2015 for more details. Can also be a 1D array of
+        dist inputs if being used from reV, but they must all be the same
+        option.
 
     Returns
     -------
@@ -420,7 +483,8 @@ def pqdm_ws(ws, params_oh, params_mh, params_mf=None, dist='weibull_min',
         2D array of windspeed values in shape (time, space)
     """
 
-    pqdm = _PQDM(params_oh, params_mh, params_mf, dist, relative)
-    ws = pqdm(ws)
+    qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist,
+                                relative)
+    ws = qdm(ws)
     ws = np.maximum(ws, 0)
     return ws
diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py
index 9eb5d1c3..17667219 100644
--- a/tests/test_sam_resource.py
+++ b/tests/test_sam_resource.py
@@ -2,6 +2,7 @@
 """
 pytests for sam_resource
 """
+import json
 import tempfile
 import numpy as np
 import shutil
@@ -521,7 +522,7 @@ def test_bias_correct_wind_pqdm():
     shape, loc, scale = weibull_min.fit(base_ws.mean(axis=1))
 
     bc = pd.DataFrame({'gid': np.arange(n),
-                       'method': 'pqdm_ws',
+                       'method': 'qdm_ws',
                        'params_oh': f'[{shape}, {loc}, {scale}]',
                        'params_mh': f'[{shape}, {loc}, {scale}]',
                        'params_mf': f'[{shape}, {loc}, {scale}]',
@@ -559,6 +560,88 @@ def test_bias_correct_wind_pqdm():
     assert not (bc_ws < 0).any()
 
 
+def test_bias_correct_wind_qdm():
+    """Test empirical QDM bias correction function on windspeed"""
+    h5 = os.path.join(TESTDATADIR, 'wtk/ri_100_wtk_2012.h5')
+    n = 20
+    sites = slice(0, n)
+    hub_heights = 80
+    base_res = WindResource.preload_SAM(h5, sites, hub_heights)
+    base_ws = base_res._res_arrays['windspeed']
+
+    base_params = list(np.linspace(0, 100, 10))
+
+    bc = pd.DataFrame({'gid': np.arange(n),
+                       'method': 'qdm_ws',
+                       'params_oh': json.dumps(base_params),
+                       'params_mh': json.dumps(base_params),
+                       'params_mf': json.dumps(base_params),
+                       'dist': 'empirical',
+                       'relative': True,
+                       })
+
+    res = WindResource.preload_SAM(h5, sites, hub_heights)
+    res.bias_correct(bc)
+    bc_ws = res._res_arrays['windspeed']
+    assert np.allclose(bc_ws, base_ws)
+
+    res = WindResource.preload_SAM(h5, sites, hub_heights)
+    params = list(0.1*np.ones(10))
+    bc['params_oh'] = json.dumps(params)
+    bc.loc[0, 'params_oh'] = json.dumps(base_params)
+    res.bias_correct(bc)
+    bc_ws = res._res_arrays['windspeed']
+    assert np.allclose(bc_ws[:, 0], base_ws[:, 0])
+    assert ((bc_ws[:, 1:] > base_ws[:, 1:]).sum() / bc_ws[:, 1:].size) < 1e-4
+
+
+def test_bias_correct_irrad_qdm():
+    """Test empirical QDM bias correction function on irradiance"""
+    h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5')
+    n = 10
+    sites = slice(0, n)
+    base_res = NSRDB.preload_SAM(h5, sites)
+    base_ghi = base_res._res_arrays['ghi']
+    base_dni = base_res._res_arrays['dni']
+
+    base_params = list(np.linspace(0, 1300, 10))
+    bc = pd.DataFrame({'gid': np.arange(n),
+                       'ghi_params_oh': json.dumps(base_params),
+                       'dni_params_oh': json.dumps(base_params),
+                       'ghi_params_mh': json.dumps(base_params),
+                       'dni_params_mh': json.dumps(base_params),
+                       'method': 'qdm_irrad'},
+                      )
+
+    res = NSRDB.preload_SAM(h5, sites)
+    res.bias_correct(bc)
+    assert np.allclose(base_res._res_arrays['ghi'], base_ghi)
+    assert np.allclose(base_res._res_arrays['dni'], base_dni)
+
+    res = NSRDB.preload_SAM(h5, sites)
+    params = list(0.1*np.ones(10))
+    bc['ghi_params_oh'] = json.dumps(params)
+    res.bias_correct(bc)
+    bc_ghi = res._res_arrays['ghi']
+    bc_dni = res._res_arrays['dni']
+    bc_dhi = res._res_arrays['dhi']
+    assert (bc_ghi >= 0).all() & (bc_dni >= 0).all() & (bc_dhi >= 0).all()
+    assert (bc_ghi.mean(axis=0) < base_ghi.mean(axis=0)).all()
+    assert np.allclose(bc_dni, base_dni)
+
+    res = NSRDB.preload_SAM(h5, sites)
+    params = list(0.1*np.ones(10))
+    bc['ghi_params_oh'] = json.dumps(base_params)
+    bc['dni_params_oh'] = json.dumps(params)
+    res.bias_correct(bc)
+    bc_ghi = res._res_arrays['ghi']
+    bc_dni = res._res_arrays['dni']
+    bc_dhi = res._res_arrays['dhi']
+    assert (bc_ghi >= 0).all() & (bc_dni >= 0).all() & (bc_dhi >= 0).all()
+    assert (bc_dni.mean(axis=0) < base_dni.mean(axis=0)).all()
+    assert np.allclose(bc_ghi, base_ghi)
+
+
 def execute_pytest(capture='all', flags='-rapP'):
     """Execute module as pytest with detailed summary report.
 

From 3eadb3bbf2513bc3512c02932b39bc88d77eb0ea Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Fri, 5 Jan 2024 15:00:22 -0700
Subject: [PATCH 09/19] added cdf function for temporal stats

---
 rex/temporal_stats/temporal_stats.py | 34 ++++++++++++++++++++++++++++
 tests/test_temporal_stats.py         | 16 ++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py
index 18de50a4..a411ae01 100644
--- a/rex/temporal_stats/temporal_stats.py
+++ b/rex/temporal_stats/temporal_stats.py
@@ -83,6 +83,40 @@ def circular_mean(data, weights=None, degrees=True, axis=0,
     return mean
 
 
+def cdf(data, n=50, decimals=None):
+    """Get a number of x-values that define a CDF for the input data.
+
+    Parameters
+    ----------
+    data : np.ndarray
+        1D array of data to make a CDF for
+    n : int
+        Number of points to fit the CDF
+    decimals : int | None
+        Precision to round output to (see docstring for np.round). None will
+        not round outputs (default).
+
+    Returns
+    -------
+    x : np.ndarray
+        1D array of values with shape (n,). Each value is in the same units as
+        the input data argument. The x[0] is the minimum value of data (0th
+        percentile) and x[-1] is the maximum (100th percentile). The values are
+        evenly spaced in quantile space on the y-axis of the CDF.
+    """
+
+    p = np.linspace(0, 1, n)
+    x = np.interp(p, np.linspace(0, 1, len(data)), sorted(data))
+
+    assert x[0] == data.min()
+    assert x[-1] == data.max()
+
+    if decimals is not None:
+        x = np.round(x, decimals=decimals)
+
+    return x
+
+
 class TemporalStats:
     """
     Temporal Statistics from Resource Data
diff --git a/tests/test_temporal_stats.py b/tests/test_temporal_stats.py
index d5702d77..a55e260e 100644
--- a/tests/test_temporal_stats.py
+++ b/tests/test_temporal_stats.py
@@ -13,7 +13,7 @@
 
 from rex.multi_year_resource import MultiYearWindResource
 from rex.renewable_resource import WindResource, NSRDB
-from rex.temporal_stats.temporal_stats import TemporalStats, circular_mean
+from rex.temporal_stats.temporal_stats import TemporalStats, circular_mean, cdf
 from rex.temporal_stats.temporal_stats_cli import main
 from rex.utilities.loggers import LOGGERS
 from rex import TESTDATADIR
@@ -281,6 +281,20 @@ def test_weighted_circular_means(weights):
     assert np.allclose(truth, test_stats[name].values, rtol=0, atol=0.01), msg
 
 
+def test_cdf(n=21):
+    """Test the CDF function which gets evenly spaced values (in quantile
+    space) defining the empirical CDF of a dataset"""
+    data = np.random.normal(0, 1, (1000,))
+    x = cdf(data, n=n, decimals=None)
+
+    quantiles = np.linspace(0, 1, n)
+    assert quantiles[0] == 0
+    assert quantiles[-1] == 1
+
+    for i, q in enumerate(quantiles):
+        assert np.allclose(np.percentile(data, 100 * q), x[i])
+
+
 def test_mask_zeros():
     """Test the irradiance stats functionality with zeros masked out"""
     res_h5 = os.path.join(TESTDATADIR, 'nsrdb/ri_100_nsrdb_2012.h5')

From 3962d9d2e826bff712d10676bbd51e1c7c119cdb Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Fri, 5 Jan 2024 15:22:24 -0700
Subject: [PATCH 10/19] handle nans for temporal means cdfs

---
 rex/bias_correction.py               |  4 ++--
 rex/temporal_stats/temporal_stats.py | 16 ++++++++++++----
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index 649df17f..5479f9e1 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -421,8 +421,8 @@ def qdm_irrad(ghi, dni, dhi,
     # This will prevent inverse CDF functions from returning zero resulting in
     # a divide by zero error in the calculation of the QDM delta. These zeros
     # get fixed later in _irrad_post_proc
-    ghi[ghi_zeros] = 1500
-    dni[dni_zeros] = 1500
+    ghi[ghi_zeros] = 1
+    dni[dni_zeros] = 1
 
     ghi = ghi_qdm(ghi)
     dni = dni_qdm(dni)
diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py
index a411ae01..bed01bab 100644
--- a/rex/temporal_stats/temporal_stats.py
+++ b/rex/temporal_stats/temporal_stats.py
@@ -105,11 +105,19 @@ def cdf(data, n=50, decimals=None):
         evenly spaced in quantile space on the y-axis of the CDF.
     """
 
-    p = np.linspace(0, 1, n)
-    x = np.interp(p, np.linspace(0, 1, len(data)), sorted(data))
+    nan_mask = np.isnan(data)
+    if nan_mask.all():
+        return np.zeros(n)
 
-    assert x[0] == data.min()
-    assert x[-1] == data.max()
+    p = np.linspace(0, 1, n)
+    x = np.interp(p, np.linspace(0, 1, len(data[~nan_mask])),
+                  sorted(data[~nan_mask]))
+
+    msg = (f'First and last points defining the CDF ({x[0]}, {x[-1]}) '
+           f'were not the min and max data values '
+           f'({np.nanmin(data)}, {np.nanmin(data)}).')
+    assert x[0] == np.nanmin(data), msg
+    assert x[-1] == np.nanmax(data), msg
 
     if decimals is not None:
         x = np.round(x, decimals=decimals)

From 6d83f387673b52fd98e43bb325892bd486587cff Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Fri, 5 Jan 2024 15:22:24 -0700
Subject: [PATCH 11/19] handle nans for temporal means cdfs

---
 rex/temporal_stats/temporal_stats.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py
index bed01bab..7dd80f1b 100644
--- a/rex/temporal_stats/temporal_stats.py
+++ b/rex/temporal_stats/temporal_stats.py
@@ -83,7 +83,7 @@ def circular_mean(data, weights=None, degrees=True, axis=0,
     return mean
 
 
-def cdf(data, n=50, decimals=None):
+def cdf(data, n=10, decimals=None):
     """Get a number of x-values that define a CDF for the input data.
 
     Parameters

From cffe740440f06cfc298d95c439b91b9bdee72850 Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Sat, 6 Jan 2024 12:20:44 -0700
Subject: [PATCH 12/19] added nonlinear sampling for CDF

---
 rex/temporal_stats/temporal_stats.py | 57 ++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py
index 7dd80f1b..c6510a36 100644
--- a/rex/temporal_stats/temporal_stats.py
+++ b/rex/temporal_stats/temporal_stats.py
@@ -83,46 +83,71 @@ def circular_mean(data, weights=None, degrees=True, axis=0,
     return mean
 
 
-def cdf(data, n=10, decimals=None):
+def cdf(data, n_samples=50, sampling='linear', log_base=10, decimals=None):
     """Get a number of x-values that define a CDF for the input data.
 
     Parameters
     ----------
     data : np.ndarray
         1D array of data to make a CDF for
-    n : int
+    n_samples : int
         Number of points to fit the CDF
+    sampling : str
+        Option for quantile sampling, e.g., how to sample the y-axis of the
+        distribution. "linear" will do even spacing, "log" will concentrate
+        samples near quantile=0, and "invlog" will concentrate samples near
+        quantile=1
+    log_base : int | float
+        Log base value if sampling is "log" or "invlog". A higher value will
+        concentrate more samples at the extreme sides of the distribution.
     decimals : int | None
         Precision to round output to (see docstring for np.round). None will
         not round outputs (default).
 
     Returns
     -------
-    x : np.ndarray
-        1D array of values with shape (n,). Each value is in the same units as
-        the input data argument. The x[0] is the minimum value of data (0th
-        percentile) and x[-1] is the maximum (100th percentile). The values are
-        evenly spaced in quantile space on the y-axis of the CDF.
+    x_values : np.ndarray
+        1D array of values with shape (n_samples,). Each value is in the same
+        units as the input data argument. The x_values[0] is the minimum value
+        of data (0th percentile) and x_values[-1] is the maximum
+        (100th percentile). The values are spaced in quantile space (y-axis of
+        the CDF) according to the sampling option (e.g., evenly spaced if
+        sampling='linear').
     """
 
     nan_mask = np.isnan(data)
     if nan_mask.all():
-        return np.zeros(n)
+        return np.zeros(n_samples)
+
+    if sampling == 'linear':
+        quantiles = np.linspace(0, 1, n_samples)
+    elif sampling == 'log':
+        quantiles = np.logspace(0, 1, n_samples, base=log_base)
+        quantiles = (quantiles - 1) / (log_base - 1)
+    elif sampling == 'invlog':
+        quantiles = np.logspace(0, 1, n_samples, base=log_base)
+        quantiles = (quantiles - 1) / (log_base - 1)
+        quantiles = np.array(sorted(1 - quantiles))
+    else:
+        msg = ('sampling option must be linear, log, or invlog, but received: '
+               '{}'.format(sampling))
+        logger.error(msg)
+        raise KeyError(msg)
 
-    p = np.linspace(0, 1, n)
-    x = np.interp(p, np.linspace(0, 1, len(data[~nan_mask])),
-                  sorted(data[~nan_mask]))
+    x_values = np.interp(quantiles, np.linspace(0, 1, len(data[~nan_mask])),
+                         sorted(data[~nan_mask]))
 
-    msg = (f'First and last points defining the CDF ({x[0]}, {x[-1]}) '
+    msg = (f'First and last x-value points defining the CDF '
+           '({x_values[0]}, {x_values[-1]}) '
            f'were not the min and max data values '
            f'({np.nanmin(data)}, {np.nanmin(data)}).')
-    assert x[0] == np.nanmin(data), msg
-    assert x[-1] == np.nanmax(data), msg
+    assert x_values[0] == np.nanmin(data), msg
+    assert x_values[-1] == np.nanmax(data), msg
 
     if decimals is not None:
-        x = np.round(x, decimals=decimals)
+        x_values = np.round(x_values, decimals=decimals)
 
-    return x
+    return x_values
 
 
 class TemporalStats:

From d97d05191dbf59215a9bad2e9d8129c982655fba Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Sat, 6 Jan 2024 12:42:10 -0700
Subject: [PATCH 13/19] added option for nonlinear quantile sampling when doing
 empirical CDF bias correction

---
 rex/bias_correction.py | 93 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 81 insertions(+), 12 deletions(-)

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index 5479f9e1..59307591 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -169,7 +169,8 @@ class _QuantileDeltaMapping:
     Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015).
     """
 
-    def __init__(self, params_oh, params_mh, params_mf, dist, relative):
+    def __init__(self, params_oh, params_mh, params_mf, dist='empirical',
+                 relative=True, sampling='linear', log_base=10):
         """
         Parameters
         ----------
@@ -203,13 +204,28 @@ def __init__(self, params_oh, params_mh, params_mf, dist, relative):
             Cannon et al., 2015 for more details. Can also be a 1D array of
             dist inputs if being used from reV, but they must all be the same
             option.
+        sampling : str | np.ndarray
+            If dist="empirical", this is an option for how the quantiles were
+            sampled to produce the params inputs, e.g., how to sample the
+            y-axis of the distribution. "linear" will do even spacing, "log"
+            will concentrate samples near quantile=0, and "invlog" will
+            concentrate samples near quantile=1. Can also be a 1D array of dist
+            inputs if being used from reV, but they must all be the same
+            option.
+        log_base : int | float | np.ndarray
+            Log base value if sampling is "log" or "invlog". A higher value
+            will concentrate more samples at the extreme sides of the
+            distribution. Can also be a 1D array of dist inputs if being used
+            from reV, but they must all be the same option.
         """
 
         self.params_oh = params_oh
         self.params_mh = params_mh
         self.params_mf = params_mf if params_mf is not None else params_mh
-        self.relative = self._clean_kwarg(relative)
-        self.dist_name = self._clean_kwarg(dist)
+        self.relative = bool(self._clean_kwarg(relative))
+        self.dist_name = str(self._clean_kwarg(dist))
+        self.sampling = str(self._clean_kwarg(sampling))
+        self.log_base = float(self._clean_kwarg(log_base))
         self.scipy_dist = None
 
         if self.dist_name != 'empirical':
@@ -273,6 +289,30 @@ def _clean_params(self, params, arr_shape):
 
         return params
 
+    def _get_quantiles(self, n_samples):
+        """If dist='empirical', this will get the quantile values for the CDF
+        x-values specified in the input params"""
+
+        if self.sampling == 'linear':
+            quantiles = np.linspace(0, 1, n_samples)
+
+        elif self.sampling == 'log':
+            quantiles = np.logspace(0, 1, n_samples, base=self.log_base)
+            quantiles = (quantiles - 1) / (self.log_base - 1)
+
+        elif self.sampling == 'invlog':
+            quantiles = np.logspace(0, 1, n_samples, base=self.log_base)
+            quantiles = (quantiles - 1) / (self.log_base - 1)
+            quantiles = np.array(sorted(1 - quantiles))
+
+        else:
+            msg = ('sampling option must be linear, log, or invlog, but '
+                   'received: {}'.format(self.sampling))
+            logger.error(msg)
+            raise KeyError(msg)
+
+        return quantiles
+
     def cdf(self, x, params):
         """Run the CDF function e.g., convert physical variable to quantile"""
 
@@ -280,9 +320,8 @@ def cdf(self, x, params):
             p = np.zeros_like(x)
             for idx in range(x.shape[1]):
                 xp = params[idx, :]
-                fp = np.linspace(0, 1, len(xp))
+                fp = self._get_quantiles(len(xp))
                 p[:, idx] = np.interp(x[:, idx], xp, fp)
-
         else:
             p = self.scipy_dist.cdf(x, *params)
 
@@ -296,7 +335,7 @@ def ppf(self, p, params):
             x = np.zeros_like(p)
             for idx in range(p.shape[1]):
                 fp = params[idx, :]
-                xp = np.linspace(0, 1, len(fp))
+                xp = self._get_quantiles(len(fp))
                 x[:, idx] = np.interp(p[:, idx], xp, fp)
         else:
             x = self.scipy_dist.ppf(p, *params)
@@ -342,7 +381,8 @@ def qdm_irrad(ghi, dni, dhi,
               ghi_params_oh, dni_params_oh,
               ghi_params_mh, dni_params_mh,
               ghi_params_mf=None, dni_params_mf=None,
-              dist='empirical', relative=True):
+              dist='empirical', relative=True,
+              sampling='linear', log_base=10):
     """Correct irradiance using the quantile delta mapping based on the method
     from Cannon et al., 2015
 
@@ -400,6 +440,18 @@ def qdm_irrad(ghi, dni, dhi,
         Cannon et al., 2015 for more details. Can also be a 1D array of
         dist inputs if being used from reV, but they must all be the same
         option.
+    sampling : str | np.ndarray
+        If dist="empirical", this is an option for how the quantiles were
+        sampled to produce the params inputs, e.g., how to sample the
+        y-axis of the distribution. "linear" will do even spacing, "log"
+        will concentrate samples near quantile=0, and "invlog" will
+        concentrate samples near quantile=1. Can also be a 1D array of dist
+        inputs if being used from reV, but they must all be the same option.
+    log_base : int | float | np.ndarray
+        Log base value if sampling is "log" or "invlog". A higher value
+        will concentrate more samples at the extreme sides of the
+        distribution. Can also be a 1D array of dist inputs if being used from
+        reV, but they must all be the same option.
 
     Returns
     -------
@@ -414,9 +466,13 @@ def qdm_irrad(ghi, dni, dhi,
     ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi)
 
     ghi_qdm = _QuantileDeltaMapping(ghi_params_oh, ghi_params_mh,
-                                    ghi_params_mf, dist, relative)
+                                    ghi_params_mf, dist=dist,
+                                    relative=relative, sampling=sampling,
+                                    log_base=log_base)
     dni_qdm = _QuantileDeltaMapping(dni_params_oh, dni_params_mh,
-                                    dni_params_mf, dist, relative)
+                                    dni_params_mf, dist=dist,
+                                    relative=relative, sampling=sampling,
+                                    log_base=log_base)
 
     # This will prevent inverse CDF functions from returning zero resulting in
     # a divide by zero error in the calculation of the QDM delta. These zeros
@@ -434,7 +490,7 @@ def qdm_irrad(ghi, dni, dhi,
 
 
 def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical',
-           relative=True):
+           relative=True, sampling='linear', log_base=10):
     """Correct windspeed using quantile delta mapping based on the method from
     Cannon et al., 2015
 
@@ -476,6 +532,18 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical',
         Cannon et al., 2015 for more details. Can also be a 1D array of
         dist inputs if being used from reV, but they must all be the same
         option.
+    sampling : str | np.ndarray
+        If dist="empirical", this is an option for how the quantiles were
+        sampled to produce the params inputs, e.g., how to sample the
+        y-axis of the distribution. "linear" will do even spacing, "log"
+        will concentrate samples near quantile=0, and "invlog" will
+        concentrate samples near quantile=1. Can also be a 1D array of dist
+        inputs if being used from reV, but they must all be the same option.
+    log_base : int | float | np.ndarray
+        Log base value if sampling is "log" or "invlog". A higher value
+        will concentrate more samples at the extreme sides of the
+        distribution. Can also be a 1D array of dist inputs if being used from
+        reV, but they must all be the same option.
 
     Returns
     -------
@@ -483,8 +551,9 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical',
         2D array of windspeed values in shape (time, space)
     """
 
-    qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist,
-                                relative)
+    qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist,
+                                relative=relative, sampling=sampling,
+                                log_base=log_base)
     ws = qdm(ws)
     ws = np.maximum(ws, 0)
     return ws

From db2d5ae9724cc033ad3c7685e68c02511cb645bc Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Sun, 7 Jan 2024 14:39:28 -0700
Subject: [PATCH 14/19] more flexible shapes and protect against div0

---
 rex/bias_correction.py | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index 59307591..6afe610a 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -279,6 +279,10 @@ def _clean_params(self, params, arr_shape):
 
         msg = f'params must be 2D array but received {type(params)}'
         assert isinstance(params, np.ndarray), msg
+
+        if len(params.shape) == 1:
+            params = np.expand_dims(params, 0)
+
         msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) '
                f'but received shape {params.shape}')
         assert len(params.shape) == 2, msg
@@ -356,18 +360,23 @@ def __call__(self, arr):
             Bias corrected copy of the input array with same shape.
         """
 
+        if len(arr.shape) == 1:
+            arr = np.expand_dims(arr, 1)
+
         params_oh = self._clean_params(self.params_oh, arr.shape)
         params_mh = self._clean_params(self.params_mh, arr.shape)
         params_mf = self._clean_params(self.params_mf, arr.shape)
 
-        p_mf = self.cdf(arr, params_mf)
-        x_oh = self.ppf(p_mf, params_oh)
+        q_mf = self.cdf(arr, params_mf)  # Tau_m_p
+        x_oh = self.ppf(q_mf, params_oh)  # x^_o:m_h:p
+        x_mh_mf = self.ppf(q_mf, params_mh)  # F-1_mh[Tau_m_p]
 
         if self.relative:
-            delta = arr / self.ppf(p_mf, params_mh)
+            x_mh_mf[x_mh_mf == 0] = 0.001  # arbitrary limit to prevent div 0
+            delta = arr / x_mh_mf
             arr_bc = x_oh * delta
         else:
-            delta = arr - self.ppf(p_mf, params_mh)
+            delta = arr - x_mh_mf
             arr_bc = x_oh + delta
 
         msg = ('Input shape {} does not match QDM bias corrected output '
@@ -554,6 +563,16 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical',
     qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist,
                                 relative=relative, sampling=sampling,
                                 log_base=log_base)
+
+    # This will prevent inverse CDF functions from returning zero resulting in
+    # a divide by zero error in the calculation of the QDM delta. These zeros
+    # get fixed later
+    ws_zeros = ws == 0
+    ws[ws_zeros] = 0.01
+
     ws = qdm(ws)
+
     ws = np.maximum(ws, 0)
+    ws[ws_zeros] = 0
+
     return ws

From 215f6094282f01ce6463903f9b92dff025fdba69 Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Sun, 7 Jan 2024 14:59:56 -0700
Subject: [PATCH 15/19] more tests on log cdf

---
 tests/test_sam_resource.py   |  4 ++--
 tests/test_temporal_stats.py | 31 ++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py
index 17667219..13e3e1b2 100644
--- a/tests/test_sam_resource.py
+++ b/tests/test_sam_resource.py
@@ -586,13 +586,13 @@ def test_bias_correct_wind_qdm():
     assert np.allclose(bc_ws, base_ws)
 
     res = WindResource.preload_SAM(h5, sites, hub_heights)
-    params = list(0.1*np.ones(10))
+    params = sorted(np.random.uniform(10, 20, 10))
     bc['params_oh'] = json.dumps(params)
     bc.loc[0, 'params_oh'] = json.dumps(base_params)
     res.bias_correct(bc)
     bc_ws = res._res_arrays['windspeed']
     assert np.allclose(bc_ws[:, 0], base_ws[:, 0])
-    assert ((bc_ws[:, 1:] > base_ws[:, 1:]).sum() / bc_ws[:, 1:].size) < 1e-4
+    assert ((bc_ws[:, 1:] != base_ws[:, 1:]).sum() / bc_ws[:, 1:].size) > 0.99
 
 
 def test_bias_correct_irrad_qdm():
diff --git a/tests/test_temporal_stats.py b/tests/test_temporal_stats.py
index a55e260e..1354b69e 100644
--- a/tests/test_temporal_stats.py
+++ b/tests/test_temporal_stats.py
@@ -281,13 +281,38 @@ def test_weighted_circular_means(weights):
     assert np.allclose(truth, test_stats[name].values, rtol=0, atol=0.01), msg
 
 
-def test_cdf(n=21):
+@pytest.mark.parametrize("n_samples", [21, 50])
+def test_cdf_linear(n_samples):
     """Test the CDF function which gets evenly spaced values (in quantile
     space) defining the empirical CDF of a dataset"""
     data = np.random.normal(0, 1, (1000,))
-    x = cdf(data, n=n, decimals=None)
+    x = cdf(data, n_samples=n_samples, decimals=None)
 
-    quantiles = np.linspace(0, 1, n)
+    assert x[0] == data.min()
+    assert x[-1] == data.max()
+
+    quantiles = np.linspace(0, 1, n_samples)
+    assert quantiles[0] == 0
+    assert quantiles[-1] == 1
+
+    for i, q in enumerate(quantiles):
+        assert np.allclose(np.percentile(data, 100 * q), x[i])
+
+
+@pytest.mark.parametrize(("n_samples", "log_base"), [(21, 10), (50, 2)])
+def test_cdf_invlog(n_samples, log_base):
+    """Test the CDF function which gets inverse-log-spaced spaced values
+    (in quantile space) defining the empirical CDF of a dataset"""
+    data = np.random.normal(0, 1, (10000,))
+    x = cdf(data, n_samples=n_samples, decimals=None, sampling='invlog',
+            log_base=log_base)
+
+    assert x[0] == data.min()
+    assert x[-1] == data.max()
+
+    quantiles = np.logspace(0, 1, n_samples, base=log_base)
+    quantiles = (quantiles - 1) / (log_base - 1)
+    quantiles = np.array(sorted(1 - quantiles))
     assert quantiles[0] == 0
     assert quantiles[-1] == 1
 

From c70378d5e606677ad39cef86cee5560f887bf45a Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Mon, 8 Jan 2024 16:25:36 -0700
Subject: [PATCH 16/19] protect against bias correction table with no relevant
 gids

---
 rex/sam_resource.py        | 3 +++
 rex/utilities/bc_utils.py  | 8 ++++++--
 tests/test_sam_resource.py | 4 ++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/rex/sam_resource.py b/rex/sam_resource.py
index a1119c5f..b504b9c9 100644
--- a/rex/sam_resource.py
+++ b/rex/sam_resource.py
@@ -721,6 +721,9 @@ def bias_correct(self, bc_df):
 
         bc_fun, bc_fun_kwargs, bool_bc = _parse_bc_table(bc_df, self.sites)
 
+        if not bool_bc.any():
+            return
+
         if 'ghi' in self._res_arrays and 'dni' in self._res_arrays:
             logger.debug('Bias correcting irradiance with function {} '
                          'for sites {}'.format(bc_fun, self.sites))
diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py
index 224f935a..45995c70 100644
--- a/rex/utilities/bc_utils.py
+++ b/rex/utilities/bc_utils.py
@@ -61,7 +61,11 @@ def _parse_bc_table(bc_df, gids):
 
     gid_arr = np.array(gids)
     bool_bc = np.isin(gid_arr, bc_df.index.values)
-    if not bool_bc.all():
+
+    if not bool_bc.any():
+        return None, {}, bool_bc
+
+    elif not bool_bc.all():
         missing = gid_arr[~bool_bc]
         msg = ('{} sites were missing from the bias correction table, '
                'not bias correcting: {}'.format(len(missing), missing))
@@ -88,7 +92,7 @@ def _parse_bc_table(bc_df, gids):
         # load serialized lists from string columns in bc_df into nested lists
         sample = bc_df[col].values[0]
         if isinstance(sample, str) and '[' in sample and ']' in sample:
-            bc_df[col] = bc_df[col].apply(json.loads)
+            bc_df.loc[:, col] = bc_df[col].apply(json.loads)
 
         arr = bc_df.loc[gid_arr[bool_bc], col].values
 
diff --git a/tests/test_sam_resource.py b/tests/test_sam_resource.py
index 13e3e1b2..3dd604c9 100644
--- a/tests/test_sam_resource.py
+++ b/tests/test_sam_resource.py
@@ -619,7 +619,7 @@ def test_bias_correct_irrad_qdm():
     assert np.allclose(base_res._res_arrays['dni'], base_dni)
 
     res = NSRDB.preload_SAM(h5, sites)
-    params = list(0.1*np.ones(10))
+    params = list(0.1 * np.ones(10))
     bc['ghi_params_oh'] = json.dumps(params)
     res.bias_correct(bc)
     bc_ghi = res._res_arrays['ghi']
@@ -630,7 +630,7 @@ def test_bias_correct_irrad_qdm():
     assert np.allclose(bc_dni, base_dni)
 
     res = NSRDB.preload_SAM(h5, sites)
-    params = list(0.1*np.ones(10))
+    params = list(0.1 * np.ones(10))
     bc['ghi_params_oh'] = json.dumps(base_params)
     bc['dni_params_oh'] = json.dumps(params)
     res.bias_correct(bc)

From bcd1f69cee6e947eaa80ec6cdb72da032d492a5f Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Fri, 12 Jan 2024 12:12:27 -0700
Subject: [PATCH 17/19] pr comments, moved things around including new bc utils
 module that will store actual utilities and avoid recursive import from parse
 table

---
 rex/bias_correction.py               | 259 ++-----------------
 rex/sam_resource.py                  |   4 +-
 rex/temporal_stats/temporal_stats.py |  15 +-
 rex/utilities/bc_parse_table.py      | 106 ++++++++
 rex/utilities/bc_utils.py            | 357 ++++++++++++++++++++-------
 5 files changed, 405 insertions(+), 336 deletions(-)
 create mode 100644 rex/utilities/bc_parse_table.py

diff --git a/rex/bias_correction.py b/rex/bias_correction.py
index 6afe610a..cfb945c6 100644
--- a/rex/bias_correction.py
+++ b/rex/bias_correction.py
@@ -2,10 +2,10 @@
 """
 Module to perform bias correction of renewable energy resource data
 """
-import scipy
 import numpy as np
 import logging
 
+from rex.utilities.bc_utils import QuantileDeltaMapping
 
 logger = logging.getLogger(__name__)
 
@@ -157,235 +157,6 @@ def lin_ws(ws, scalar=1, adder=0):
     return ws
 
 
-class _QuantileDeltaMapping:
-    """Class for quantile delta mapping based on the method from
-    Cannon et al., 2015
-
-    Note that this is a utility class for implementing QDM and should not be
-    requested directly as a method in the reV/rex bias correction table input
-
-    Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
-    Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
-    Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015).
-    """
-
-    def __init__(self, params_oh, params_mh, params_mf, dist='empirical',
-                 relative=True, sampling='linear', log_base=10):
-        """
-        Parameters
-        ----------
-        params_oh : np.ndarray
-            2D array of **observed historical** distribution parameters created
-            from a multi-year set of data where the shape is (space, N). This
-            can be the output of a parametric distribution fit like
-            ``scipy.stats.weibull_min.fit()`` where N is the number of
-            parameters for that distribution, or this can define the x-values
-            of N points from an empirical CDF that will be linearly
-            interpolated between. If this is an empirical CDF, this must
-            include the 0th and 100th percentile values and have even
-            percentile spacing between values.
-        params_mh : np.ndarray
-            Same requirements as params_oh. This input arg is for the **modeled
-            historical distribution**.
-        params_mf : np.ndarray | None
-            Same requirements as params_oh. This input arg is for the **modeled
-            future distribution**. If this is None, this defaults to params_mh
-            (no future data, just corrected to modeled historical distribution)
-        dist : str | np.ndarray
-            Probability distribution name to use to model the data which
-            determines how the param args are used. This can "empirical" or any
-            continuous distribution name from ``scipy.stats``. Can also be a 1D
-            array of dist inputs if being used from reV, but they must all be
-            the same option.
-        relative : bool | np.ndarray
-            Flag to preserve relative rather than absolute changes in
-            quantiles. relative=False (default) will multiply by the change in
-            quantiles while relative=True will add. See Equations 4-6 from
-            Cannon et al., 2015 for more details. Can also be a 1D array of
-            dist inputs if being used from reV, but they must all be the same
-            option.
-        sampling : str | np.ndarray
-            If dist="empirical", this is an option for how the quantiles were
-            sampled to produce the params inputs, e.g., how to sample the
-            y-axis of the distribution. "linear" will do even spacing, "log"
-            will concentrate samples near quantile=0, and "invlog" will
-            concentrate samples near quantile=1. Can also be a 1D array of dist
-            inputs if being used from reV, but they must all be the same
-            option.
-        log_base : int | float | np.ndarray
-            Log base value if sampling is "log" or "invlog". A higher value
-            will concentrate more samples at the extreme sides of the
-            distribution. Can also be a 1D array of dist inputs if being used
-            from reV, but they must all be the same option.
-        """
-
-        self.params_oh = params_oh
-        self.params_mh = params_mh
-        self.params_mf = params_mf if params_mf is not None else params_mh
-        self.relative = bool(self._clean_kwarg(relative))
-        self.dist_name = str(self._clean_kwarg(dist))
-        self.sampling = str(self._clean_kwarg(sampling))
-        self.log_base = float(self._clean_kwarg(log_base))
-        self.scipy_dist = None
-
-        if self.dist_name != 'empirical':
-            self.scipy_dist = getattr(scipy.stats, self.dist_name, None)
-            if self.scipy_dist is None:
-                msg = ('Could not get requested distribution "{}" from '
-                       '``scipy.stats``. Please double check your spelling '
-                       'and select "empirical" or one of the continuous '
-                       'distribution options from here: '
-                       'https://docs.scipy.org/doc/scipy/reference/stats.html'
-                       .format(self.dist_name))
-                logger.error(msg)
-                raise KeyError(msg)
-
-    @staticmethod
-    def _clean_kwarg(inp):
-        """Clean any kwargs inputs (e.g., dist, relative) that might be
-        provided as an array and must be collapsed into a single string or
-        boolean value"""
-        unique = np.unique(inp)
-        msg = ('_QuantileDeltaMapping kwargs must have only one unique input '
-               'even if being called with arrays as part of reV but found: {}'
-               .format(unique))
-        assert len(unique) == 1, msg
-
-        while isinstance(inp, np.ndarray):
-            inp = inp[0]
-
-        return inp
-
-    def _clean_params(self, params, arr_shape):
-        """Verify and clean 2D parameter arrays for passing into empirical
-        distribution or scipy continuous distribution functions.
-
-        Parameters
-        ----------
-        params : np.ndarray
-            Input params shape should be (space, N) where N is the number of
-            parameters for the distribution.
-        arr_shape : tuple
-            Array shape should be (time, space).
-
-        Returns
-        -------
-        params : np.ndarray | list
-            If a scipy continuous dist is set, this output will be params
-            unpacked along axis=1 into a list so that the list entries
-            represent the scipy distribution parameters
-            (e.g., shape, scale, loc) and each list entry is of shape (space,)
-        """
-
-        msg = f'params must be 2D array but received {type(params)}'
-        assert isinstance(params, np.ndarray), msg
-
-        if len(params.shape) == 1:
-            params = np.expand_dims(params, 0)
-
-        msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) '
-               f'but received shape {params.shape}')
-        assert len(params.shape) == 2, msg
-        assert params.shape[0] == arr_shape[1], msg
-
-        if self.scipy_dist is not None:
-            params = [params[:, i] for i in range(params.shape[1])]
-
-        return params
-
-    def _get_quantiles(self, n_samples):
-        """If dist='empirical', this will get the quantile values for the CDF
-        x-values specified in the input params"""
-
-        if self.sampling == 'linear':
-            quantiles = np.linspace(0, 1, n_samples)
-
-        elif self.sampling == 'log':
-            quantiles = np.logspace(0, 1, n_samples, base=self.log_base)
-            quantiles = (quantiles - 1) / (self.log_base - 1)
-
-        elif self.sampling == 'invlog':
-            quantiles = np.logspace(0, 1, n_samples, base=self.log_base)
-            quantiles = (quantiles - 1) / (self.log_base - 1)
-            quantiles = np.array(sorted(1 - quantiles))
-
-        else:
-            msg = ('sampling option must be linear, log, or invlog, but '
-                   'received: {}'.format(self.sampling))
-            logger.error(msg)
-            raise KeyError(msg)
-
-        return quantiles
-
-    def cdf(self, x, params):
-        """Run the CDF function e.g., convert physical variable to quantile"""
-
-        if self.scipy_dist is None:
-            p = np.zeros_like(x)
-            for idx in range(x.shape[1]):
-                xp = params[idx, :]
-                fp = self._get_quantiles(len(xp))
-                p[:, idx] = np.interp(x[:, idx], xp, fp)
-        else:
-            p = self.scipy_dist.cdf(x, *params)
-
-        return p
-
-    def ppf(self, p, params):
-        """Run the inverse CDF function (percent point function) e.g., convert
-        quantile to physical variable"""
-
-        if self.scipy_dist is None:
-            x = np.zeros_like(p)
-            for idx in range(p.shape[1]):
-                fp = params[idx, :]
-                xp = self._get_quantiles(len(fp))
-                x[:, idx] = np.interp(p[:, idx], xp, fp)
-        else:
-            x = self.scipy_dist.ppf(p, *params)
-
-        return x
-
-    def __call__(self, arr):
-        """Run the QDM function to bias correct an array
-
-        Parameters
-        ----------
-        arr : np.ndarray
-            2D array of values in shape (time, space)
-
-        Returns
-        -------
-        arr : np.ndarray
-            Bias corrected copy of the input array with same shape.
-        """
-
-        if len(arr.shape) == 1:
-            arr = np.expand_dims(arr, 1)
-
-        params_oh = self._clean_params(self.params_oh, arr.shape)
-        params_mh = self._clean_params(self.params_mh, arr.shape)
-        params_mf = self._clean_params(self.params_mf, arr.shape)
-
-        q_mf = self.cdf(arr, params_mf)  # Tau_m_p
-        x_oh = self.ppf(q_mf, params_oh)  # x^_o:m_h:p
-        x_mh_mf = self.ppf(q_mf, params_mh)  # F-1_mh[Tau_m_p]
-
-        if self.relative:
-            x_mh_mf[x_mh_mf == 0] = 0.001  # arbitrary limit to prevent div 0
-            delta = arr / x_mh_mf
-            arr_bc = x_oh * delta
-        else:
-            delta = arr - x_mh_mf
-            arr_bc = x_oh + delta
-
-        msg = ('Input shape {} does not match QDM bias corrected output '
-               'shape {}!'.format(arr.shape, arr_bc.shape))
-        assert arr.shape == arr_bc.shape, msg
-
-        return arr_bc
-
-
 def qdm_irrad(ghi, dni, dhi,
               ghi_params_oh, dni_params_oh,
               ghi_params_mh, dni_params_mh,
@@ -452,7 +223,8 @@ def qdm_irrad(ghi, dni, dhi,
     sampling : str | np.ndarray
         If dist="empirical", this is an option for how the quantiles were
         sampled to produce the params inputs, e.g., how to sample the
-        y-axis of the distribution. "linear" will do even spacing, "log"
+        y-axis of the distribution (see sampling functions in
+        ``rex.utilities.bc_utils``). "linear" will do even spacing, "log"
         will concentrate samples near quantile=0, and "invlog" will
         concentrate samples near quantile=1. Can also be a 1D array of dist
         inputs if being used from reV, but they must all be the same option.
@@ -474,14 +246,14 @@ def qdm_irrad(ghi, dni, dhi,
 
     ghi_zeros, dni_zeros, dhi_zeros, cos_sza = _irrad_pre_proc(ghi, dni, dhi)
 
-    ghi_qdm = _QuantileDeltaMapping(ghi_params_oh, ghi_params_mh,
-                                    ghi_params_mf, dist=dist,
-                                    relative=relative, sampling=sampling,
-                                    log_base=log_base)
-    dni_qdm = _QuantileDeltaMapping(dni_params_oh, dni_params_mh,
-                                    dni_params_mf, dist=dist,
-                                    relative=relative, sampling=sampling,
-                                    log_base=log_base)
+    ghi_qdm = QuantileDeltaMapping(ghi_params_oh, ghi_params_mh,
+                                   ghi_params_mf, dist=dist,
+                                   relative=relative, sampling=sampling,
+                                   log_base=log_base)
+    dni_qdm = QuantileDeltaMapping(dni_params_oh, dni_params_mh,
+                                   dni_params_mf, dist=dist,
+                                   relative=relative, sampling=sampling,
+                                   log_base=log_base)
 
     # This will prevent inverse CDF functions from returning zero resulting in
     # a divide by zero error in the calculation of the QDM delta. These zeros
@@ -544,7 +316,8 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical',
     sampling : str | np.ndarray
         If dist="empirical", this is an option for how the quantiles were
         sampled to produce the params inputs, e.g., how to sample the
-        y-axis of the distribution. "linear" will do even spacing, "log"
+        y-axis of the distribution (see sampling functions in
+        ``rex.utilities.bc_utils``). "linear" will do even spacing, "log"
         will concentrate samples near quantile=0, and "invlog" will
         concentrate samples near quantile=1. Can also be a 1D array of dist
         inputs if being used from reV, but they must all be the same option.
@@ -560,9 +333,9 @@ def qdm_ws(ws, params_oh, params_mh, params_mf=None, dist='empirical',
         2D array of windspeed values in shape (time, space)
     """
 
-    qdm = _QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist,
-                                relative=relative, sampling=sampling,
-                                log_base=log_base)
+    qdm = QuantileDeltaMapping(params_oh, params_mh, params_mf, dist=dist,
+                               relative=relative, sampling=sampling,
+                               log_base=log_base)
 
     # This will prevent inverse CDF functions from returning zero resulting in
     # a divide by zero error in the calculation of the QDM delta. These zeros
diff --git a/rex/sam_resource.py b/rex/sam_resource.py
index b504b9c9..204a70c5 100644
--- a/rex/sam_resource.py
+++ b/rex/sam_resource.py
@@ -9,7 +9,7 @@
 from warnings import warn
 import logging
 
-from rex.utilities.bc_utils import _parse_bc_table
+from rex.utilities.bc_parse_table import parse_bc_table
 from rex.utilities.exceptions import (ResourceKeyError, ResourceRuntimeError,
                                       ResourceValueError, SAMInputWarning)
 from rex.utilities.parse_keys import parse_keys
@@ -719,7 +719,7 @@ def bias_correct(self, bc_df):
             bias correction methods.
         """
 
-        bc_fun, bc_fun_kwargs, bool_bc = _parse_bc_table(bc_df, self.sites)
+        bc_fun, bc_fun_kwargs, bool_bc = parse_bc_table(bc_df, self.sites)
 
         if not bool_bc.any():
             return
diff --git a/rex/temporal_stats/temporal_stats.py b/rex/temporal_stats/temporal_stats.py
index c6510a36..59d520db 100644
--- a/rex/temporal_stats/temporal_stats.py
+++ b/rex/temporal_stats/temporal_stats.py
@@ -10,6 +10,8 @@
 import pandas as pd
 
 from rex.resource import Resource
+from rex.utilities.bc_utils import (sample_q_linear, sample_q_log,
+                                    sample_q_invlog)
 from rex.utilities.execution import SpawnProcessPool
 from rex.utilities.loggers import log_mem, log_versions, create_dirs
 from rex.utilities.utilities import get_lat_lon_cols, slice_sites
@@ -93,7 +95,8 @@ def cdf(data, n_samples=50, sampling='linear', log_base=10, decimals=None):
     n_samples : int
         Number of points to fit the CDF
     sampling : str
-        Option for quantile sampling, e.g., how to sample the y-axis of the
+        Option for quantile sampling (see sampling functions in
+        ``rex.utilities.bc_utils``), e.g., how to sample the y-axis of the
         distribution. "linear" will do even spacing, "log" will concentrate
         samples near quantile=0, and "invlog" will concentrate samples near
         quantile=1
@@ -119,15 +122,13 @@ def cdf(data, n_samples=50, sampling='linear', log_base=10, decimals=None):
     if nan_mask.all():
         return np.zeros(n_samples)
 
+    sampling = sampling.casefold()
     if sampling == 'linear':
-        quantiles = np.linspace(0, 1, n_samples)
+        quantiles = sample_q_linear(n_samples)
     elif sampling == 'log':
-        quantiles = np.logspace(0, 1, n_samples, base=log_base)
-        quantiles = (quantiles - 1) / (log_base - 1)
+        quantiles = sample_q_log(n_samples, log_base)
     elif sampling == 'invlog':
-        quantiles = np.logspace(0, 1, n_samples, base=log_base)
-        quantiles = (quantiles - 1) / (log_base - 1)
-        quantiles = np.array(sorted(1 - quantiles))
+        quantiles = sample_q_invlog(n_samples, log_base)
     else:
         msg = ('sampling option must be linear, log, or invlog, but received: '
                '{}'.format(sampling))
diff --git a/rex/utilities/bc_parse_table.py b/rex/utilities/bc_parse_table.py
new file mode 100644
index 00000000..0f65f3df
--- /dev/null
+++ b/rex/utilities/bc_parse_table.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+"""
+rex bias correction utilities.
+"""
+import json
+import numpy as np
+import logging
+from warnings import warn
+import rex.bias_correction
+
+
+logger = logging.getLogger(__name__)
+
+
+def parse_bc_table(bc_df, gids):
+    """Parse the bias correction table for required bc functions and kwargs
+
+    Parameters
+    ----------
+    bc_df : pd.DataFrame
+        DataFrame with wind or solar resource bias correction table. This
+        must have columns "gid" and "method", where "gid" is the resource
+        file indices, and "method" is a function name from the
+        ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are
+        corrected, depending on the technology. See the
+        ``rex.bias_correction`` module for more details on available
+        bias correction methods.
+    gids : list | np.ndarray
+        Array of integer gids (spatial indices) from the source h5 file.
+        This is used to get the correct bias correction parameters from
+        ``bias_correct`` table based on its ``gid`` column
+
+    Returns
+    -------
+    bc_fun : function
+        Function from ``rex.bias_correction`` to use.
+    bc_fun_kwargs : dict
+        Kwargs from ``bc_df`` to input to ``bc_fun``. This may include extra
+        kwargs that are not required by ``bc_fun`` and should be cleaned before
+        passing to the function.
+    bool_bc : np.ndarray
+        1D Boolean array with length equal to the ``gids`` input with ``True``
+        where data has available bias correction inputs in ``bc_df`` and
+        ``False`` where not
+    """
+
+    if 'method' not in bc_df:
+        msg = ('Bias correction table provided, but "method" column not '
+               'found! Only see columns: {}. Need to specify "method" which '
+               'is a function name from `rex.bias_correction`'
+               .format(list(bc_df.columns)))
+        logger.error(msg)
+        raise KeyError(msg)
+
+    if bc_df.index.name != 'gid':
+        if 'gid' not in bc_df:
+            msg = ('Bias correction table must have "gid" column but only '
+                   'found: {}'.format(list(bc_df.columns)))
+            logger.error(msg)
+            raise KeyError(msg)
+        bc_df = bc_df.set_index('gid')
+
+    gid_arr = np.array(gids)
+    bool_bc = np.isin(gid_arr, bc_df.index.values)
+
+    if not bool_bc.any():
+        return None, {}, bool_bc
+
+    if not bool_bc.all():
+        missing = gid_arr[~bool_bc]
+        msg = ('{} sites were missing from the bias correction table, '
+               'not bias correcting: {}'.format(len(missing), missing))
+        logger.warning(msg)
+        warn(msg)
+
+    fun_name = bc_df['method'].unique()
+    msg = ('rex bias correction currently only supports a single unique '
+           'bias correction method per chunk of sites but received: {}'
+           .format(fun_name))
+    assert len(fun_name) == 1, msg
+    bc_fun = getattr(rex.bias_correction, fun_name[0], None)
+    if bc_fun is None:
+        avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')]
+        msg = ('Could not find method name "{}" in ``rex.bias_correction`` '
+               'which has the available objects: {}'
+               .format(fun_name[0], avail))
+        logger.error(msg)
+        raise KeyError(msg)
+
+    bc_fun_kwargs = {}
+    for col in bc_df.columns:
+
+        # load serialized lists from string columns in bc_df into nested lists
+        sample = bc_df[col].values[0]
+        if isinstance(sample, str) and '[' in sample and ']' in sample:
+            bc_df.loc[:, col] = bc_df[col].apply(json.loads)
+
+        arr = bc_df.loc[gid_arr[bool_bc], col].values
+
+        # nested lists in bc_df converted to arr of shape (space, N)
+        if isinstance(arr[0], (list, tuple)):
+            arr = np.array(arr.tolist())
+
+        bc_fun_kwargs[col] = arr
+
+    return bc_fun, bc_fun_kwargs, bool_bc
diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py
index 45995c70..d74183d3 100644
--- a/rex/utilities/bc_utils.py
+++ b/rex/utilities/bc_utils.py
@@ -2,104 +2,293 @@
 """
 rex bias correction utilities.
 """
-import json
+import scipy
 import numpy as np
 import logging
-from warnings import warn
-import rex.bias_correction
 
 
 logger = logging.getLogger(__name__)
 
 
-def _parse_bc_table(bc_df, gids):
-    """Parse the bias correction table for required bc functions and kwargs
+def sample_q_linear(n_samples):
+    """Sample quantiles from 0 to 1 inclusive linearly with even spacing
 
     Parameters
     ----------
-    bc_df : pd.DataFrame
-        DataFrame with wind or solar resource bias correction table. This
-        must have columns "gid" and "method", where "gid" is the resource
-        file indices, and "method" is a function name from the
-        ``rex.bias_correction`` module. Only windspeed or GHI+DNI+DHI are
-        corrected, depending on the technology. See the
-        ``rex.bias_correction`` module for more details on available
-        bias correction methods.
-    gids : list | np.ndarray
-        Array of integer gids (spatial indices) from the source h5 file.
-        This is used to get the correct bias correction parameters from
-        ``bias_correct`` table based on its ``gid`` column
+    n_samples : int
+        Number of points to sample between 0 and 1
 
     Returns
     -------
-    bc_fun : function
-        Function from ``rex.bias_correction`` to use.
-    bc_fun_kwargs : dict
-        Kwargs from ``bc_df`` to input to ``bc_fun``. This may include extra
-        kwargs that are not required by ``bc_fun`` and should be cleaned before
-        passing to the function.
-    bool_bc : np.ndarray
-        1D Boolean array with length equal to the ``gids`` input with ``True``
-        where data has available bias correction inputs in ``bc_df`` and
-        ``False`` where not
+    quantiles : np.ndarray
+        1D array of evenly spaced samples from 0 to 1
     """
+    quantiles = np.linspace(0, 1, n_samples)
+    return quantiles
 
-    if 'method' not in bc_df:
-        msg = ('Bias correction table provided, but "method" column not '
-               'found! Only see columns: {}. Need to specify "method" which '
-               'is a function name from `rex.bias_correction`'
-               .format(list(bc_df.columns)))
-        raise KeyError(msg)
-
-    if bc_df.index.name != 'gid':
-        if 'gid' not in bc_df:
-            msg = ('Bias correction table must have "gid" column but only '
-                   'found: {}'.format(list(bc_df.columns)))
+
+def sample_q_log(n_samples, log_base):
+    """Sample quantiles from 0 to 1 while concentrating samples near quantile=0
+
+    Parameters
+    ----------
+    n_samples : int
+        Number of points to sample between 0 and 1
+    log_base : int | float
+        Log base value. A higher value will concentrate more samples at the
+        extreme sides of the distribution.
+
+    Returns
+    -------
+    quantiles : np.ndarray
+        1D array of log-spaced samples from 0 to 1
+    """
+    quantiles = np.logspace(0, 1, n_samples, base=log_base)
+    quantiles = (quantiles - 1) / (log_base - 1)
+    return quantiles
+
+
+def sample_q_invlog(n_samples, log_base):
+    """Sample quantiles from 0 to 1 while concentrating samples near quantile=1
+
+    Parameters
+    ----------
+    n_samples : int
+        Number of points to sample between 0 and 1
+    log_base : int | float
+        Log base value. A higher value will concentrate more samples at the
+        extreme sides of the distribution.
+
+    Returns
+    -------
+    quantiles : np.ndarray
+        1D array of log-spaced samples from 0 to 1
+    """
+    quantiles = np.logspace(0, 1, n_samples, base=log_base)
+    quantiles = (quantiles - 1) / (log_base - 1)
+    quantiles = np.array(sorted(1 - quantiles))
+    return quantiles
+
+
+class QuantileDeltaMapping:
+    """Class for quantile delta mapping based on the method from
+    Cannon et al., 2015
+
+    Note that this is a utility class for implementing QDM and should not be
+    requested directly as a method in the reV/rex bias correction table input
+
+    Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
+    Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in
+    Quantiles and Extremes? Journal of Climate 28, 6938–6959 (2015).
+    """
+
+    def __init__(self, params_oh, params_mh, params_mf, dist='empirical',
+                 relative=True, sampling='linear', log_base=10):
+        """
+        Parameters
+        ----------
+        params_oh : np.ndarray
+            2D array of **observed historical** distribution parameters created
+            from a multi-year set of data where the shape is (space, N). This
+            can be the output of a parametric distribution fit like
+            ``scipy.stats.weibull_min.fit()`` where N is the number of
+            parameters for that distribution, or this can define the x-values
+            of N points from an empirical CDF that will be linearly
+            interpolated between. If this is an empirical CDF, this must
+            include the 0th and 100th percentile values and have even
+            percentile spacing between values.
+        params_mh : np.ndarray
+            Same requirements as params_oh. This input arg is for the **modeled
+            historical distribution**.
+        params_mf : np.ndarray | None
+            Same requirements as params_oh. This input arg is for the **modeled
+            future distribution**. If this is None, this defaults to params_mh
+            (no future data, just corrected to modeled historical distribution)
+        dist : str | np.ndarray
+            Probability distribution name to use to model the data which
+            determines how the param args are used. This can "empirical" or any
+            continuous distribution name from ``scipy.stats``. Can also be a 1D
+            array of dist inputs if being used from reV, but they must all be
+            the same option.
+        relative : bool | np.ndarray
+            Flag to preserve relative rather than absolute changes in
+            quantiles. relative=False (default) will multiply by the change in
+            quantiles while relative=True will add. See Equations 4-6 from
+            Cannon et al., 2015 for more details. Can also be a 1D array of
+            dist inputs if being used from reV, but they must all be the same
+            option.
+        sampling : str | np.ndarray
+            If dist="empirical", this is an option for how the quantiles were
+            sampled to produce the params inputs, e.g., how to sample the
+            y-axis of the distribution (see sampling functions in
+            ``rex.utilities.bc_utils``). "linear" will do even spacing, "log"
+            will concentrate samples near quantile=0, and "invlog" will
+            concentrate samples near quantile=1. Can also be a 1D array of dist
+            inputs if being used from reV, but they must all be the same
+            option.
+        log_base : int | float | np.ndarray
+            Log base value if sampling is "log" or "invlog". A higher value
+            will concentrate more samples at the extreme sides of the
+            distribution. Can also be a 1D array of dist inputs if being used
+            from reV, but they must all be the same option.
+        """
+
+        self.params_oh = params_oh
+        self.params_mh = params_mh
+        self.params_mf = params_mf if params_mf is not None else params_mh
+        self.relative = bool(self._clean_kwarg(relative))
+        self.dist_name = str(self._clean_kwarg(dist)).casefold()
+        self.sampling = str(self._clean_kwarg(sampling)).casefold()
+        self.log_base = float(self._clean_kwarg(log_base))
+        self.scipy_dist = None
+
+        if self.dist_name != 'empirical':
+            self.scipy_dist = getattr(scipy.stats, self.dist_name, None)
+            if self.scipy_dist is None:
+                msg = ('Could not get requested distribution "{}" from '
+                       '``scipy.stats``. Please double check your spelling '
+                       'and select "empirical" or one of the continuous '
+                       'distribution options from here: '
+                       'https://docs.scipy.org/doc/scipy/reference/stats.html'
+                       .format(self.dist_name))
+                logger.error(msg)
+                raise KeyError(msg)
+
+    @staticmethod
+    def _clean_kwarg(inp):
+        """Clean any kwargs inputs (e.g., dist, relative) that might be
+        provided as an array and must be collapsed into a single string or
+        boolean value"""
+        unique = np.unique(inp)
+        msg = ('_QuantileDeltaMapping kwargs must have only one unique input '
+               'even if being called with arrays as part of reV but found: {}'
+               .format(unique))
+        assert len(unique) == 1, msg
+
+        while isinstance(inp, np.ndarray):
+            inp = inp[0]
+
+        return inp
+
+    def _clean_params(self, params, arr_shape):
+        """Verify and clean 2D parameter arrays for passing into empirical
+        distribution or scipy continuous distribution functions.
+
+        Parameters
+        ----------
+        params : np.ndarray
+            Input params shape should be (space, N) where N is the number of
+            parameters for the distribution.
+        arr_shape : tuple
+            Array shape should be (time, space).
+
+        Returns
+        -------
+        params : np.ndarray | list
+            If a scipy continuous dist is set, this output will be params
+            unpacked along axis=1 into a list so that the list entries
+            represent the scipy distribution parameters
+            (e.g., shape, scale, loc) and each list entry is of shape (space,)
+        """
+
+        msg = f'params must be 2D array but received {type(params)}'
+        assert isinstance(params, np.ndarray), msg
+
+        if len(params.shape) == 1:
+            params = np.expand_dims(params, 0)
+
+        msg = (f'params must be 2D array of shape ({arr_shape[1]}, N) '
+               f'but received shape {params.shape}')
+        assert len(params.shape) == 2, msg
+        assert params.shape[0] == arr_shape[1], msg
+
+        if self.scipy_dist is not None:
+            params = [params[:, i] for i in range(params.shape[1])]
+
+        return params
+
+    def _get_quantiles(self, n_samples):
+        """If dist='empirical', this will get the quantile values for the CDF
+        x-values specified in the input params"""
+
+        if self.sampling == 'linear':
+            quantiles = sample_q_linear(n_samples)
+        elif self.sampling == 'log':
+            quantiles = sample_q_log(n_samples, self.log_base)
+        elif self.sampling == 'invlog':
+            quantiles = sample_q_invlog(n_samples, self.log_base)
+        else:
+            msg = ('sampling option must be linear, log, or invlog, but '
+                   'received: {}'.format(self.sampling))
             logger.error(msg)
             raise KeyError(msg)
-        bc_df = bc_df.set_index('gid')
-
-    gid_arr = np.array(gids)
-    bool_bc = np.isin(gid_arr, bc_df.index.values)
-
-    if not bool_bc.any():
-        return None, {}, bool_bc
-
-    elif not bool_bc.all():
-        missing = gid_arr[~bool_bc]
-        msg = ('{} sites were missing from the bias correction table, '
-               'not bias correcting: {}'.format(len(missing), missing))
-        logger.warning(msg)
-        warn(msg)
-
-    fun_name = bc_df['method'].unique()
-    msg = ('rex bias correction currently only supports a single unique '
-           'bias correction method per chunk of sites but received: {}'
-           .format(fun_name))
-    assert len(fun_name) == 1, msg
-    bc_fun = getattr(rex.bias_correction, fun_name[0], None)
-    if bc_fun is None:
-        avail = [x for x in dir(rex.bias_correction) if not x.startswith('_')]
-        msg = ('Could not find method name "{}" in ``rex.bias_correction`` '
-               'which has the available objects: {}'
-               .format(fun_name[0], avail))
-        logger.error(msg)
-        raise KeyError(msg)
-
-    bc_fun_kwargs = {}
-    for col in bc_df.columns:
-
-        # load serialized lists from string columns in bc_df into nested lists
-        sample = bc_df[col].values[0]
-        if isinstance(sample, str) and '[' in sample and ']' in sample:
-            bc_df.loc[:, col] = bc_df[col].apply(json.loads)
-
-        arr = bc_df.loc[gid_arr[bool_bc], col].values
-
-        # nested lists in bc_df converted to arr of shape (space, N)
-        if isinstance(arr[0], (list, tuple)):
-            arr = np.array(arr.tolist())
-
-        bc_fun_kwargs[col] = arr
-
-    return bc_fun, bc_fun_kwargs, bool_bc
+
+        return quantiles
+
+    def cdf(self, x, params):
+        """Run the CDF function e.g., convert physical variable to quantile"""
+
+        if self.scipy_dist is None:
+            p = np.zeros_like(x)
+            for idx in range(x.shape[1]):
+                xp = params[idx, :]
+                fp = self._get_quantiles(len(xp))
+                p[:, idx] = np.interp(x[:, idx], xp, fp)
+        else:
+            p = self.scipy_dist.cdf(x, *params)
+
+        return p
+
+    def ppf(self, p, params):
+        """Run the inverse CDF function (percent point function) e.g., convert
+        quantile to physical variable"""
+
+        if self.scipy_dist is None:
+            x = np.zeros_like(p)
+            for idx in range(p.shape[1]):
+                fp = params[idx, :]
+                xp = self._get_quantiles(len(fp))
+                x[:, idx] = np.interp(p[:, idx], xp, fp)
+        else:
+            x = self.scipy_dist.ppf(p, *params)
+
+        return x
+
+    def __call__(self, arr):
+        """Run the QDM function to bias correct an array
+
+        Parameters
+        ----------
+        arr : np.ndarray
+            2D array of values in shape (time, space)
+
+        Returns
+        -------
+        arr : np.ndarray
+            Bias corrected copy of the input array with same shape.
+        """
+
+        if len(arr.shape) == 1:
+            arr = np.expand_dims(arr, 1)
+
+        params_oh = self._clean_params(self.params_oh, arr.shape)
+        params_mh = self._clean_params(self.params_mh, arr.shape)
+        params_mf = self._clean_params(self.params_mf, arr.shape)
+
+        q_mf = self.cdf(arr, params_mf)  # Tau_m_p
+        x_oh = self.ppf(q_mf, params_oh)  # x^_o:m_h:p
+        x_mh_mf = self.ppf(q_mf, params_mh)  # F-1_mh[Tau_m_p]
+
+        if self.relative:
+            x_mh_mf[x_mh_mf == 0] = 0.001  # arbitrary limit to prevent div 0
+            delta = arr / x_mh_mf
+            arr_bc = x_oh * delta
+        else:
+            delta = arr - x_mh_mf
+            arr_bc = x_oh + delta
+
+        msg = ('Input shape {} does not match QDM bias corrected output '
+               'shape {}!'.format(arr.shape, arr_bc.shape))
+        assert arr.shape == arr_bc.shape, msg
+
+        return arr_bc

From f58c64662771f2aedce98942d6adca9b427893f9 Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Fri, 12 Jan 2024 12:13:01 -0700
Subject: [PATCH 18/19] increment rex version to support reV bias correction
 dependency

---
 rex/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rex/version.py b/rex/version.py
index 091abf93..dd92f834 100644
--- a/rex/version.py
+++ b/rex/version.py
@@ -1,3 +1,3 @@
 """rex Version number"""
 
-__version__ = "0.2.84"
+__version__ = "0.2.85"

From 634ec22543e130e9d019c2154521349c8104aa72 Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Tue, 16 Jan 2024 10:45:16 -0700
Subject: [PATCH 19/19] added more explicit QDM equation references to Cannon

---
 rex/utilities/bc_utils.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/rex/utilities/bc_utils.py b/rex/utilities/bc_utils.py
index d74183d3..6cb415ea 100644
--- a/rex/utilities/bc_utils.py
+++ b/rex/utilities/bc_utils.py
@@ -275,17 +275,23 @@ def __call__(self, arr):
         params_mh = self._clean_params(self.params_mh, arr.shape)
         params_mf = self._clean_params(self.params_mf, arr.shape)
 
-        q_mf = self.cdf(arr, params_mf)  # Tau_m_p
-        x_oh = self.ppf(q_mf, params_oh)  # x^_o:m_h:p
-        x_mh_mf = self.ppf(q_mf, params_mh)  # F-1_mh[Tau_m_p]
+        # Equation references are from Section 3 of Cannon et al 2015:
+        # Cannon, A. J., Sobie, S. R. & Murdock, T. Q. Bias Correction of GCM
+        # Precipitation by Quantile Mapping: How Well Do Methods Preserve
+        # Changes in Quantiles and Extremes? Journal of Climate 28, 6938–6959
+        # (2015).
+
+        q_mf = self.cdf(arr, params_mf)  # Eq.3: Tau_m_p = F_m_p(x_m_p)
+        x_oh = self.ppf(q_mf, params_oh)  # Eq.5: x^_o:m_h:p = F-1_o_h(Tau_m_p)
+        x_mh_mf = self.ppf(q_mf, params_mh)  # Eq.4 denom: F-1_m_h(Tau_m_p)
 
         if self.relative:
             x_mh_mf[x_mh_mf == 0] = 0.001  # arbitrary limit to prevent div 0
-            delta = arr / x_mh_mf
-            arr_bc = x_oh * delta
+            delta = arr / x_mh_mf  # Eq.4: x_m_p / F-1_m_h(Tau_m_p)
+            arr_bc = x_oh * delta  # Eq.6: x^_m_p = x^_o:m_h:p * delta
         else:
-            delta = arr - x_mh_mf
-            arr_bc = x_oh + delta
+            delta = arr - x_mh_mf  # Eq.4: x_m_p - F-1_m_h(Tau_m_p)
+            arr_bc = x_oh + delta  # Eq.6: x^_m_p = x^_o:m_h:p + delta
 
         msg = ('Input shape {} does not match QDM bias corrected output '
                'shape {}!'.format(arr.shape, arr_bc.shape))