From a69d8646f03a26985f9f44d4555cad4f500c0fb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Capretto?= <tomicapretto@gmail.com>
Date: Wed, 22 Jun 2022 14:58:27 -0300
Subject: [PATCH] Treat offset accordingly (#534)

* Treat offset accordingly. Also improve formatting of model summary

* Predictions work for offset now

* Minor modification to docstrings

* more docstrings

* add test

* add more complex test

* Update bambi/backend/pymc.py

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>

* Update bambi/backend/pymc.py

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>

* Update bambi/backend/pymc.py

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>
---
 bambi/backend/pymc.py       | 68 ++++++++++++++++++++++++++++++++---
 bambi/models.py             | 70 +++++++++++++++++++++++++++----------
 bambi/terms.py              | 36 +++++++++++++++++++
 bambi/tests/test_predict.py | 26 ++++++++++++--
 4 files changed, 175 insertions(+), 25 deletions(-)

diff --git a/bambi/backend/pymc.py b/bambi/backend/pymc.py
index f5267c51b..30e217847 100644
--- a/bambi/backend/pymc.py
+++ b/bambi/backend/pymc.py
@@ -64,6 +64,7 @@ def build(self, spec):
 
         with self.model:
             self._build_intercept(spec)
+            self._build_offsets(spec)
             self._build_common_terms(spec)
             self._build_group_specific_terms(spec)
             self._build_response(spec)
@@ -111,18 +112,36 @@ def run(
         return result
 
     def _build_intercept(self, spec):
+        """Add intercept term to the PyMC model.
+
+        We have linear predictors of the form 'X @ b + Z @ u'. This is technically part of
+        'X @ b' but it is added separately for convenience reasons.
+
+        Parameters
+        ----------
+        spec : bambi.Model
+            The model.
+        """
         if self.has_intercept:
             self.mu += InterceptTerm(spec.intercept_term).build(spec)
 
     def _build_common_terms(self, spec):
+        """Add common (fixed) terms to the PyMC model.
+
+        We have linear predictors of the form 'X @ b + Z @ u'.
+        This creates the 'b' parameter vector in PyMC, computes `X @ b`, and adds it to ``self.mu``.
+
+        Parameters
+        ----------
+        spec : bambi.Model
+            The model.
+        """
         if spec.common_terms:
             coefs = []
             columns = []
             for term in spec.common_terms.values():
                 common_term = CommonTerm(term)
                 # Add coords
-                # NOTE: At the moment, there's a bug in PyMC so we need to check if coordinate is
-                # present in the model before attempting to add it.
                 for name, values in common_term.coords.items():
                     if name not in self.model.coords:
                         self.model.add_coords({name: values})
@@ -147,6 +166,16 @@ def _build_common_terms(self, spec):
             self.mu += at.dot(data, coefs)
 
     def _build_group_specific_terms(self, spec):
+        """Add group-specific (random or varying) terms to the PyMC model.
+
+        We have linear predictors of the form 'X @ b + Z @ u'.
+        This creates the 'u' parameter vector in PyMC, computes `Z @ u`, and adds it to ``self.mu``.
+
+        Parameters
+        ----------
+        spec : bambi.Model
+            The model.
+        """
         # Add group specific terms that have prior for their correlation matrix
         for group, eta in spec.priors_cor.items():
             # pylint: disable=protected-access
@@ -162,8 +191,6 @@ def _build_group_specific_terms(self, spec):
             group_specific_term = GroupSpecificTerm(term, spec.noncentered)
 
             # Add coords
-            # NOTE: At the moment, there's a bug in PyMC so we need to check if coordinate is
-            # present in the model before attempting to add it.
             for name, values in group_specific_term.coords.items():
                 if name not in self.model.coords:
                     self.model.add_coords({name: values})
@@ -183,10 +210,43 @@ def _build_group_specific_terms(self, spec):
             else:
                 self.mu += coef * predictor
 
+    def _build_offsets(self, spec):
+        """Add offset terms to the PyMC model.
+
+        Offsets are terms with a regression coefficient of 1.
+        This is technically part of  'X @ b' in the linear predictor 'X @ b + Z @ u'.
+        It's added here so we avoid the creation of a constant variable in PyMC.
+
+        Parameters
+        ----------
+        spec : bambi.Model
+            The model.
+        """
+        for offset in spec.offset_terms.values():
+            self.mu += offset.data.squeeze()
+
     def _build_response(self, spec):
+        """Add response term to the PyMC model
+
+        Parameters
+        ----------
+        spec : bambi.Model
+            The model.
+        """
         ResponseTerm(spec.response, spec.family).build(self.mu, self.INVLINKS)
 
     def _build_potentials(self, spec):
+        """Add potentials to the PyMC model.
+
+        Potentials are arbitrary quantities that are added to the model log likelihood.
+        See 'Factor Potentials' in
+        https://github.com/fonnesbeck/probabilistic_python/blob/main/pymc_intro.ipynb
+
+        Parameters
+        ----------
+        spec : bambi.Model
+            The model.
+        """
         if spec.potentials is not None:
             count = 0
             for variable, constraint in spec.potentials:
diff --git a/bambi/models.py b/bambi/models.py
index 6f9d63d21..a999d5f10 100644
--- a/bambi/models.py
+++ b/bambi/models.py
@@ -16,7 +16,7 @@
 from .defaults import get_default_prior, get_builtin_family
 from .families import Family, univariate, multivariate
 from .priors import Prior, PriorScaler
-from .terms import GroupSpecificTerm, ResponseTerm, Term
+from .terms import GroupSpecificTerm, OffsetTerm, ResponseTerm, Term
 from .utils import listify, extra_namespace
 from .version import __version__
 
@@ -318,6 +318,8 @@ def _build_priors(self):
                 kind = "group_specific"
             elif term.kind == "intercept":
                 kind = "intercept"
+            elif term.kind == "offset":
+                continue
             else:
                 kind = "common"
             term.prior = prepare_prior(term.prior, kind, self.auto_scale)
@@ -512,7 +514,10 @@ def _add_common(self, common, priors):
                         f"Trying to set hyperprior on '{name}'. "
                         "Can't set a hyperprior on common effects."
                     )
-            self.terms[name] = Term(name, term, data, prior)
+            if term.kind == "offset":
+                self.terms[name] = OffsetTerm(name, term, data)
+            else:
+                self.terms[name] = Term(name, term, data, prior)
 
     def _add_group_specific(self, group, priors):
         """Add group-specific (a.k.a. random) terms to the model.
@@ -743,6 +748,7 @@ def predict(self, idata, kind="mean", data=None, inplace=True, include_group_spe
         linear_predictor = 0
         X = None
         Z = None
+        x_offsets = []
 
         chain_n = len(idata.posterior.coords.get("chain"))
         draw_n = len(idata.posterior.coords.get("draw"))
@@ -759,6 +765,13 @@ def predict(self, idata, kind="mean", data=None, inplace=True, include_group_spe
             else:
                 X = self._design.common.evaluate_new_data(data).design_matrix
 
+            # Add offset columns to their own design matrix
+            # Remove them from the common design matrix.
+            for term in self.offset_terms:
+                term_slice = self._design.common.slices[term]
+                x_offsets.append(X[:, term_slice])
+                X = np.delete(X, term_slice, axis=1)
+
         if self._design.group and include_group_specific:
             if in_sample:
                 Z = self._design.group.design_matrix
@@ -830,6 +843,10 @@ def predict(self, idata, kind="mean", data=None, inplace=True, include_group_spe
             contribution = contribution.reshape(shape)
             linear_predictor += contribution
 
+        # If model contains offset, add directly to the linear predictor
+        if x_offsets:
+            linear_predictor += np.column_stack(x_offsets).sum(axis=1)[np.newaxis, np.newaxis, :]
+
         # Contribution due to group-specific terms. Same comments than for beta_x apply here.
         if Z is not None:
             beta_z_list = []
@@ -1000,8 +1017,11 @@ def _get_group_specific_groups(self):
         return groups
 
     def __str__(self):
-        priors = ""
         priors_common = [f"    {t.name} ~ {t.prior}" for t in self.common_terms.values()]
+        if self.intercept_term:
+            term = self.intercept_term
+            priors_common = [f"    {term.name} ~ {term.prior}"] + priors_common
+
         priors_group = [f"    {t.name} ~ {t.prior}" for t in self.group_specific_terms.values()]
 
         # Prior for the correlation matrix in group-specific terms
@@ -1010,17 +1030,22 @@ def __str__(self):
         # Priors for auxiliary parameters, e.g., standard deviation in normal linear model
         priors_aux = [f"    {k} ~ {v}" for k, v in self.family.likelihood.priors.items()]
 
-        if self.intercept_term:
-            t = self.intercept_term
-            priors_common = [f"    {t.name} ~ {t.prior}"] + priors_common
-        if priors_common:
-            priors += "\n".join(["  Common-level effects", *priors_common])
-        if priors_group:
-            priors += "\n\n" + "\n".join(["  Group-level effects", *priors_group])
-        if priors_cor:
-            priors += "\n\n" + "\n".join(["  Group-level correlation", *priors_cor])
-        if priors_aux:
-            priors += "\n\n" + "\n".join(["  Auxiliary parameters", *priors_aux])
+        # Offsets
+        offsets = [f"    {t.name} ~ 1" for t in self.offset_terms.values()]
+
+        priors_dict = {
+            "Common-level effects": priors_common,
+            "Group-level effects": priors_group,
+            "Group-level correlation": priors_cor,
+            "Offset effects": offsets,
+            "Auxiliary parameters": priors_aux,
+        }
+
+        priors_list = []
+        for group, values in priors_dict.items():
+            if values:
+                priors_list += ["\n".join([f"  {group}"] + values)]
+        priors_message = "\n\n".join(priors_list)
 
         str_list = [
             f"Formula: {self.formula}",
@@ -1028,7 +1053,7 @@ def __str__(self):
             f"Link: {self.family.link.name}",
             f"Observations: {self.response.data.shape[0]}",
             "Priors:",
-            priors,
+            priors_message,
         ]
         if self.backend and self.backend.fit:
             extra_foot = (
@@ -1051,14 +1076,16 @@ def term_names(self):
 
     @property
     def common_terms(self):
-        """Return dict of all and only common effects in model."""
+        """Return dict of all common effects in model."""
         return {
-            k: v for (k, v) in self.terms.items() if not v.group_specific and v.kind != "intercept"
+            k: v
+            for (k, v) in self.terms.items()
+            if not v.group_specific and v.kind not in ["intercept", "offset"]
         }
 
     @property
     def group_specific_terms(self):
-        """Return dict of all and only group specific effects in model."""
+        """Return dict of all group specific effects in model."""
         return {k: v for (k, v) in self.terms.items() if v.group_specific}
 
     @property
@@ -1070,6 +1097,13 @@ def intercept_term(self):
         else:
             return None
 
+    @property
+    def offset_terms(self):
+        """Return dict of all offset effects in model."""
+        return {
+            k: v for (k, v) in self.terms.items() if not v.group_specific and v.kind == "offset"
+        }
+
 
 def prepare_prior(prior, kind, auto_scale):
     """Helper function to correctly set default priors, auto scaling, etc.
diff --git a/bambi/terms.py b/bambi/terms.py
index 0e3706bff..adc2e628c 100644
--- a/bambi/terms.py
+++ b/bambi/terms.py
@@ -274,6 +274,42 @@ def __repr__(self):
         return self.__str__()
 
 
+class OffsetTerm:
+    """Representation of a single offset  term.
+
+    Parameters
+    ----------
+    name: str
+        Name of the term.
+    term: formulae.terms.terms.Term
+        A model term created in formulae.
+    data: (DataFrame, Series, ndarray)
+        The term values.
+    """
+
+    group_specific = False
+
+    def __init__(self, name, term, data):
+        self.name = name
+        self.data = data.squeeze()
+        self.kind = "offset"
+        self.term = term
+        self.alias = None
+        self.coords = {}
+
+    def set_alias(self, value):
+        self.alias = value
+
+    def __str__(self):
+        args = [f"name: {self.name}", f"shape: {self.data.shape}"]
+        if self.alias:
+            args[0] = f"{args[0]} (alias: {self.alias})"
+        return f"{self.__class__.__name__}({', '.join(args)})"
+
+    def __repr__(self):
+        return self.__str__()
+
+
 # pylint: disable = protected-access
 def get_reference_level(term):
     if term.kind != "categoric":
diff --git a/bambi/tests/test_predict.py b/bambi/tests/test_predict.py
index 9e7619a14..a3decd653 100644
--- a/bambi/tests/test_predict.py
+++ b/bambi/tests/test_predict.py
@@ -5,6 +5,7 @@
 import pytest
 
 from bambi.models import Model
+from bambi import load_data
 
 
 @pytest.fixture(scope="module")
@@ -313,9 +314,6 @@ def c(*args):
 
 
 def test_posterior_predictive_multinomial(inhaler):
-    def c(*args):
-        return np.column_stack(args)
-
     df = inhaler.groupby(["treat", "carry", "rating"], as_index=False).size()
     df = df.pivot(index=["treat", "carry"], columns="rating", values="size").reset_index()
     df.columns = ["treat", "carry", "y1", "y2", "y3", "y4"]
@@ -356,3 +354,25 @@ def test_predict_include_group_specific():
 
     # When we include group-specific terms, these predictions are different
     assert not (idata_1.posterior["y_mean"] == idata_1.posterior["y_mean"][:, :, 0]).all()
+
+
+def test_predict_offset():
+    # Simple case
+    data = load_data("carclaims")
+    model = Model("numclaims ~ offset(np.log(exposure))", data, family="poisson", link="log")
+    idata = model.fit(tune=100, draws=100, chains=2, random_seed=1234)
+    model.predict(idata)
+    model.predict(idata, kind="pps")
+
+    # More complex case
+    data = pd.DataFrame(
+        {
+            "y": np.random.poisson(20, size=100),
+            "x": np.random.normal(size=100),
+            "group": np.tile(np.arange(10), 10),
+        }
+    )
+    data["time"] = data["y"] - np.random.normal(loc=1, size=100)
+    model = Model("y ~ offset(np.log(time)) + x + (1 | group)", data, family="poisson")
+    idata = model.fit(tune=100, draws=100, chains=2, target_accept=0.9, random_seed=1234)
+    model.predict(idata, kind="pps")