Skip to content

Commit

Permalink
Treat offset accordingly (#534)
Browse files Browse the repository at this point in the history
* Treat offset accordingly. Also improve formatting of model summary

* Predictions work for offset now

* Minor modification to docstrings

* more docstrings

* add test

* add more complex test

* Update bambi/backend/pymc.py

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>

* Update bambi/backend/pymc.py

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>

* Update bambi/backend/pymc.py

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>

Co-authored-by: Ravin Kumar <ravinsdrive@gmail.com>
  • Loading branch information
tomicapretto and canyon289 authored Jun 22, 2022
1 parent 4f0395a commit a69d864
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 25 deletions.
68 changes: 64 additions & 4 deletions bambi/backend/pymc.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def build(self, spec):

with self.model:
self._build_intercept(spec)
self._build_offsets(spec)
self._build_common_terms(spec)
self._build_group_specific_terms(spec)
self._build_response(spec)
Expand Down Expand Up @@ -111,18 +112,36 @@ def run(
return result

def _build_intercept(self, spec):
"""Add intercept term to the PyMC model.
We have linear predictors of the form 'X @ b + Z @ u'. This is technically part of
'X @ b' but it is added separately for convenience reasons.
Parameters
----------
spec : bambi.Model
The model.
"""
if self.has_intercept:
self.mu += InterceptTerm(spec.intercept_term).build(spec)

def _build_common_terms(self, spec):
"""Add common (fixed) terms to the PyMC model.
We have linear predictors of the form 'X @ b + Z @ u'.
This creates the 'b' parameter vector in PyMC, computes `X @ b`, and adds it to ``self.mu``.
Parameters
----------
spec : bambi.Model
The model.
"""
if spec.common_terms:
coefs = []
columns = []
for term in spec.common_terms.values():
common_term = CommonTerm(term)
# Add coords
# NOTE: At the moment, there's a bug in PyMC so we need to check if coordinate is
# present in the model before attempting to add it.
for name, values in common_term.coords.items():
if name not in self.model.coords:
self.model.add_coords({name: values})
Expand All @@ -147,6 +166,16 @@ def _build_common_terms(self, spec):
self.mu += at.dot(data, coefs)

def _build_group_specific_terms(self, spec):
"""Add group-specific (random or varying) terms to the PyMC model.
We have linear predictors of the form 'X @ b + Z @ u'.
This creates the 'u' parameter vector in PyMC, computes `Z @ u`, and adds it to ``self.mu``.
Parameters
----------
spec : bambi.Model
The model.
"""
# Add group specific terms that have prior for their correlation matrix
for group, eta in spec.priors_cor.items():
# pylint: disable=protected-access
Expand All @@ -162,8 +191,6 @@ def _build_group_specific_terms(self, spec):
group_specific_term = GroupSpecificTerm(term, spec.noncentered)

# Add coords
# NOTE: At the moment, there's a bug in PyMC so we need to check if coordinate is
# present in the model before attempting to add it.
for name, values in group_specific_term.coords.items():
if name not in self.model.coords:
self.model.add_coords({name: values})
Expand All @@ -183,10 +210,43 @@ def _build_group_specific_terms(self, spec):
else:
self.mu += coef * predictor

def _build_offsets(self, spec):
"""Add offset terms to the PyMC model.
Offsets are terms with a regression coefficient of 1.
This is technically part of 'X @ b' in the linear predictor 'X @ b + Z @ u'.
It's added here so we avoid the creation of a constant variable in PyMC.
Parameters
----------
spec : bambi.Model
The model.
"""
for offset in spec.offset_terms.values():
self.mu += offset.data.squeeze()

def _build_response(self, spec):
"""Add response term to the PyMC model
Parameters
----------
spec : bambi.Model
The model.
"""
ResponseTerm(spec.response, spec.family).build(self.mu, self.INVLINKS)

def _build_potentials(self, spec):
"""Add potentials to the PyMC model.
Potentials are arbitrary quantities that are added to the model log likelihood.
See 'Factor Potentials' in
https://github.com/fonnesbeck/probabilistic_python/blob/main/pymc_intro.ipynb
Parameters
----------
spec : bambi.Model
The model.
"""
if spec.potentials is not None:
count = 0
for variable, constraint in spec.potentials:
Expand Down
70 changes: 52 additions & 18 deletions bambi/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from .defaults import get_default_prior, get_builtin_family
from .families import Family, univariate, multivariate
from .priors import Prior, PriorScaler
from .terms import GroupSpecificTerm, ResponseTerm, Term
from .terms import GroupSpecificTerm, OffsetTerm, ResponseTerm, Term
from .utils import listify, extra_namespace
from .version import __version__

Expand Down Expand Up @@ -318,6 +318,8 @@ def _build_priors(self):
kind = "group_specific"
elif term.kind == "intercept":
kind = "intercept"
elif term.kind == "offset":
continue
else:
kind = "common"
term.prior = prepare_prior(term.prior, kind, self.auto_scale)
Expand Down Expand Up @@ -512,7 +514,10 @@ def _add_common(self, common, priors):
f"Trying to set hyperprior on '{name}'. "
"Can't set a hyperprior on common effects."
)
self.terms[name] = Term(name, term, data, prior)
if term.kind == "offset":
self.terms[name] = OffsetTerm(name, term, data)
else:
self.terms[name] = Term(name, term, data, prior)

def _add_group_specific(self, group, priors):
"""Add group-specific (a.k.a. random) terms to the model.
Expand Down Expand Up @@ -743,6 +748,7 @@ def predict(self, idata, kind="mean", data=None, inplace=True, include_group_spe
linear_predictor = 0
X = None
Z = None
x_offsets = []

chain_n = len(idata.posterior.coords.get("chain"))
draw_n = len(idata.posterior.coords.get("draw"))
Expand All @@ -759,6 +765,13 @@ def predict(self, idata, kind="mean", data=None, inplace=True, include_group_spe
else:
X = self._design.common.evaluate_new_data(data).design_matrix

# Add offset columns to their own design matrix
# Remove them from the common design matrix.
for term in self.offset_terms:
term_slice = self._design.common.slices[term]
x_offsets.append(X[:, term_slice])
X = np.delete(X, term_slice, axis=1)

if self._design.group and include_group_specific:
if in_sample:
Z = self._design.group.design_matrix
Expand Down Expand Up @@ -830,6 +843,10 @@ def predict(self, idata, kind="mean", data=None, inplace=True, include_group_spe
contribution = contribution.reshape(shape)
linear_predictor += contribution

# If model contains offset, add directly to the linear predictor
if x_offsets:
linear_predictor += np.column_stack(x_offsets).sum(axis=1)[np.newaxis, np.newaxis, :]

# Contribution due to group-specific terms. Same comments than for beta_x apply here.
if Z is not None:
beta_z_list = []
Expand Down Expand Up @@ -1000,8 +1017,11 @@ def _get_group_specific_groups(self):
return groups

def __str__(self):
priors = ""
priors_common = [f" {t.name} ~ {t.prior}" for t in self.common_terms.values()]
if self.intercept_term:
term = self.intercept_term
priors_common = [f" {term.name} ~ {term.prior}"] + priors_common

priors_group = [f" {t.name} ~ {t.prior}" for t in self.group_specific_terms.values()]

# Prior for the correlation matrix in group-specific terms
Expand All @@ -1010,25 +1030,30 @@ def __str__(self):
# Priors for auxiliary parameters, e.g., standard deviation in normal linear model
priors_aux = [f" {k} ~ {v}" for k, v in self.family.likelihood.priors.items()]

if self.intercept_term:
t = self.intercept_term
priors_common = [f" {t.name} ~ {t.prior}"] + priors_common
if priors_common:
priors += "\n".join([" Common-level effects", *priors_common])
if priors_group:
priors += "\n\n" + "\n".join([" Group-level effects", *priors_group])
if priors_cor:
priors += "\n\n" + "\n".join([" Group-level correlation", *priors_cor])
if priors_aux:
priors += "\n\n" + "\n".join([" Auxiliary parameters", *priors_aux])
# Offsets
offsets = [f" {t.name} ~ 1" for t in self.offset_terms.values()]

priors_dict = {
"Common-level effects": priors_common,
"Group-level effects": priors_group,
"Group-level correlation": priors_cor,
"Offset effects": offsets,
"Auxiliary parameters": priors_aux,
}

priors_list = []
for group, values in priors_dict.items():
if values:
priors_list += ["\n".join([f" {group}"] + values)]
priors_message = "\n\n".join(priors_list)

str_list = [
f"Formula: {self.formula}",
f"Family name: {self.family.name.capitalize()}",
f"Link: {self.family.link.name}",
f"Observations: {self.response.data.shape[0]}",
"Priors:",
priors,
priors_message,
]
if self.backend and self.backend.fit:
extra_foot = (
Expand All @@ -1051,14 +1076,16 @@ def term_names(self):

@property
def common_terms(self):
"""Return dict of all and only common effects in model."""
"""Return dict of all common effects in model."""
return {
k: v for (k, v) in self.terms.items() if not v.group_specific and v.kind != "intercept"
k: v
for (k, v) in self.terms.items()
if not v.group_specific and v.kind not in ["intercept", "offset"]
}

@property
def group_specific_terms(self):
"""Return dict of all and only group specific effects in model."""
"""Return dict of all group specific effects in model."""
return {k: v for (k, v) in self.terms.items() if v.group_specific}

@property
Expand All @@ -1070,6 +1097,13 @@ def intercept_term(self):
else:
return None

@property
def offset_terms(self):
"""Return dict of all offset effects in model."""
return {
k: v for (k, v) in self.terms.items() if not v.group_specific and v.kind == "offset"
}


def prepare_prior(prior, kind, auto_scale):
"""Helper function to correctly set default priors, auto scaling, etc.
Expand Down
36 changes: 36 additions & 0 deletions bambi/terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,42 @@ def __repr__(self):
return self.__str__()


class OffsetTerm:
"""Representation of a single offset term.
Parameters
----------
name: str
Name of the term.
term: formulae.terms.terms.Term
A model term created in formulae.
data: (DataFrame, Series, ndarray)
The term values.
"""

group_specific = False

def __init__(self, name, term, data):
self.name = name
self.data = data.squeeze()
self.kind = "offset"
self.term = term
self.alias = None
self.coords = {}

def set_alias(self, value):
self.alias = value

def __str__(self):
args = [f"name: {self.name}", f"shape: {self.data.shape}"]
if self.alias:
args[0] = f"{args[0]} (alias: {self.alias})"
return f"{self.__class__.__name__}({', '.join(args)})"

def __repr__(self):
return self.__str__()


# pylint: disable = protected-access
def get_reference_level(term):
if term.kind != "categoric":
Expand Down
26 changes: 23 additions & 3 deletions bambi/tests/test_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

from bambi.models import Model
from bambi import load_data


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -313,9 +314,6 @@ def c(*args):


def test_posterior_predictive_multinomial(inhaler):
def c(*args):
return np.column_stack(args)

df = inhaler.groupby(["treat", "carry", "rating"], as_index=False).size()
df = df.pivot(index=["treat", "carry"], columns="rating", values="size").reset_index()
df.columns = ["treat", "carry", "y1", "y2", "y3", "y4"]
Expand Down Expand Up @@ -356,3 +354,25 @@ def test_predict_include_group_specific():

# When we include group-specific terms, these predictions are different
assert not (idata_1.posterior["y_mean"] == idata_1.posterior["y_mean"][:, :, 0]).all()


def test_predict_offset():
# Simple case
data = load_data("carclaims")
model = Model("numclaims ~ offset(np.log(exposure))", data, family="poisson", link="log")
idata = model.fit(tune=100, draws=100, chains=2, random_seed=1234)
model.predict(idata)
model.predict(idata, kind="pps")

# More complex case
data = pd.DataFrame(
{
"y": np.random.poisson(20, size=100),
"x": np.random.normal(size=100),
"group": np.tile(np.arange(10), 10),
}
)
data["time"] = data["y"] - np.random.normal(loc=1, size=100)
model = Model("y ~ offset(np.log(time)) + x + (1 | group)", data, family="poisson")
idata = model.fit(tune=100, draws=100, chains=2, target_accept=0.9, random_seed=1234)
model.predict(idata, kind="pps")

0 comments on commit a69d864

Please # to comment.