
Add examples from pymc3 #5


Merged: 1 commit merged on Dec 17, 2020
75 changes: 75 additions & 0 deletions examples/GHME_2013.py
@@ -0,0 +1,75 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pymc3 import HalfCauchy, Model, Normal, get_data, sample
from pymc3.distributions.timeseries import GaussianRandomWalk

data = pd.read_csv(get_data("pancreatitis.csv"))
countries = ["CYP", "DNK", "ESP", "FIN", "GBR", "ISL"]
data = data[data.area.isin(countries)]

age = data["age"] = np.array(data.age_start + data.age_end) / 2
rate = data.value = data.value * 1000
# pandas removed factorize's `order` keyword; an ordered Categorical gives
# the same integer codes while keeping `countries` as the category order.
group = pd.Categorical(data.area, categories=countries).codes


ncountries = len(countries)

for i, country in enumerate(countries):
plt.subplot(2, 3, i + 1)
plt.title(country)
d = data[data.area == country]
plt.plot(d.age, d.value, ".")

plt.ylim(0, rate.max())


nknots = 10
knots = np.linspace(data.age_start.min(), data.age_end.max(), nknots)


def interpolate(x0, y0, x, group):
    """Linearly interpolate knot values y0 (one column per group, defined at
    the knot locations x0) at the points x, picking each row's group column."""
x = np.array(x)
group = np.array(group)

idx = np.searchsorted(x0, x)
dl = np.array(x - x0[idx - 1])
dr = np.array(x0[idx] - x)
d = dl + dr
wl = dr / d

return wl * y0[idx - 1, group] + (1 - wl) * y0[idx, group]


with Model() as model:
coeff_sd = HalfCauchy("coeff_sd", 5)

y = GaussianRandomWalk("y", sigma=coeff_sd, shape=(nknots, ncountries))

p = interpolate(knots, y, age, group)

sd = HalfCauchy("sd", 5)

vals = Normal("vals", p, sigma=sd, observed=rate)


def run(n=3000):
if n == "short":
n = 150
with model:
trace = sample(n, tune=int(n / 2), init="advi+adapt_diag")

for i, country in enumerate(countries):
plt.subplot(2, 3, i + 1)
plt.title(country)

d = data[data.area == country]
plt.plot(d.age, d.value, ".")
        # overlay thinned posterior draws of the knot values for this country
        plt.plot(knots, trace["y"][::5, :, i].T, color="r", alpha=0.01)

plt.ylim(0, rate.max())


if __name__ == "__main__":
run()
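A note on the `interpolate` helper above: it is a vectorized piecewise-linear interpolation between knot values, implemented with `np.searchsorted` and distance weights. A minimal standalone sketch of the same idea, using only NumPy and made-up values (the names `x0`, `y0`, `x` follow the file; the numbers are hypothetical):

import numpy as np

# Sketch of the interpolation in GHME_2013.py: for each query point x,
# find the bracketing knots and take a distance-weighted average.
x0 = np.array([0.0, 10.0, 20.0, 30.0])        # knot locations (hypothetical)
y0 = np.array([1.0, 3.0, 2.0, 4.0])           # knot values (hypothetical)
x = np.array([5.0, 12.0, 25.0])               # query points

idx = np.searchsorted(x0, x)                  # index of the right-hand knot
wl = (x0[idx] - x) / (x0[idx] - x0[idx - 1])  # weight on the left-hand knot
print(wl * y0[idx - 1] + (1 - wl) * y0[idx])  # matches np.interp(x, x0, y0)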
62 changes: 62 additions & 0 deletions examples/LKJ_correlation.py
@@ -0,0 +1,62 @@
import numpy as np
import theano.tensor as tt

from numpy.random import multivariate_normal

import pymc3 as pm

# Generate some multivariate normal data:
n_obs = 1000

# Mean values:
mu_r = np.linspace(0, 2, num=4)
n_var = len(mu_r)

# Standard deviations:
stds = np.ones(4) / 2.0

# Correlation matrix of 4 variables:
corr_r = np.array(
[
[1.0, 0.75, 0.0, 0.15],
[0.75, 1.0, -0.06, 0.19],
[0.0, -0.06, 1.0, -0.04],
[0.15, 0.19, -0.04, 1.0],
]
)
cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))

dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

with pm.Model() as model:

mu = pm.Normal("mu", mu=0, sigma=1, shape=n_var)

# Note that we access the distribution for the standard
# deviations, and do not create a new random variable.
sd_dist = pm.HalfCauchy.dist(beta=2.5)
packed_chol = pm.LKJCholeskyCov("chol_cov", n=n_var, eta=1, sd_dist=sd_dist)
# compute the covariance matrix
chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
cov = tt.dot(chol, chol.T)

# Extract the standard deviations etc
sd = pm.Deterministic("sd", tt.sqrt(tt.diag(cov)))
corr = tt.diag(sd ** -1).dot(cov.dot(tt.diag(sd ** -1)))
r = pm.Deterministic("r", corr[np.triu_indices(n_var, k=1)])

like = pm.MvNormal("likelihood", mu=mu, chol=chol, observed=dataset)


def run(n=1000):
if n == "short":
n = 50
with model:
trace = pm.sample(n)
        # `varnames` is the deprecated spelling; newer pymc3/arviz use `var_names`
        pm.traceplot(
            trace,
            var_names=["mu", "r"],
            lines={"mu": mu_r, "r": corr_r[np.triu_indices(n_var, k=1)]},
        )


if __name__ == "__main__":
run()
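The deterministic transforms above (Cholesky factor to covariance, then to standard deviations and off-diagonal correlations) can be checked in plain NumPy. A small sketch with toy 2x2 values, no pymc3 involved:

import numpy as np

# Check of the transforms in LKJ_correlation.py: a lower Cholesky factor
# reproduces the covariance, standard deviations, and correlations.
corr = np.array([[1.0, 0.75], [0.75, 1.0]])   # toy correlation matrix
cov = np.diag([0.5, 0.5]) @ corr @ np.diag([0.5, 0.5])
chol = np.linalg.cholesky(cov)

cov_back = chol @ chol.T                      # recovers cov
sd_back = np.sqrt(np.diag(cov_back))          # recovers [0.5, 0.5]
corr_back = np.diag(1 / sd_back) @ cov_back @ np.diag(1 / sd_back)
print(corr_back[np.triu_indices(2, k=1)])     # -> [0.75]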
Empty file added examples/__init__.py
32 changes: 32 additions & 0 deletions examples/arbitrary_stochastic.py
@@ -0,0 +1,32 @@
import numpy as np
import theano.tensor as tt

import pymc3 as pm


# custom log-likelihood
def logp(failure, lam, value):
return tt.sum(failure * tt.log(lam) - lam * value)


def build_model():
# data
failure = np.array([0.0, 1.0])
value = np.array([1.0, 0.0])

# model
with pm.Model() as model:
lam = pm.Exponential("lam", 1.0)
pm.DensityDist("x", logp, observed={"failure": failure, "lam": lam, "value": value})
return model


def run(n_samples=3000):
model = build_model()
with model:
trace = pm.sample(n_samples)
return trace


if __name__ == "__main__":
run()
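For this particular toy dataset the posterior is available in closed form, which makes a useful sanity check on the sampler: the logp above is the exponential likelihood term failure*log(lam) - lam*value, and the Exponential(1) prior (a Gamma(1, 1)) is conjugate to it. A quick sketch of that check, NumPy only:

import numpy as np

# Conjugate check for arbitrary_stochastic.py: the posterior on lam is
# Gamma(1 + sum(failure), 1 + sum(value)) = Gamma(2, 2), with mean 1.0.
failure = np.array([0.0, 1.0])
value = np.array([1.0, 0.0])
alpha = 1.0 + failure.sum()   # prior shape + observed events
beta = 1.0 + value.sum()      # prior rate + total exposure
print(alpha / beta)           # posterior mean of lam -> 1.0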
90 changes: 90 additions & 0 deletions examples/arma_example.py
@@ -0,0 +1,90 @@
import numpy as np

from theano import scan, shared

import pymc3 as pm

"""
ARMA example
It is interesting to note just how much more compact this is than the
original Stan example.

The original implementation is in the Stan documentation by Gelman et al.
and is reproduced (slightly altered) below:

data {
  int<lower=1> T;       // num observations
  real y[T];            // observed outputs
}
parameters {
  real mu;              // mean coeff
  real phi;             // autoregression coeff
  real theta;           // moving avg coeff
  real<lower=0> sigma;  // noise scale
}
model {
  vector[T] nu;         // prediction for time t
  vector[T] err;        // error for time t
  nu[1] <- mu + phi * mu;
  err[1] <- y[1] - nu[1];          // assume err[0] == 0
  for (t in 2:T) {
    nu[t] <- mu + phi * y[t-1] + theta * err[t-1];
    err[t] <- y[t] - nu[t];
  }
  mu ~ normal(0, 10);              // priors
  phi ~ normal(0, 2);
  theta ~ normal(0, 2);
  sigma ~ cauchy(0, 5);
  err ~ normal(0, sigma);          // likelihood
}

Ported to PyMC3 by Peadar Coyle and Chris Fonnesbeck (c) 2016.
"""


def build_model():
y = shared(np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float32))
with pm.Model() as arma_model:
sigma = pm.HalfNormal("sigma", 5.0)
theta = pm.Normal("theta", 0.0, sigma=1.0)
phi = pm.Normal("phi", 0.0, sigma=2.0)
mu = pm.Normal("mu", 0.0, sigma=10.0)

err0 = y[0] - (mu + phi * mu)

def calc_next(last_y, this_y, err, mu, phi, theta):
nu_t = mu + phi * last_y + theta * err
return this_y - nu_t

err, _ = scan(
fn=calc_next,
sequences=dict(input=y, taps=[-1, 0]),
outputs_info=[err0],
non_sequences=[mu, phi, theta],
)

pm.Potential("like", pm.Normal.dist(0, sigma=sigma).logp(err))
return arma_model


def run(n_samples=1000):
model = build_model()
with model:
trace = pm.sample(draws=n_samples, tune=1000, target_accept=0.99)

pm.plots.traceplot(trace)
pm.plots.forestplot(trace)


if __name__ == "__main__":
run()
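The `scan` call in build_model() computes the ARMA(1, 1) innovations recursively; the same recursion written as an ordinary Python loop (with hypothetical parameter values) may clarify what `scan` is doing:

import numpy as np

# Plain-loop version of the scan recursion: for fixed parameters, each
# innovation is the observation minus its one-step forecast. err[0] here is
# the initial value err0; scan's output contains only the subsequent terms.
y = np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float64)
mu, phi, theta = 12.0, 0.5, 0.1   # hypothetical parameter values

err = [y[0] - (mu + phi * mu)]    # err0, assuming err[-1] == 0
for t in range(1, len(y)):
    nu_t = mu + phi * y[t - 1] + theta * err[-1]
    err.append(y[t] - nu_t)
print(np.round(err, 2))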
41 changes: 41 additions & 0 deletions examples/baseball.py
@@ -0,0 +1,41 @@
#
# Demonstrates the usage of hierarchical partial pooling
# See http://mc-stan.org/documentation/case-studies/pool-binary-trials.html for more details
#

import numpy as np

import pymc3 as pm


def build_model():
data = np.loadtxt(
pm.get_data("efron-morris-75-data.tsv"), delimiter="\t", skiprows=1, usecols=(2, 3)
)

atbats = pm.floatX(data[:, 0])
hits = pm.floatX(data[:, 1])

N = len(hits)

    # we want to bound kappa from below
BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)

with pm.Model() as model:
phi = pm.Uniform("phi", lower=0.0, upper=1.0)
kappa = BoundedKappa("kappa", alpha=1.0001, m=1.5)
thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
ys = pm.Binomial("ys", n=atbats, p=thetas, observed=hits)
return model


def run(n=2000):
model = build_model()
with model:
trace = pm.sample(n, target_accept=0.99)

pm.traceplot(trace)


if __name__ == "__main__":
run()
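The `(phi, kappa)` construction above is the mean/concentration parameterization of the Beta distribution: `alpha = phi * kappa`, `beta = (1 - phi) * kappa` gives a Beta with mean `phi`, and a larger `kappa` pools the per-player `thetas` more tightly around `phi`. A quick numerical illustration with hypothetical values, NumPy only:

import numpy as np

# Mean/concentration Beta parameterization used in baseball.py:
# Beta(phi * kappa, (1 - phi) * kappa) has mean phi; kappa controls spread.
rng = np.random.default_rng(0)
phi, N = 0.27, 100_000
for kappa in (2.0, 20.0, 200.0):
    draws = rng.beta(phi * kappa, (1 - phi) * kappa, size=N)
    print(kappa, draws.mean().round(3), draws.std().round(3))
# larger kappa -> same mean (~0.27), smaller spread: stronger pooling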