
Add examples from pymc3 #5


Merged: 1 commit merged on Dec 17, 2020
75 changes: 75 additions & 0 deletions examples/GHME_2013.py
@@ -0,0 +1,75 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pymc3 import HalfCauchy, Model, Normal, get_data, sample
from pymc3.distributions.timeseries import GaussianRandomWalk

data = pd.read_csv(get_data("pancreatitis.csv"))
countries = ["CYP", "DNK", "ESP", "FIN", "GBR", "ISL"]
data = data[data.area.isin(countries)]

age = data["age"] = np.array(data.age_start + data.age_end) / 2
rate = data.value = data.value * 1000
# pandas removed factorize's `order` keyword; an ordered Categorical gives
# the same integer codes while keeping `countries` as the category order.
group = pd.Categorical(data.area, categories=countries).codes


ncountries = len(countries)

for i, country in enumerate(countries):
plt.subplot(2, 3, i + 1)
plt.title(country)
d = data[data.area == country]
plt.plot(d.age, d.value, ".")

plt.ylim(0, rate.max())


nknots = 10
knots = np.linspace(data.age_start.min(), data.age_end.max(), nknots)


def interpolate(x0, y0, x, group):
    """Linearly interpolate knot values y0 (one column per group, defined at
    the knot locations x0) at the points x, picking each row's group column."""
x = np.array(x)
group = np.array(group)

idx = np.searchsorted(x0, x)
dl = np.array(x - x0[idx - 1])
dr = np.array(x0[idx] - x)
d = dl + dr
wl = dr / d

return wl * y0[idx - 1, group] + (1 - wl) * y0[idx, group]


with Model() as model:
coeff_sd = HalfCauchy("coeff_sd", 5)

y = GaussianRandomWalk("y", sigma=coeff_sd, shape=(nknots, ncountries))

p = interpolate(knots, y, age, group)

sd = HalfCauchy("sd", 5)

vals = Normal("vals", p, sigma=sd, observed=rate)


def run(n=3000):
if n == "short":
n = 150
with model:
trace = sample(n, tune=int(n / 2), init="advi+adapt_diag")

for i, country in enumerate(countries):
plt.subplot(2, 3, i + 1)
plt.title(country)

d = data[data.area == country]
plt.plot(d.age, d.value, ".")
        # overlay thinned posterior draws of the knot values for this country
        plt.plot(knots, trace["y"][::5, :, i].T, color="r", alpha=0.01)

plt.ylim(0, rate.max())


if __name__ == "__main__":
run()
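A note on the `interpolate` helper above: it is a vectorized piecewise-linear interpolation between knot values, implemented with `np.searchsorted` and distance weights. A minimal standalone sketch of the same idea, using only NumPy and made-up values (the names `x0`, `y0`, `x` follow the file; the numbers are hypothetical):

import numpy as np

# Sketch of the interpolation in GHME_2013.py: for each query point x,
# find the bracketing knots and take a distance-weighted average.
x0 = np.array([0.0, 10.0, 20.0, 30.0])        # knot locations (hypothetical)
y0 = np.array([1.0, 3.0, 2.0, 4.0])           # knot values (hypothetical)
x = np.array([5.0, 12.0, 25.0])               # query points

idx = np.searchsorted(x0, x)                  # index of the right-hand knot
wl = (x0[idx] - x) / (x0[idx] - x0[idx - 1])  # weight on the left-hand knot
print(wl * y0[idx - 1] + (1 - wl) * y0[idx])  # matches np.interp(x, x0, y0)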
62 changes: 62 additions & 0 deletions examples/LKJ_correlation.py
@@ -0,0 +1,62 @@
import numpy as np
import theano.tensor as tt

from numpy.random import multivariate_normal

import pymc3 as pm

# Generate some multivariate normal data:
n_obs = 1000

# Mean values:
mu_r = np.linspace(0, 2, num=4)
n_var = len(mu_r)

# Standard deviations:
stds = np.ones(4) / 2.0

# Correlation matrix of 4 variables:
corr_r = np.array(
[
[1.0, 0.75, 0.0, 0.15],
[0.75, 1.0, -0.06, 0.19],
[0.0, -0.06, 1.0, -0.04],
[0.15, 0.19, -0.04, 1.0],
]
)
cov_matrix = np.diag(stds).dot(corr_r.dot(np.diag(stds)))

dataset = multivariate_normal(mu_r, cov_matrix, size=n_obs)

with pm.Model() as model:

mu = pm.Normal("mu", mu=0, sigma=1, shape=n_var)

# Note that we access the distribution for the standard
# deviations, and do not create a new random variable.
sd_dist = pm.HalfCauchy.dist(beta=2.5)
packed_chol = pm.LKJCholeskyCov("chol_cov", n=n_var, eta=1, sd_dist=sd_dist)
# compute the covariance matrix
chol = pm.expand_packed_triangular(n_var, packed_chol, lower=True)
cov = tt.dot(chol, chol.T)

# Extract the standard deviations etc
sd = pm.Deterministic("sd", tt.sqrt(tt.diag(cov)))
corr = tt.diag(sd ** -1).dot(cov.dot(tt.diag(sd ** -1)))
r = pm.Deterministic("r", corr[np.triu_indices(n_var, k=1)])

like = pm.MvNormal("likelihood", mu=mu, chol=chol, observed=dataset)


def run(n=1000):
if n == "short":
n = 50
with model:
trace = pm.sample(n)
        # `varnames` is the deprecated spelling; newer pymc3/arviz use `var_names`
        pm.traceplot(
            trace,
            var_names=["mu", "r"],
            lines={"mu": mu_r, "r": corr_r[np.triu_indices(n_var, k=1)]},
        )


if __name__ == "__main__":
run()
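The deterministic transforms above (Cholesky factor to covariance, then to standard deviations and off-diagonal correlations) can be checked in plain NumPy. A small sketch with toy 2x2 values, no pymc3 involved:

import numpy as np

# Check of the transforms in LKJ_correlation.py: a lower Cholesky factor
# reproduces the covariance, standard deviations, and correlations.
corr = np.array([[1.0, 0.75], [0.75, 1.0]])   # toy correlation matrix
cov = np.diag([0.5, 0.5]) @ corr @ np.diag([0.5, 0.5])
chol = np.linalg.cholesky(cov)

cov_back = chol @ chol.T                      # recovers cov
sd_back = np.sqrt(np.diag(cov_back))          # recovers [0.5, 0.5]
corr_back = np.diag(1 / sd_back) @ cov_back @ np.diag(1 / sd_back)
print(corr_back[np.triu_indices(2, k=1)])     # -> [0.75]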
Empty file added examples/__init__.py
32 changes: 32 additions & 0 deletions examples/arbitrary_stochastic.py
@@ -0,0 +1,32 @@
import numpy as np
import theano.tensor as tt

import pymc3 as pm


# custom log-likelihood
def logp(failure, lam, value):
return tt.sum(failure * tt.log(lam) - lam * value)


def build_model():
# data
failure = np.array([0.0, 1.0])
value = np.array([1.0, 0.0])

# model
with pm.Model() as model:
lam = pm.Exponential("lam", 1.0)
pm.DensityDist("x", logp, observed={"failure": failure, "lam": lam, "value": value})
return model


def run(n_samples=3000):
model = build_model()
with model:
trace = pm.sample(n_samples)
return trace


if __name__ == "__main__":
run()
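For this particular toy dataset the posterior is available in closed form, which makes a useful sanity check on the sampler: the logp above is the exponential likelihood term failure*log(lam) - lam*value, and the Exponential(1) prior (a Gamma(1, 1)) is conjugate to it. A quick sketch of that check, NumPy only:

import numpy as np

# Conjugate check for arbitrary_stochastic.py: the posterior on lam is
# Gamma(1 + sum(failure), 1 + sum(value)) = Gamma(2, 2), with mean 1.0.
failure = np.array([0.0, 1.0])
value = np.array([1.0, 0.0])
alpha = 1.0 + failure.sum()   # prior shape + observed events
beta = 1.0 + value.sum()      # prior rate + total exposure
print(alpha / beta)           # posterior mean of lam -> 1.0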
90 changes: 90 additions & 0 deletions examples/arma_example.py
@@ -0,0 +1,90 @@
import numpy as np

from theano import scan, shared

import pymc3 as pm

"""
ARMA example
It is interesting to note just how much more compact this is than the
original Stan example.

The original implementation is in the Stan documentation by Gelman et al.
and is reproduced (slightly altered) below:

data {
  int<lower=1> T;       // num observations
  real y[T];            // observed outputs
}
parameters {
  real mu;              // mean coeff
  real phi;             // autoregression coeff
  real theta;           // moving avg coeff
  real<lower=0> sigma;  // noise scale
}
model {
  vector[T] nu;         // prediction for time t
  vector[T] err;        // error for time t
  nu[1] <- mu + phi * mu;
  err[1] <- y[1] - nu[1];          // assume err[0] == 0
  for (t in 2:T) {
    nu[t] <- mu + phi * y[t-1] + theta * err[t-1];
    err[t] <- y[t] - nu[t];
  }
  mu ~ normal(0, 10);              // priors
  phi ~ normal(0, 2);
  theta ~ normal(0, 2);
  sigma ~ cauchy(0, 5);
  err ~ normal(0, sigma);          // likelihood
}

Ported to PyMC3 by Peadar Coyle and Chris Fonnesbeck (c) 2016.
"""


def build_model():
y = shared(np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float32))
with pm.Model() as arma_model:
sigma = pm.HalfNormal("sigma", 5.0)
theta = pm.Normal("theta", 0.0, sigma=1.0)
phi = pm.Normal("phi", 0.0, sigma=2.0)
mu = pm.Normal("mu", 0.0, sigma=10.0)

err0 = y[0] - (mu + phi * mu)

def calc_next(last_y, this_y, err, mu, phi, theta):
nu_t = mu + phi * last_y + theta * err
return this_y - nu_t

err, _ = scan(
fn=calc_next,
sequences=dict(input=y, taps=[-1, 0]),
outputs_info=[err0],
non_sequences=[mu, phi, theta],
)

pm.Potential("like", pm.Normal.dist(0, sigma=sigma).logp(err))
return arma_model


def run(n_samples=1000):
model = build_model()
with model:
trace = pm.sample(draws=n_samples, tune=1000, target_accept=0.99)

pm.plots.traceplot(trace)
pm.plots.forestplot(trace)


if __name__ == "__main__":
run()
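The `scan` call in build_model() computes the ARMA(1, 1) innovations recursively; the same recursion written as an ordinary Python loop (with hypothetical parameter values) may clarify what `scan` is doing:

import numpy as np

# Plain-loop version of the scan recursion: for fixed parameters, each
# innovation is the observation minus its one-step forecast. err[0] here is
# the initial value err0; scan's output contains only the subsequent terms.
y = np.array([15, 10, 16, 11, 9, 11, 10, 18], dtype=np.float64)
mu, phi, theta = 12.0, 0.5, 0.1   # hypothetical parameter values

err = [y[0] - (mu + phi * mu)]    # err0, assuming err[-1] == 0
for t in range(1, len(y)):
    nu_t = mu + phi * y[t - 1] + theta * err[-1]
    err.append(y[t] - nu_t)
print(np.round(err, 2))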
41 changes: 41 additions & 0 deletions examples/baseball.py
@@ -0,0 +1,41 @@
#
# Demonstrates the usage of hierarchical partial pooling
# See http://mc-stan.org/documentation/case-studies/pool-binary-trials.html for more details
#

import numpy as np

import pymc3 as pm


def build_model():
data = np.loadtxt(
pm.get_data("efron-morris-75-data.tsv"), delimiter="\t", skiprows=1, usecols=(2, 3)
)

atbats = pm.floatX(data[:, 0])
hits = pm.floatX(data[:, 1])

N = len(hits)

    # we want to bound kappa from below
BoundedKappa = pm.Bound(pm.Pareto, lower=1.0)

with pm.Model() as model:
phi = pm.Uniform("phi", lower=0.0, upper=1.0)
kappa = BoundedKappa("kappa", alpha=1.0001, m=1.5)
thetas = pm.Beta("thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, shape=N)
ys = pm.Binomial("ys", n=atbats, p=thetas, observed=hits)
return model


def run(n=2000):
model = build_model()
with model:
trace = pm.sample(n, target_accept=0.99)

pm.traceplot(trace)


if __name__ == "__main__":
run()
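The `(phi, kappa)` construction above is the mean/concentration parameterization of the Beta distribution: `alpha = phi * kappa`, `beta = (1 - phi) * kappa` gives a Beta with mean `phi`, and a larger `kappa` pools the per-player `thetas` more tightly around `phi`. A quick numerical illustration with hypothetical values, NumPy only:

import numpy as np

# Mean/concentration Beta parameterization used in baseball.py:
# Beta(phi * kappa, (1 - phi) * kappa) has mean phi; kappa controls spread.
rng = np.random.default_rng(0)
phi, N = 0.27, 100_000
for kappa in (2.0, 20.0, 200.0):
    draws = rng.beta(phi * kappa, (1 - phi) * kappa, size=N)
    print(kappa, draws.mean().round(3), draws.std().round(3))
# larger kappa -> same mean (~0.27), smaller spread: stronger pooling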