bootstrap.tex

\section*{Bootstrap}
Sample uniformly from the data points with replacement and compute the estimator on each bootstrap sample. For a dataset $x_1, \dots, x_n$, the probability that $x_1$ is contained in a random bootstrap sample is: \\
$1-(1-1/n)^n \approx 0.632$ (for large $n$; the limit is $1-1/e$).
\subsection*{Bootstrap Consistency}
For an increasing sequence $a_n$ (often $\sqrt{n}$) where $a_n^{-1}$ is the convergence rate of $\hat \theta_n$:
$P(a_n(\hat \theta_n - \theta) \leq x) - P^*(a_n(\hat \theta^*_n - \hat \theta_n) \leq x) \to^P 0$ as $n\to \infty$. This holds when $\sqrt{n}(\hat \theta_n - \theta)$ is asympt. normal. This allows estimating $\text{Bias}(\hat \theta_n) = E[\hat \theta_n] - \theta$ by $E^*[\hat \theta^*_n] - \hat \theta_n$. One can also estimate $\text{Var}(\hat\theta_n)$ by $\text{Var}^*(\hat\theta^*_n)$.

\textbf{Reversed Quantile Bootstrap CI:} $[\hat \theta_n - Q_{\hat \theta^*_n - \hat \theta_n}(1- \alpha / 2), \hat \theta_n - Q_{\hat \theta^*_n - \hat \theta_n}(\alpha / 2)]$ (type="basic"), \textbf{Normal Bootstrap CI:} Assumes $\hat\theta_n$ to be asympt. normal: $\hat\theta_n \pm Q_z(1-\alpha / 2)\hat{sd}(\hat\theta_n)$ where $z \sim \mathcal{N}(0,1)$ and $\hat{sd}(\hat\theta_n)=\sqrt{\text{Var}^*(\hat\theta_n^*)}$ (type="norm"), \textbf{Quantile Bootstrap CI:} not theoret. justified unless the distr. of $\hat\theta_n$ is symm.:
$[Q_{\hat\theta_n^*}(\alpha / 2), Q_{\hat\theta_n^*}(1-\alpha / 2)]$ (type="perc"). Same as \textit{reversed quantile bootstrap CI} if the distr. of $\hat\theta_n^* - \hat\theta_n$ is symm. around 0, \textbf{Bootstrap T:} Relies on $t=\frac{\hat\theta_n -\theta}{\hat {sd}(\hat\theta_n)}$ and $t^*=\frac{\hat\theta_n^*-\hat\theta_n}{\hat{sd}(\hat\theta_n^*)}$ having asympt. the same distr.: $[\hat\theta_n - \hat{sd}(\hat\theta_n) \cdot Q_{t^*}(1-\alpha / 2), \hat\theta_n - \hat{sd}(\hat\theta_n) \cdot Q_{t^*}(\alpha / 2)]$ (type="stud"). Note: $\hat{sd}(\hat\theta_n)$ is computed as for the normal bootstrap CI and $\hat{sd}(\hat\theta^*_n)$ is computed using a second-layer (nested) bootstrap.
\\
\textbf{Parametric Bootstrap:}
Assume the data is generated by some parametric distr. (e.g. $\mathcal{N}(\mu, \sigma^2)$), est. its param.,
then generate new data sets from the fitted distr. Only works well if the assumed distr. is approx. correct.\\
\textbf{Smoothed Bootstrap:}
Given data $Z_1,\dots,Z_n \sim_{\text{i.i.d.}} P$, we estimate $P$ by some smooth (non-parametric) estimate $\tilde P_n$, then generate bootstrap samples from $\tilde P_n$. This lies in between the non-param. and param. bootstrap. Works well if $P$ is indeed smooth.

\begin{codebox}{r}{Bootstrap}
library(boot)
sample(1:n, size=n, replace=TRUE) # bootstrap indices
# statistic f is called as f(data, index)
res.boot <- boot(data=Portfolio, statistic=f, R=1000)
res.boot$t0 # statistic on the original data
res.boot$t # statistic on each bootstrap sample
# CI for the i-th component of the statistic
boot.ci(res.boot, type="basic", index=i)
# Worked example: all CI types for a 10%-trimmed mean
tm <- function(x, ind) mean(x[ind], trim=0.1)
tmv <- function(x, ind) {
  # 2nd-layer bootstrap on the resample x[ind]: its
  # variance is what the bootstrap T CI requires
  inner <- boot(data=x[ind], statistic=tm, R=50)
  c(tm(x, ind), var(inner$t)) }
# R must be large for quantile-based CIs (R=10 is too few)
res<-boot(data=..,statistic=tmv,R=1000,sim="ordinary")
# var.t0: Var estimate of t0, from the replicates of res
boot.ci(res, conf=0.95, type=c("basic","norm", "perc","stud"), var.t0=var(res$t[,1]))
# Manual 95% CIs (t0: estimate, t: bootstrap replicates)
z <- qnorm(0.975); s <- sd(t)
quantile.CI <- quantile(t, probs=c(0.025, 0.975))
norm <- c(t0 - z*s, t0 + z*s)
reversed.CI <- t0 - quantile(t - t0, probs=c(0.975, 0.025))
# Parametric Bootstrap
# f1: statistic, called with a (simulated) dataset
# f2: simulates one dataset, called as f2(data, mle)
# mle: fitted param. passed on to f2 (here 1/mean(x1))
res.boot <- boot(data=data, statistic=f1, R=1000, sim="parametric", ran.gen=f2, mle=1/mean(x1))
\end{codebox}