---
title: "Regression and Other Stories: Unemployment"
author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
date: "`r format(Sys.Date())`"
output:
  html_document:
    theme: readable
    toc: true
    toc_depth: 2
    toc_float: true
    code_download: true
---
Time series fit and posterior predictive model checking for
unemployment series. See Chapter 11 in Regression and Other
Stories.

-------------


```{r setup, include=FALSE}
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE
```

#### Load packages

```{r }
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
library("ggplot2")
library("bayesplot")
theme_set(bayesplot::theme_default(base_family = "sans"))
```
```{r eval=FALSE, include=FALSE}
# grayscale figures for the book
if (savefigs) color_scheme_set(scheme = "gray")
```

#### Load data

```{r }
unemp <- read.table(root("Unemployment/data","unemp.txt"), header=TRUE)
head(unemp)
```

#### Plot the unemployment rate

```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("Unemployment/figs","unemployment1.pdf"), height=3, width=4.5)
```
```{r }
par(mar=c(3,3,1,.1), mgp=c(1.7,.5,0), tck=-.01)
plot(unemp$year, unemp$y, type="l", ylab="Unemployment rate", xlab="Year", yaxs="i",
  ylim=c(0, max(unemp$y)*1.05), xaxt="n", yaxt="n", bty="l")
axis(1, seq(1950,2010,10), rep("",7))
axis(1, seq(1950,2010,20))
axis(2, seq(0,10), rep("",11))
axis(2, c(0,5,10), paste (c(0,5,10), "%", sep=""))
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```

#### Fit a 1st-order autogregression

```{r }
n <- nrow(unemp)
unemp$y_lag <- c(NA, unemp$y[1:(n-1)])
fit_lag <- stan_glm(y ~ y_lag, data=unemp, refresh=0)
print(fit_lag, digits=2)
```

#### Simulate replicated datasets using posterior predict

```{r }
y_rep <- posterior_predict(fit_lag)
y_rep <- cbind(unemp$y[1], y_rep)
n_sims <- nrow(y_rep)
```

#### Simulate replicated datasets "manually"

```{r }
sims <- as.matrix(fit_lag)
n_sims <- nrow(sims)
y_rep <- array(NA, c(n_sims, n))
for (s in 1:n_sims){
  y_rep[s,1] <- unemp$y[1]
  for (t in 2:n){
    y_rep[s,t] <- sims[s,"(Intercept)"] + sims[s,"y_lag"] * y_rep[s,t-1] + rnorm(1, 0, sims[s,"sigma"])
  }
}
```

#### Plot the simulated unemployment rate series

```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("Unemployment/figs","unemployment2.pdf"), height=4.5, width=7.5)
```
```{r }
par(mar=c(1,1,3,.1), mgp=c(2,.5,0), tck=-.01)
par(mfrow=c(3,5))
for (s in sort(sample(n_sims, 15))){
  plot (unemp$year, y_rep[s,], type="l", ylab="", xlab="", yaxs="i",
  ylim=c(0, max(unemp$y)*1.05), xaxt="n", yaxt="n", bty="l", main=paste("Simulation", s))
  axis(1, seq(1950,2010,10), rep("",7))
  axis(2, seq(0,10), rep("",11))
}
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```

#### Numerical posterior predictive check

```{r }
test <- function (y){
  n <- length(y)
  y_lag <- c(NA, y[1:(n-1)])
  y_lag_2 <- c(NA, NA, y[1:(n-2)])
  return(sum(sign(y-y_lag) != sign(y_lag-y_lag_2), na.rm=TRUE))
}
test_y <- test(unemp$y)
test_rep <- apply(y_rep, 1, test)
print(mean(test_rep > test_y))
print(quantile(test_rep, c(.1,.5,.9)))
```

#### Plot test statistic for data and histogram of test statistics for replications

```{r }
ppc_stat(y=unemp$y, yrep=y_rep, stat=test, binwidth = 1) + scale_y_continuous(breaks=NULL)
```