--- title: "Regression and Other Stories: Unemployment" author: "Andrew Gelman, Jennifer Hill, Aki Vehtari" date: "`r format(Sys.Date())`" output: html_document: theme: readable toc: true toc_depth: 2 toc_float: true code_download: true --- Time series fit and posterior predictive model checking for unemployment series. See Chapter 11 in Regression and Other Stories. ------------- ```{r setup, include=FALSE} knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA) # switch this to TRUE to save figures in separate files savefigs <- FALSE ``` #### Load packages ```{r } library("rprojroot") root<-has_file(".ROS-Examples-root")$make_fix_file() library("rstanarm") library("ggplot2") library("bayesplot") theme_set(bayesplot::theme_default(base_family = "sans")) ``` ```{r eval=FALSE, include=FALSE} # grayscale figures for the book if (savefigs) color_scheme_set(scheme = "gray") ``` #### Load data ```{r } unemp <- read.table(root("Unemployment/data","unemp.txt"), header=TRUE) head(unemp) ``` #### Plot the unemployment rate ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("Unemployment/figs","unemployment1.pdf"), height=3, width=4.5) ``` ```{r } par(mar=c(3,3,1,.1), mgp=c(1.7,.5,0), tck=-.01) plot(unemp$year, unemp$y, type="l", ylab="Unemployment rate", xlab="Year", yaxs="i", ylim=c(0, max(unemp$y)*1.05), xaxt="n", yaxt="n", bty="l") axis(1, seq(1950,2010,10), rep("",7)) axis(1, seq(1950,2010,20)) axis(2, seq(0,10), rep("",11)) axis(2, c(0,5,10), paste (c(0,5,10), "%", sep="")) ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ``` #### Fit a 1st-order autogregression ```{r } n <- nrow(unemp) unemp$y_lag <- c(NA, unemp$y[1:(n-1)]) fit_lag <- stan_glm(y ~ y_lag, data=unemp, refresh=0) print(fit_lag, digits=2) ``` #### Simulate replicated datasets using posterior predict ```{r } y_rep <- posterior_predict(fit_lag) y_rep <- cbind(unemp$y[1], y_rep) n_sims <- nrow(y_rep) ``` #### Simulate replicated datasets "manually" ```{r } sims <- as.matrix(fit_lag) n_sims <- nrow(sims) y_rep <- array(NA, c(n_sims, n)) for (s in 1:n_sims){ y_rep[s,1] <- unemp$y[1] for (t in 2:n){ y_rep[s,t] <- sims[s,"(Intercept)"] + sims[s,"y_lag"] * y_rep[s,t-1] + rnorm(1, 0, sims[s,"sigma"]) } } ``` #### Plot the simulated unemployment rate series ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("Unemployment/figs","unemployment2.pdf"), height=4.5, width=7.5) ``` ```{r } par(mar=c(1,1,3,.1), mgp=c(2,.5,0), tck=-.01) par(mfrow=c(3,5)) for (s in sort(sample(n_sims, 15))){ plot (unemp$year, y_rep[s,], type="l", ylab="", xlab="", yaxs="i", ylim=c(0, max(unemp$y)*1.05), xaxt="n", yaxt="n", bty="l", main=paste("Simulation", s)) axis(1, seq(1950,2010,10), rep("",7)) axis(2, seq(0,10), rep("",11)) } ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ``` #### Numerical posterior predictive check ```{r } test <- function (y){ n <- length(y) y_lag <- c(NA, y[1:(n-1)]) y_lag_2 <- c(NA, NA, y[1:(n-2)]) return(sum(sign(y-y_lag) != sign(y_lag-y_lag_2), na.rm=TRUE)) } test_y <- test(unemp$y) test_rep <- apply(y_rep, 1, test) print(mean(test_rep > test_y)) print(quantile(test_rep, c(.1,.5,.9))) ``` #### Plot test statistic for data and histogram of test statistics for replications ```{r } ppc_stat(y=unemp$y, yrep=y_rep, stat=test, binwidth = 1) + scale_y_continuous(breaks=NULL) ```