#' ---
#' title: "Regression and Other Stories: Residuals"
#' author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
#' date: "`r format(Sys.Date())`"
#' output:
#'   html_document:
#'     theme: readable
#'     toc: true
#'     toc_depth: 2
#'     toc_float: true
#'     code_download: true
#' ---

#' Plotting the data and fitted model. See Chapter 11 in Regression
#' and Other Stories.
#' 
#' -------------
#' 

#+ setup, include=FALSE
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE

#' #### Load packages
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")

#' ## Simple model with const term, one pre-treatment predictor, and treatment indicator
#'
#' #### Fake data
N <- 100
x <- runif(N, 0, 1)
z <- sample(c(0, 1), N, replace=TRUE)
a <- 1
b <- 2
theta <- 5
sigma <- 2
y <- a + b*x + theta*z +  rnorm(N, 0, sigma)
fake <- data.frame(x=x, y=y, z=z)
#' #### Model
fit <- stan_glm(y ~ x + z, data=fake, refresh = 0)

#' #### Plot Predictor vs Outcome
#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("Residuals/figs","resid_0a.pdf"), height=3.5, width=9)
#+
par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01)
for (i in 0:1){
  plot(range(x), range(y), type="n", xlab="Pre-treatment predictor, x", ylab="Outcome, y", main=paste("z =", i), bty="l")
  points(x[z==i], y[z==i], pch=20+i)
  abline(coef(fit)["(Intercept)"] + coef(fit)["z"]*i, coef(fit)["x"])
}
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#' ## More complicated model with multiple pre-treatment predictors.
#'
#' #### Fake data
#' 
#' Creating the linear predictor from the fitted multiple regression
#' model
N <- 100
K <- 10
X <- array(runif(N*K, 0, 1), c(N, K))
z <- sample(c(0, 1), N, replace=TRUE)
a <- 1
b <- 1:K
theta <- 10
sigma <- 5
y <- a + X %*% b + theta*z +  rnorm(N, 0, sigma)
fake <- data.frame(X=X, y=y, z=z)
#' #### Model
fit <- stan_glm(y ~ X + z, data=fake, refresh = 0)

#' #### Plot Predictor vs Outcome
#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("Residuals/figs","resid_0b.pdf"), height=4, width=9)
#+
y_hat <- predict(fit)
par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01)
par(mfrow=c(1,2), pty="s")
for (i in 0:1){
  plot(range(y_hat,y), range(y_hat,y), type="n", xlab=expression(paste("Linear predictor, ", hat(y))), ylab="Outcome, y", main=paste("z =", i), bty="l")
  points(y_hat[z==i], y[z==i], pch=20+i)
abline(0, 1)
}
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()

#' #### Plot Predictor vs Residual
#+ eval=FALSE, include=FALSE
if (savefigs) pdf(root("Residuals/figs","resid_0c.pdf"), height=3.5, width=9)
#+
r <- y - y_hat
par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01)
par(mfrow=c(1,2))
for (i in 0:1){
  plot(range(y_hat), range(r), type="n", xlab=expression(paste("Linear predictor, ", hat(y))), ylab="Residual, r", main=paste("z =", i), bty="l")
  points(y_hat[z==i], r[z==i], pch=20+i)
  abline(0, 0)
}
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()