---
title: "Regression and Other Stories: Residuals"
author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
date: "`r format(Sys.Date())`"
output:
  html_document:
    theme: readable
    toc: true
    toc_depth: 2
    toc_float: true
    code_download: true
---
Plotting the data and fitted model. See Chapter 11 in Regression
and Other Stories.

-------------


```{r setup, include=FALSE}
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE
```

#### Load packages

```{r }
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstanarm")
```

## Simple model with const term, one pre-treatment predictor, and treatment indicator

#### Fake data

```{r }
N <- 100
x <- runif(N, 0, 1)
z <- sample(c(0, 1), N, replace=TRUE)
a <- 1
b <- 2
theta <- 5
sigma <- 2
y <- a + b*x + theta*z +  rnorm(N, 0, sigma)
fake <- data.frame(x=x, y=y, z=z)
```

#### Model

```{r }
fit <- stan_glm(y ~ x + z, data=fake, refresh = 0)
```

#### Plot Predictor vs Outcome

```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("Residuals/figs","resid_0a.pdf"), height=3.5, width=9)
```
```{r }
par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01)
for (i in 0:1){
  plot(range(x), range(y), type="n", xlab="Pre-treatment predictor, x", ylab="Outcome, y", main=paste("z =", i), bty="l")
  points(x[z==i], y[z==i], pch=20+i)
  abline(coef(fit)["(Intercept)"] + coef(fit)["z"]*i, coef(fit)["x"])
}
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```

## More complicated model with multiple pre-treatment predictors.

#### Fake data

Creating the linear predictor from the fitted multiple regression
model

```{r }
N <- 100
K <- 10
X <- array(runif(N*K, 0, 1), c(N, K))
z <- sample(c(0, 1), N, replace=TRUE)
a <- 1
b <- 1:K
theta <- 10
sigma <- 5
y <- a + X %*% b + theta*z +  rnorm(N, 0, sigma)
fake <- data.frame(X=X, y=y, z=z)
```

#### Model

```{r }
fit <- stan_glm(y ~ X + z, data=fake, refresh = 0)
```

#### Plot Predictor vs Outcome

```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("Residuals/figs","resid_0b.pdf"), height=4, width=9)
```
```{r }
y_hat <- predict(fit)
par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01)
par(mfrow=c(1,2), pty="s")
for (i in 0:1){
  plot(range(y_hat,y), range(y_hat,y), type="n", xlab=expression(paste("Linear predictor, ", hat(y))), ylab="Outcome, y", main=paste("z =", i), bty="l")
  points(y_hat[z==i], y[z==i], pch=20+i)
abline(0, 1)
}
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```

#### Plot Predictor vs Residual

```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("Residuals/figs","resid_0c.pdf"), height=3.5, width=9)
```
```{r }
r <- y - y_hat
par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01)
par(mfrow=c(1,2))
for (i in 0:1){
  plot(range(y_hat), range(r), type="n", xlab=expression(paste("Linear predictor, ", hat(y))), ylab="Residual, r", main=paste("z =", i), bty="l")
  points(y_hat[z==i], r[z==i], pch=20+i)
  abline(0, 0)
}
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```