---
title: "Regression and Other Stories: SimpleCausal"
author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
date: "`r format(Sys.Date())`"
output:
  html_document:
    theme: readable
    toc: true
    toc_depth: 2
    toc_float: true
    code_download: true
---
Simple graphs illustrating regression for causal inference. See
Chapter 1 in Regression and Other Stories.

The simulated data depends on the random seed, and thus the plots
and numbers here and in the book may differ. You can experiment
with the simulation variation by changing the seed.

-------------


```{r setup, include=FALSE}
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE
```

#### Load packages

```{r }
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("arm")
# for reproducibility of simulated data
SEED <- 1151
```

## Simulated data from linear model

```{r }
set.seed(SEED)
N <- 50
x <- runif(N, 1, 5)
y <- rnorm(N, 10 + 3*x, 3)
x_binary <- ifelse(x<3, 0, 1)
data <- data.frame(N, x, y, x_binary)
```

#### Regression with binary predictor

```{r }
lm_1a <- lm(y ~ x_binary, data = data)
display(lm_1a)
```

#### Plots

```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("SimpleCausal/figs","overview_1a.pdf"), height=3.5, width=4.5)
```
```{r }
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x_binary, y, xlab="", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Regression with binary treatment", cex.main=.9, xaxt="n", cex.lab=.9, cex.axis=.9)
axis(1, c(0,1), c("    Control", "Treatment    "), cex.axis=.9)
abline(coef(lm_1a)[1], coef(lm_1a)[2])
text(0.3, 13, paste("Estimated treatment effect is\nslope of fitted line: ", fround(coef(lm_1a)[2], 1)), cex=.8, adj=0)
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```

#### Regression with continuous predictor

```{r }
lm_1b <- lm(y ~ x, data = data)
display(lm_1b)

```
```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("SimpleCausal/figs","overview_1b.pdf"), height=3.5, width=4.5)
```
```{r }
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x, y, xlab="Treatment level", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Regression with continuous treatment", cex.main=.9, cex.lab=.9, cex.axis=.9)
abline(coef(lm_1b)[1], coef(lm_1b)[2])
text(3.2, 15, paste("Estimated treatment\neffect per unit of x is\nslope of fitted line: ", fround(coef(lm_1b)[2], 1)), cex=.8, adj=0)
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```

## Simulated data from nonlinear model

```{r }
set.seed(SEED)
y <- rnorm(N, 5 + 30*exp(-x), 2)
data$y <- y
```

#### Classical regression with continuous predictor

```{r }
lm_2a <- lm(y ~ x, data = data)
display(lm_2a)

```
```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("SimpleCausal/figs","overview_2a.pdf"), height=3.5, width=4.5)
```
```{r }
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x, y, xlab="Treatment level", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Nonlinear treatment effect", cex.main=.9, cex.lab=.9, cex.axis=.9)
curve(5 + 30*exp(-x), add=TRUE)
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()

```
```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("SimpleCausal/figs","overview_2b.pdf"), height=3.5, width=4.5)
```
```{r }
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(x, y, xlab="Treatment level", ylab="Outcome measurement", pch=20, cex=.5, bty="l", main="Nonlinear effect, estimated with straight line fit", cex.main=.9, cex.lab=.9, cex.axis=.9)
abline(coef(lm_2a)[1], coef(lm_2a)[2])
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```

## Simulated data from two groups

```{r }
set.seed(SEED)
N <- 100
xx <- rnorm(N, 0, 1)^2
z <- rep(0:1, N/2)
xx <- ifelse(z==0, rnorm(N, 0, 1.2)^2, rnorm(N, 0, .8)^2)
yy <- rnorm(N, 20 + 5*xx + 10*z, 3)
data <- data.frame(xx, z, yy)
lm_2 <- lm(yy ~ xx + z, data=data)
display(lm_2)

```
```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("SimpleCausal/figs","overview_3.pdf"), height=3.5, width=4.5)
```
```{r }
par(mar=c(3, 3, 1.5, 1), mgp=c(1.7, .5, 0), tck=-.01)
plot(xx, yy, xlab="Pre-treatment predictor", ylab="Outcome measurement", bty="l", main="Continuous pre-treatment predictor and binary treatment    ", cex.main=.9, cex.lab=.9, cex.axis=.9, type="n")
points(xx[z==0], yy[z==0], pch=20, cex=.5)
points(xx[z==1], yy[z==1], pch=1, cex=1)
abline(coef(lm_2)[1], coef(lm_2)[2])
abline(coef(lm_2)[1] + coef(lm_2)[3], coef(lm_2)[2])
text(2.3, 29.5, "Controls", cex=.9, adj=0)
text(1.5, 45, "Treated", cex=.9, adj=0)
x0 <- 5.2
arrows(x0, coef(lm_2)[1] + coef(lm_2)[2]*x0, x0, coef(lm_2)[1] + coef(lm_2)[2]*x0 + coef(lm_2)[3], length=.1, code=3)
text(x0+.15, 1 + coef(lm_2)[1] + coef(lm_2)[2]*x0 + .5*coef(lm_2)[3], paste("Estimated\ntreatment\neffect is", fround(coef(lm_2)[3], 1)), cex=.8, adj=0)
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()

for (j in 0:1) print(mean(yy[z==j]))
for (j in 0:1) print(mean(xx[z==j]))
```