#' --- #' title: "Regression and Other Stories: Residuals" #' author: "Andrew Gelman, Jennifer Hill, Aki Vehtari" #' date: "`r format(Sys.Date())`" #' output: #' html_document: #' theme: readable #' toc: true #' toc_depth: 2 #' toc_float: true #' code_download: true #' --- #' Plotting the data and fitted model. See Chapter 11 in Regression #' and Other Stories. #' #' ------------- #' #+ setup, include=FALSE knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA) # switch this to TRUE to save figures in separate files savefigs <- FALSE #' #### Load packages library("rprojroot") root<-has_file(".ROS-Examples-root")$make_fix_file() library("rstanarm") #' ## Simple model with const term, one pre-treatment predictor, and treatment indicator #' #' #### Fake data N <- 100 x <- runif(N, 0, 1) z <- sample(c(0, 1), N, replace=TRUE) a <- 1 b <- 2 theta <- 5 sigma <- 2 y <- a + b*x + theta*z + rnorm(N, 0, sigma) fake <- data.frame(x=x, y=y, z=z) #' #### Model fit <- stan_glm(y ~ x + z, data=fake, refresh = 0) #' #### Plot Predictor vs Outcome #+ eval=FALSE, include=FALSE if (savefigs) pdf(root("Residuals/figs","resid_0a.pdf"), height=3.5, width=9) #+ par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01) for (i in 0:1){ plot(range(x), range(y), type="n", xlab="Pre-treatment predictor, x", ylab="Outcome, y", main=paste("z =", i), bty="l") points(x[z==i], y[z==i], pch=20+i) abline(coef(fit)["(Intercept)"] + coef(fit)["z"]*i, coef(fit)["x"]) } #+ eval=FALSE, include=FALSE if (savefigs) dev.off() #' ## More complicated model with multiple pre-treatment predictors. #' #' #### Fake data #' #' Creating the linear predictor from the fitted multiple regression #' model N <- 100 K <- 10 X <- array(runif(N*K, 0, 1), c(N, K)) z <- sample(c(0, 1), N, replace=TRUE) a <- 1 b <- 1:K theta <- 10 sigma <- 5 y <- a + X %*% b + theta*z + rnorm(N, 0, sigma) fake <- data.frame(X=X, y=y, z=z) #' #### Model fit <- stan_glm(y ~ X + z, data=fake, refresh = 0) #' #### Plot Predictor vs Outcome #+ eval=FALSE, include=FALSE if (savefigs) pdf(root("Residuals/figs","resid_0b.pdf"), height=4, width=9) #+ y_hat <- predict(fit) par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01) par(mfrow=c(1,2), pty="s") for (i in 0:1){ plot(range(y_hat,y), range(y_hat,y), type="n", xlab=expression(paste("Linear predictor, ", hat(y))), ylab="Outcome, y", main=paste("z =", i), bty="l") points(y_hat[z==i], y[z==i], pch=20+i) abline(0, 1) } #+ eval=FALSE, include=FALSE if (savefigs) dev.off() #' #### Plot Predictor vs Residual #+ eval=FALSE, include=FALSE if (savefigs) pdf(root("Residuals/figs","resid_0c.pdf"), height=3.5, width=9) #+ r <- y - y_hat par(mfrow=c(1,2), mar=c(3,3,2,2), mgp=c(1.7,.5,0), tck=-.01) par(mfrow=c(1,2)) for (i in 0:1){ plot(range(y_hat), range(r), type="n", xlab=expression(paste("Linear predictor, ", hat(y))), ylab="Residual, r", main=paste("z =", i), bty="l") points(y_hat[z==i], r[z==i], pch=20+i) abline(0, 0) } #+ eval=FALSE, include=FALSE if (savefigs) dev.off()