#' --- #' title: "Regression and Other Stories: Sesame street" #' author: "Andrew Gelman, Jennifer Hill, Aki Vehtari" #' date: "`r format(Sys.Date())`" #' output: #' html_document: #' theme: readable #' toc: true #' toc_depth: 2 #' toc_float: true #' code_download: true #' --- #' Causal analysis of Sesame Street experiment. See Chapters 18 and 21 #' in Regression and Other Stories. #' #' ------------- #' #+ setup, include=FALSE knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA) #' #### Load packages library("rprojroot") root<-has_file(".ROS-Examples-root")$make_fix_file() library("rstanarm") library("brms") #' Set random seed for reproducability SEED <- 1234 #' #### Load data sesame <- read.csv(root("Sesame/data","sesame.csv")) head(sesame) #' ## Compliance (sesame_tab <- table(sesame[,c('watched','encouraged')])) round(prop.table(sesame_tab, margin=2), digits=2) #' ## Wald estimator #' #' Estimate the intent-to-treat (ITT) effect of the instrument #' (encouragement) on the treatment (regular watching), that is, #' percentage of children actually induced to watch Sesame Street by #' the intervention itt_zt <- stan_glm(watched ~ encouraged, data=sesame, seed=SEED, refresh=0) print(itt_zt, digits=2) #' Estimate the intent-to-treat (ITT) estimate on the outcome #' (post-treatment letter identification) itt_zy <- stan_glm(postlet ~ encouraged, data=sesame, refresh=0) print(itt_zy, digits=1) #' Calculate Wald estimate, ie the ratio of the above two estimates wald_est <- coef(itt_zy)["encouraged"] / coef(itt_zt)["encouraged"] round(wald_est, digits=1) #' ## Two stage approach #' #' #### Predict the "treatment" variable on the randomized instrument #' #' The first step is to regress the "treatment" variable---an #' indicator for regular watching (watched)---on the randomized #' instrument, encouragement to watch (encouraged). fit_2a <- stan_glm(watched ~ encouraged, data=sesame, seed=SEED, refresh=0) print(fit_2a, digits=2) summary(fit_2a$fitted, digits=2) sesame$watched_hat <- fit_2a$fitted #' Then we plug predicted values of watched into the equation #' predicting the letter recognition outcome. fit_2b <- stan_glm(postlet ~ watched_hat, data=sesame, seed=SEED, refresh=0) print(fit_2b, digits = 1) #' ## Two stage approach with instrumental variables #' #' Two stage approach with adjusting for covariates in an instrumental variables framework. #' #' #### Predict the "treatment" variable on the randomized instrument and pre-treatment variables. #' #' The first step is to regress the "treatment" variable on the #' randomized instrument, encouragement to watch (encouraged) and #' pre-treatment variables. fit_3a <- stan_glm(watched ~ encouraged + prelet + as.factor(site) + setting, data=sesame, seed=SEED, refresh=0) print(fit_3a, digits=2) summary(fit_3a$fitted, digits=2) #' Then we plug predicted values of watched into the equation #' predicting the letter recognition outcome. watched_hat_3 <- fit_3a$fitted fit_3b <- stan_glm(postlet ~ watched_hat_3 + prelet + as.factor(site) + setting, data=sesame, seed=SEED, refresh=0) print(fit_3b, digits = 1) #' #### Estimate the standard errors #' #' Use the predictor matrix from this second-stage regression. X_adj <- X <- model.matrix(fit_3b) X_adj[,"watched_hat_3"] <- sesame$watched n <- nrow(X) p <- ncol(X) #' #### Compute the standard deviation of the adjusted residuals RMSE1 <- sqrt(sum((sesame$postlet - X %*% coef(fit_3b))^2)/(n-p)) RMSE2 <- sqrt(sum((sesame$postlet - X_adj %*% coef(fit_3b))^2)/(n-p)) se_adj <- se(fit_3b)["watched_hat_3"] * sqrt(RMSE1 / RMSE2) print(se_adj, digits=2) #' ## Two-stage approach with brms #' #' #### Predict the "treatment" variable on the randomized instrument f1 <- bf(watched ~ encour) f2 <- bf(postlet ~ watched) #+ results='hide' IV_brm_a <- brm(f1 + f2, data=sesame, seed=SEED) #+ print(IV_brm_a, digits=1) #' #### Incorporate other pre-treatment variables as controls f1 <- bf(watched ~ encour + prelet + setting + factor(site)) f2 <- bf(postlet ~ watched + prelet + setting + factor(site)) #+ results='hide' IV_brm_b <- brm(f1 + f2, data=sesame, seed=SEED) #+ print(IV_brm_b, digits=1)