#' ---
#' title: "Regression and Other Stories: Fake midterm and final exam"
#' author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
#' date: "`r format(Sys.Date())`"
#' output:
#'   html_document:
#'     theme: readable
#'     toc: true
#'     toc_depth: 2
#'     toc_float: true
#'     code_download: true
#' ---

#' Fake dataset of 1000 students' scores on a midterm and final
#' exam. See Chapter 6 in Regression and Other Stories.
#' 
#' -------------
#' 

#+ setup, include=FALSE
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE

#' #### Load packages
library("rstanarm")

#' #### Simulate fake data
n <- 1000
# set the random seed to get reproducible results
# change the seed to experiment with variation due to random noise
set.seed(2243)
true_ability <- rnorm(n, 50, 10)
noise_1 <- rnorm(n, 0, 10)
noise_2 <- rnorm(n, 0, 10)
midterm <- true_ability + noise_1
final <- true_ability + noise_2
exams <- data.frame(midterm, final)

#' #### Linear regression
#' 
#' The option `refresh = 0` supresses the default Stan sampling
#' progress output. This is useful for small data with fast
#' computation. For more complex models and bigger data, it can be
#' useful to see the progress.
fit_1 <- stan_glm(final ~ midterm, data=exams, refresh = 0)
print(fit_1, digits=2)

#' #### Plot midterm and final exam scores
#+ eval=FALSE, include=FALSE
if (savefigs) pdf(here("FakeMidtermFinal/figs","FakeMidtermFinal1.pdf"), height=4, width=4)
#+
par(mar=c(3, 3, 2, 1), mgp=c(1.7, .5, 0), tck=-.01)
par(pty="s")
plot(midterm, final, xlab="Midterm exam score", ylab="Final exam score", xlim=c(0,100), ylim=c(0,100), xaxs="i", yaxs="i", xaxt="n", yaxt="n", pch=20, cex=.5)
x <- seq(0,100,20)
axis(1, x)
axis(2, x)
for (i in x){
  abline(h=i, col="gray70", lty=2)
  abline(v=i, col="gray70", lty=2)
}
abline(coef(fit_1))
#+ eval=FALSE, include=FALSE
if (savefigs) dev.off()