--- title: "Regression and Other Stories: Pearson and Lee Heights" author: "Andrew Gelman, Jennifer Hill, Aki Vehtari" date: "`r format(Sys.Date())`" --- The heredity of height. Published in 1903 by Karl Pearson and Alice Lee. See Chapter 6 in Regression and Other Stories. ------------- ```{r setup, include=FALSE} knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA) # switch this to TRUE to save figures in separate files savefigs <- FALSE ``` **Load packages** ```{r } library("rprojroot") root<-has_dirname("ROS-Examples")$make_fix_file() library("rstanarm") library("HistData") ``` **Load data** ```{r } heights <- read.table(root("PearsonLee/data","Heights.txt"), header=TRUE) daughter_height <- heights$daughter_height mother_height <- heights$mother_height n <- length(mother_height) ``` **Linear regression** ```{r } # MCMC sampling ``` ```{r results='hide'} fit_1 <- stan_glm(daughter_height ~ mother_height, data = heights, refresh = 0) ``` ```{r } print(fit_1, digits=2) ab_hat <- coef(fit_1) ``` **Plot mothers' and daughters' heights** ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("PearsonLee/figs","PearsonLee1.pdf"), height=4.5, width=4.5) ``` ```{r } par(mar=c(3, 3, 2, 1), mgp=c(1.7, .5, 0), tck=-.01) par(pty="s") rng <- range(mother_height, daughter_height) plot(mother_height, daughter_height, xlab="Mother's height (inches)", ylab="Adult daughter's height (inches)", bty="l", xlim=rng, ylim=rng, xaxt="n", yaxt="n", pch=20, cex=.5) x <- seq(48, 84, 6) axis(1, x) axis(2, x) for (i in x){ abline(h=i, col="gray70", lty=2) abline(v=i, col="gray70", lty=2) } ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ``` **Plot mothers' and daughters' heights with jitter** ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("PearsonLee/figs","PearsonLee2.pdf"), height=4.5, width=4.5) ``` ```{r } mother_height_jitt <- mother_height + runif(n, -0.5, 0.5) daughter_height_jitt <- daughter_height + runif(n, -0.5, 0.5) par(mar=c(3, 3, 2, 1), mgp=c(1.7, .5, 0), tck=-.01) par(pty="s") rng <- range(mother_height, daughter_height) plot(mother_height_jitt, daughter_height_jitt, xlab="Mother's height (inches)", ylab="Adult daughter's height (inches)", bty="l", xlim=rng, ylim=rng, xaxt="n", yaxt="n", pch=20, cex=.2) x <- seq(48, 84, 6) axis(1, x) axis(2, x) for (i in x){ abline(h=i, col="gray70", lty=2) abline(v=i, col="gray70", lty=2) } ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ``` **Plot mothers' and daughters' heights and fitted regression line** ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("PearsonLee/figs","PearsonLee3a.pdf"), height=4.5, width=4.5) ``` ```{r } par(mar=c(3, 3, 2, .1), mgp=c(2, .5, 0), tck=-.01) par(pty="s") rng <- range(mother_height, daughter_height) plot(mother_height_jitt, daughter_height_jitt, xlab="Mother's height (inches)", ylab="Adult daughter's height (inches)", bty="l", xlim=c(rng[1], rng[2]), ylim=rng, xaxt="n", yaxt="n", pch=20, cex=.2) x <- seq(48, 84, 6) axis(1, x) axis(2, x) for (i in x){ abline(h=i, col="gray70", lty=2) abline(v=i, col="gray70", lty=2) } abline(ab_hat[1], ab_hat[2], lwd=3, col="white") abline(ab_hat[1], ab_hat[2], lwd=1.5) points(mean(mother_height), mean(daughter_height), pch=20, cex=2, col="white") mtext("Mothers' and daughters' heights,\naverage of data, and fitted regression line", side=3, line=0) ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ``` **Plot fitted regression line and the average of the data** ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("PearsonLee/figs","PearsonLee3b.pdf"), height=4.5, width=4.5) ``` ```{r } par(mar=c(3, 3, 2, .1), mgp=c(2, .5, 0), tck=-.01) par(pty="s") rng <- range(mother_height, daughter_height) plot(mother_height_jitt, daughter_height_jitt, xlab="Mother's height (inches)", ylab="Adult daughter's height (inches)", bty="l", xlim=c(rng[1], rng[2]), ylim=rng, xaxt="n", yaxt="n", pch=20, cex=.2, type="n") x <- seq(54, 72, 6) axis(1, x) axis(2, x) abline(ab_hat[1], ab_hat[2], lwd=3, col="white") abline(ab_hat[1], ab_hat[2], lwd=1.5) lines(rep(mean(mother_height), 2), c(0, mean(daughter_height)), lwd=.5) lines(c(0, mean(mother_height)), rep(mean(daughter_height), 2), lwd=.5) axis(1, mean(mother_height), round(mean(mother_height), 1)) axis(2, mean(daughter_height), round(mean(daughter_height), 1)) text(68, 64, paste("y =", round(ab_hat[1]), "+", round(ab_hat[2], 2), "x")) text(63, 62, paste("Equivalently, y = ", round(mean(daughter_height), 1), " + ", round(ab_hat[2], 2), " * (x - ", round(mean(mother_height), 1), ")", sep="")) points(mean(mother_height), mean(daughter_height), pch=20, cex=2) mtext("The fitted regression line and the average of the data ", side=3, line=1) ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ``` **Plot fitted regression line** ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("PearsonLee/figs","PearsonLee4a.pdf"), height=4, width=4.5) ``` ```{r } par(mar=c(3, 3, 2, .1), mgp=c(2, .5, 0), tck=-.01) plot(c(0, 100), c(0, 100), xlab="", ylab="", xaxt="n", yaxt="n", bty="n", type="n") abline(h=0) abline(v=0) axis(2, round(ab_hat[1]), tck=0, las=1) axis(1, 0, tck=0, las=1, line=-.4) axis(2, 0, tck=0, las=1) abline(ab_hat[1], ab_hat[2], lwd=2) text(40, 40, paste("slope", round(ab_hat[2], 2))) mtext(paste("The line, y =", round(ab_hat[1]), "+", round(ab_hat[2], 2), "x"), side=3, line=0) ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ``` **Plot data and fitted regression line in the context of the data** ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("PearsonLee/figs","PearsonLee4b.pdf"), height=4, width=4.5) ``` ```{r } par(mar=c(3, 3, 2, .1), mgp=c(2, .5, 0), tck=-.01) plot(c(0, 100), c(0, 100), xlab="", ylab="", xaxt="n", yaxt="n", bty="n", type="n") abline(h=0) abline(v=0) axis(2, round(ab_hat[1]), tck=0, las=1) points(mother_height_jitt, daughter_height_jitt, pch=20, cex=.2) abline(ab_hat[1], ab_hat[2], lwd=3, col="white") abline(ab_hat[1], ab_hat[2], lwd=1.5) axis(1, 0, tck=0, las=1, line=-.4) axis(2, 0, tck=0, las=1) axis(1, mean(mother_height), round(mean(mother_height), 1), tck=0, las=1, line=-.4) axis(2, mean(daughter_height), round(mean(daughter_height), 1), tck=0, las=1, line=-.7) lines(rep(mean(mother_height), 2), c(0, mean(daughter_height)), lwd=.5) lines(c(0, mean(mother_height)), rep(mean(daughter_height), 2), lwd=.5) text(40, 43, paste("slope", round(ab_hat[2], 2)), cex=.9) mtext(paste("The line, y =", round(ab_hat[1]), "+", round(ab_hat[2], 2), "x, in the context of the data"), side=3, line=0) ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ```