--- title: "Regression and Other Stories: Storable" author: "Andrew Gelman, Jennifer Hill, Aki Vehtari" date: "`r format(Sys.Date())`" output: html_document: theme: readable toc: true toc_depth: 2 toc_float: true code_download: true --- Ordered categorical data analysis with a study from experimental economics, on the topic of ``storable votes''. See Chapter 15 in Regression and Other Stories. ------------- ```{r setup, include=FALSE} knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA) # switch this to TRUE to save figures in separate files savefigs <- FALSE ``` #### Load packages ```{r } library("rprojroot") root<-has_file(".ROS-Examples-root")$make_fix_file() library("rstan") rstan_options(auto_write = TRUE) library("rstanarm") invlogit<-plogis ``` #### Load data ```{r } data_2player <- read.csv(root("Storable/data","2playergames.csv")) data_3player <- read.csv(root("Storable/data","3playergames.csv")) data_6player <- read.csv(root("Storable/data","6playergames.csv")) data_all <- rbind(data_2player, data_3player, data_6player) data_all$factor_vote <- factor(data_all$vote, levels = c(1, 2, 3), labels = c("1", "2", "3"), ordered=TRUE) head(data_all) ``` #### Simple analysis using data from just one person ```{r } data_401 <- subset(data_2player, person == 401, select = c("vote", "value")) data_401$factor_vote <- factor(data_401$vote, levels = c(1, 2, 3), labels = c("1", "2", "3"), ordered=TRUE) head(data_401) fit_1 <- stan_polr(factor_vote ~ value, data = data_401, prior = R2(0.3, "mean"), refresh = 0) print(fit_1, digits=2) ``` #### 6 people ```{r } plotted <- c(101, 303, 409, 405, 504, 112) story <- c("Perfectly monotonic", "One fuzzy and one sharp cutpoint", "Monotonic with one outlier", "Only 1's and 3's", "Almost only 3's", "Erratic") n_plotted <- length(plotted) data <- as.list(rep(NA, n_plotted)) fit <- as.list(rep(NA, n_plotted)) for (i in 1:n_plotted){ #ok <- data_all[,"person"]==plotted[i] data[[i]] <- subset(data_all, person == plotted[i], select = c("vote", "factor_vote", "value")) fit[[i]] <- stan_polr(factor_vote ~ value, data=data[[i]], prior=R2(0.3, "mean"), refresh = 0, cores = 1, open_progress = FALSE, adapt_delta = 0.9999) } ``` #### Graph ```{r eval=FALSE, include=FALSE} if (savefigs) pdf(root("Storable/figs","sampledata4.pdf"), height=5, width=8) ``` ```{r } par(mfrow=c(2,3), mgp=c(1.5,.5,0), tck=-.01) for (i in 1:n_plotted){ sims <- as.matrix(fit[[i]]) n_cutpoints <- 2 cutpoints <- rep(NA, n_cutpoints) for (i_cut in 1:n_cutpoints){ cutpoints[i_cut] <- median(sims[,i_cut+1]/sims[,1]) } s <- median(1/sims[,1]) plot(data[[i]][,"value"], data[[i]][,"vote"], xlim=c(0,100), ylim=c(1,3), xlab="Value", ylab="Vote", main=story[i], yaxt="n") axis (2, 1:(n_cutpoints+1)) temp <- seq(0, 100, 0.1) prob <- array(NA, c(length(temp), n_cutpoints+1)) expected <- rep(NA, length(temp)) prob[,1] <- 1 - invlogit((temp-cutpoints[1])/s) expected <- 1*prob[,1] for (i_cut in 2:n_cutpoints){ prob[,i_cut] <- invlogit((temp-cutpoints[i_cut-1])/s) - invlogit((temp-cutpoints[i_cut])/s) expected <- expected + i_cut*prob[,i_cut] } prob[,n_cutpoints+1] <- invlogit((temp-cutpoints[n_cutpoints])/s) expected <- expected + (n_cutpoints+1)*prob[,n_cutpoints+1] lines (temp, expected, lwd=.5) for (i_cut in 1:n_cutpoints){ lines(rep(cutpoints[i_cut],2), i_cut+c(0,1), lwd=.5) } } ``` ```{r eval=FALSE, include=FALSE} if (savefigs) dev.off() ```