---
title: "Regression and Other Stories: Storable"
author: "Andrew Gelman, Jennifer Hill, Aki Vehtari"
date: "`r format(Sys.Date())`"
output:
  html_document:
    theme: readable
    toc: true
    toc_depth: 2
    toc_float: true
    code_download: true
---
Ordered categorical data analysis with a study from experimental
economics, on the topic of ``storable votes''. See Chapter 15 in
Regression and Other Stories.

-------------


```{r setup, include=FALSE}
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, comment=NA)
# switch this to TRUE to save figures in separate files
savefigs <- FALSE
```

#### Load packages

```{r }
library("rprojroot")
root<-has_file(".ROS-Examples-root")$make_fix_file()
library("rstan")
rstan_options(auto_write = TRUE)
library("rstanarm")
invlogit<-plogis
```

#### Load data

```{r }
data_2player <- read.csv(root("Storable/data","2playergames.csv"))
data_3player <- read.csv(root("Storable/data","3playergames.csv"))
data_6player <- read.csv(root("Storable/data","6playergames.csv"))
data_all <- rbind(data_2player, data_3player, data_6player)
data_all$factor_vote <- factor(data_all$vote, levels = c(1, 2, 3), labels = c("1", "2", "3"), ordered=TRUE)
head(data_all)
```

#### Simple analysis using data from just one person

```{r }
data_401 <- subset(data_2player, person == 401, select = c("vote", "value"))
data_401$factor_vote <- factor(data_401$vote, levels = c(1, 2, 3), labels = c("1", "2", "3"), ordered=TRUE)
head(data_401)
fit_1 <- stan_polr(factor_vote ~ value, data = data_401,
                   prior = R2(0.3, "mean"), refresh = 0)
print(fit_1, digits=2)
```

#### 6 people

```{r }
plotted <- c(101, 303, 409, 405, 504, 112)
story <- c("Perfectly monotonic",
           "One fuzzy and one sharp cutpoint",
           "Monotonic with one outlier",
           "Only 1's and 3's",
           "Almost only 3's",
           "Erratic")
n_plotted <- length(plotted)
data <- as.list(rep(NA, n_plotted))
fit <- as.list(rep(NA, n_plotted))
for (i in 1:n_plotted){
  #ok <- data_all[,"person"]==plotted[i]
  data[[i]] <- subset(data_all, person == plotted[i], 
                      select = c("vote", "factor_vote", "value"))
    fit[[i]] <- stan_polr(factor_vote ~ value, data=data[[i]],
                          prior=R2(0.3, "mean"), refresh = 0,
                          cores = 1, open_progress = FALSE,
                          adapt_delta = 0.9999)
}
```

#### Graph

```{r eval=FALSE, include=FALSE}
if (savefigs) pdf(root("Storable/figs","sampledata4.pdf"), height=5, width=8)
```
```{r }
par(mfrow=c(2,3), mgp=c(1.5,.5,0), tck=-.01)
for (i in 1:n_plotted){
  sims <- as.matrix(fit[[i]])
  n_cutpoints <- 2
  cutpoints <- rep(NA, n_cutpoints)
  for (i_cut in 1:n_cutpoints){
    cutpoints[i_cut] <- median(sims[,i_cut+1]/sims[,1])
  }
  s <- median(1/sims[,1])
  plot(data[[i]][,"value"], data[[i]][,"vote"], xlim=c(0,100), ylim=c(1,3),
        xlab="Value", ylab="Vote", main=story[i], yaxt="n")
  axis (2, 1:(n_cutpoints+1))
  temp <- seq(0, 100, 0.1)
  prob <- array(NA, c(length(temp), n_cutpoints+1))
  expected <- rep(NA, length(temp))
  prob[,1] <- 1 - invlogit((temp-cutpoints[1])/s)
  expected <- 1*prob[,1]
  for (i_cut in 2:n_cutpoints){
    prob[,i_cut] <- invlogit((temp-cutpoints[i_cut-1])/s) -
      invlogit((temp-cutpoints[i_cut])/s)
    expected <- expected + i_cut*prob[,i_cut]
  }
  prob[,n_cutpoints+1] <- invlogit((temp-cutpoints[n_cutpoints])/s)
  expected <- expected + (n_cutpoints+1)*prob[,n_cutpoints+1]
  lines (temp, expected, lwd=.5)
  for (i_cut in 1:n_cutpoints){
    lines(rep(cutpoints[i_cut],2), i_cut+c(0,1), lwd=.5)
  }
}
```
```{r eval=FALSE, include=FALSE}
if (savefigs) dev.off()
```