# balance function after 2019

balance <- function (rawdata, treat, matched, estimand="ATT")
                     #factor = TRUE) 
{
  # rawdata: the full covariate dataset 
  # treat: the vector of treatment assignments for the full dataset
  # matched: vector of weights to apply to the full dataset to create the 
  # restructured data:  
  # --for matching without replacement these will all be 0's and 1's
  # --for one-to-one matching with replacement these will all be non-negative 
  #   integers
  # --for IPTW or more complicated matching methods these could be any 
  #   non-negative numbers
  # estimand: can either be ATT, ATC, or ATE
  require("Hmisc")
  if(missing(rawdata)) stop("rawdata is required")
    if(missing(matched)) stop("argument matched is required")
      if(missing(treat)) stop("treatment vector (treat) is required")
        cat("Balance diagnostics assume that the estimand is the",estimand,"\n")
  #
  #raw.dat <- data.frame(rawdata, treat = treat)
    covnames <- colnames(rawdata)
    if (is.null(covnames)){
        cat("No covariate names provided.  Generic names will be generated.")
        covnames = paste("v",c(1:ncol(rawdata)),sep="")
    }
    K <- length(covnames) 
    diff.means <- matrix(NA, K, 5)
    var.t <- numeric(K)
    var.c <- numeric(K)
    std.denom <- numeric(K)
    binary <- rep(1,K)
  #
  # First we calculate balance on the RAW DATA
  # Columns are (1) treat mean, (2) control mean, (3) diff in means, (4) abs std diff,
  # (5) ratio of sds
    for (i in 1:K) {
    # separate means by group
        diff.means[i, 1] <- mean(rawdata[treat==1, i]) 
        diff.means[i, 2] <- mean(rawdata[treat==0, i])
    # separate variances by group == only used as input to calculations below
        var.t[i] <- var(rawdata[(treat == 1), i])
        var.c[i] <- var(rawdata[(treat == 0), i])
    # denominator in standardized difference calculations
        if(estimand=="ATE"){std.denom[i] <- sqrt((var.t[i]+var.c[i])/2)}
        else{
          std.denom[i] <- ifelse(estimand=="ATT",sqrt(var.t[i]),sqrt(var.c[i]))
      }
    # difference in means
      diff.means[i, 3] <- diff.means[i, 1] - diff.means[i, 2]
    # standardized difference in means (sign intact)
      diff.means[i, 4] <- abs(diff.means[i, 3]/std.denom[i])
      if(length(unique(rawdata[,covnames[i]]))>2){
          binary[i] = 0
      }
  }
    #ifelse(estimand="ATT",sqrt(var.c[i]/var.t[i]),sqrt(var.t[i]/var.c[i]))
#  dimnames(diff.means) <- list(covnames[-(K + 1)], c("treat", "control", "unstd.diff", 
#                                                     "abs.std.diff", "ratio"))
#  diff.means[is.na(diff.means)] = "--"  #maybe only worry about in print function
  dimnames(diff.means) <- list(covnames, c("treat", "control", "unstd.diff", 
     "abs.std.diff", "ratio"))
  # Now we calculate balance on the restructured data
  diff.means.matched = matrix(NA, K, 5)
  #
  for (i in 1:K) {
    wts0 <- matched[treat==0]
    # separate means by group
    diff.means.matched[i, 1] <- mean(rawdata[treat == 1, i])
    diff.means.matched[i, 2] <- weighted.mean(rawdata[treat==0, i],w=wts0)
    # separate variances by group == only used as input to calculations below
    # these overwrite the variance above
    var.t[i] <- var(rawdata[treat == 1, i])
    var.c[i] <- wtd.var(rawdata[treat==0,i],weights=wts0)
    # difference in means
    diff.means.matched[i, 3] <- diff.means.matched[i, 1] - diff.means.matched[i, 2]
    # absolute standardized difference in means (denominator is stolen from
    # calculations on raw data above)
    diff.means.matched[i, 4] <- abs(diff.means.matched[i, 3])/std.denom[i]
    if(length(unique(rawdata[,covnames[i]]))>2){
      # just for binary
      # ratio of sds (treat over control:  should we change to comparison over inferential)
      diff.means.matched[i, 5] <- sqrt(var.c[i]/var.t[i])
  }
}
  #dimnames(diff.means.matched) <- list(covnames[-(K + 1)], c("treat", "control", "unstd.diff", 
  #                                                            "abs.std.diff", "ratio"))
dimnames(diff.means.matched) <- list(covnames, c("treat", "control", "unstd.diff", 
 "abs.std.diff", "ratio"))
  #
out <- list(diff.means.raw = diff.means, diff.means.matched = diff.means.matched, 
    covnames = covnames, binary = binary)
class(out) <- "balance"
return(out)
}


print.balance <- function(x, ..., combined=FALSE, digits= 2)
{
   if(combined==FALSE){
      cat("Balance Statistics for Unmatched Data\n")
      cat("--\n")
      print(round(x$diff.means.raw, digits=digits))
      cat("--\n")
      cat("\n")
      cat("Balance Statistics for Matched Data\n")
      cat("--\n")
      print(round(x$diff.means.matched, digits=digits), na.print="--")
      cat("--\n")
      cat("\n")
  }
  else{
   cat("Balance Statistics\n")
   cat("--\n") 
   print(round(cbind(x$diff.means.raw,x$diff.matched.raw)[,c(4,9,5,10)], 
     digits=digits), na.print="--")
}
}

### NEXT NEED TO FIGURE OUT HOW TO REVERSE THE ORDER OF THE COVARIATES

plot.balance <- function(x, longcovnames=NULL, which.covs="mixed",
    v.axis=TRUE, cex.main=1, cex.vars=1, cex.pts=1,
    mar=c(4, 3, 5.1, 2), plot=TRUE, x.max = NULL,...)
{
    # if which.covs = mixed then it plots all as std diffs
    # if which.covs = binary it only plots binary and as abs unstd diffs
    # if which.covs = cont it only plots non-binary and as abs std diffs
    #  
 
    covnames <- x$covnames
    if(!is.null(x.max)){
        x.range = c(0,x.max)
    }
    # if(which.covs=="binary") {
    #   cat("condition satisfied \n")
    # }

    # if plotting all, then use the standardized diff for all
    if(which.covs == "mixed"){
        pts <-  x$diff.means.raw[,4]                    # before matched.dat
        pts2 <- x$diff.means.matched[,4]                  # after matched
        K <- length(pts)
        idx <- 1:K
        main="Absolute Standardized Difference in Means"
    }
    #if plotting just binary use the unstandardized difference
    # for the plot make it the absolute value of
    if(which.covs == "binary"){
        pts <-  abs(x$diff.means.raw[x$binary==TRUE,3])      # before matched.dat
        pts2 <- abs(x$diff.means.matched[x$binary==TRUE,3])  # after matched
        K <- length(pts)
        idx <- 1:K
        main="Absolute Difference in Means"
        covnames = covnames[x$binary==TRUE]
    }
    #if plotting just continuous use the standardized difference
    if(which.covs == "cont"){
        pts <-  x$diff.means.raw[x$binary==FALSE,4]      # before matched
        pts2 <- x$diff.means.matched[x$binary==FALSE,4]  # after matched
        K <- length(pts)
        idx <- 1:K
        main="Absolute Standardized Difference in Means"
        covnames = covnames[x$binary==FALSE]
    }
    cat(pts,"\n")
    # tune the graphic console
    #par (mar=mar, mgp=mgp, oma=oma, tcl=tcl)

    par(mar = mar)
    if (is.null(longcovnames)) {
        longcovnames <- covnames
        maxchar <- max(sapply(longcovnames, nchar))
    }
    else {
        maxchar <- max(sapply(longcovnames, nchar))
    }
    min.mar <- par("mar")
    mar[2] <- max(min.mar[2], trunc(mar[2] + maxchar/10)) + mar[2] + 0.5
    par(mar = mar)

    ## now reverse the order of everything so the plot proceeds from
    ## to top to bottom with respect to original ordering of variables
    pts = rev(pts)
    pts2 = rev(pts2)
    longcovnames = rev(longcovnames)
    
    if(plot){
        # plot the estimates
        if(is.null(x.max)){
            plot(c(pts,pts2), c(idx,idx),
                xlim=c(0, max(c(pts,pts2))),
                bty="n", xlab="", ylab="",
                xaxt="n", yaxt="n", type="n",
                main=main, cex.main=cex.main)
        }
        if(!is.null(x.max)){
            plot(c(pts,pts2), c(idx,idx),
                bty="n", xlab="", ylab="",
                xaxt="n", yaxt="n", type="n",
                xlim=x.range,
                main=main, cex.main=cex.main)
        }
        abline(v=0, lty=2)
        points(pts, idx, cex=cex.pts)          # before matched
        points(pts2, idx, pch=19, cex=cex.pts) # after matched
        if (v.axis){
            axis(3)
        }
        if (is.null(longcovnames)){
            axis(2, at=1:K, labels=covnames[1:K],
                las=2, hadj=1, lty=0, cex.axis=cex.vars)
        }
        else{
            axis(2, at=1:K, labels=longcovnames[1:K],
                las=2, hadj=1, lty=0, cex.axis=cex.vars)
        }
    }
    else{
        plot(c(pts,pts2), c(idx,idx),
            bty="n", xlab="", ylab="",
            xaxt="n", yaxt="n", #xaxs="i",
            #yaxs="i",
            type="n", axes=FALSE,
            #ylim=c(max(idx)+.25, min(idx)-.25),
            #xlim=x.range,
            main="", cex.main=cex.main,...)
    }
    return(list("raw"=pts, "matched"=pts2))
}