## ----include = FALSE----------------------------------------------------------
is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_",
             "_R_CHECK_LICENSE_") %in% names(Sys.getenv()))
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"#,eval = !is_check
)

## ----setup--------------------------------------------------------------------
library(ezECM)

## ----datagen------------------------------------------------------------------
data_gen <- function(p = NULL, K = 3, Ntest = NULL, Ntrain = NULL, 
                     Ntrain_missing = NULL, tst_missing = NULL, trn_missing = NULL){
  
    mu_use <- matrix(rnorm(n = p*K, sd = 0.5), ncol = K, nrow = p)
    Y <- list()
    S <- list()
    
    ## random number in each class
    
    N <- LaplacesDemon::rcat(n = Ntest + Ntrain + Ntrain_missing, 
                             p = rep(1/3, times = K))
    N <- as.vector(table(N))
    
    ## random very important category
    
    vic <- sample(1:K, size = 1)
    
    for(k in 1:K){
      
      ## random number of blocks in kth category
      
      nblocks <- sample(1:2, size = 1)
      
      ## random covariance matrix for kth category
      
      S[[k]] <- LaplacesDemon::rinvwishart(nu = p + 4, S = diag(p))
      
      
      ## If relevant, delete entries to form block covariance matrices of random sizes
      
      if(nblocks == 2){
        nblock1 <- sample(1:(p-1), size = 1)
        block1_members <- sample(1:p, size = nblock1, replace = FALSE)
        block2_members <- (1:p)[-block1_members]
        
        zero_elements <- expand.grid(block1_members, block2_members)
        
        S[[k]][zero_elements$Var1, zero_elements$Var2] <- 0
        S[[k]][zero_elements$Var2, zero_elements$Var1] <- 0
        
      }
      
      ## data for kth class is drawn from a MVN, given the mean and covariance
      Y[[k]] <- as.data.frame(LaplacesDemon::rmvn(n = N[k], mu = mu_use[,k], Sigma= S[[k]]))
      
      ## data is transformed to (0,1) using the logistic function to run a check
      Ytemp<- 1/(1+ exp(-Y[[k]]))
      
      ## if machine precision causes the output of the logistic function to round to 1, the experiment is stopped
      ## this has never happened
      if(max(apply(Ytemp,2,function(X){range(X)})) == 1){
        stop()
      }else{
        Y[[k]]<- 1/(1+ exp(-Y[[k]]))
      }
      
      ## a column for the event class is appended to the data.frame
      Y[[k]] <- cbind(Y[[k]], as.character(k))
      names(Y[[k]]) <- c(paste("p",as.character(1:p),  sep = ""), "event")
      
    }
    
    Y <- do.call("rbind", Y)
    
    ## A random sample of the data is taken to be the testing set
    test_index <- sample(1:nrow(Y), size = Ntest)
    testing <- Y[test_index,]
    
    ## The remainder is slated for training
    training <- Y[-test_index, ]
    
    ## A random sample of the training set is set aside to have missing entries,
    ## the remainder is set aside to be the fully observed training set
    train_full_index <- sample(1:nrow(training), size = Ntrain)
    train_missing <- training[-train_full_index, ]
    train_full <- training[train_full_index, ]
    
    ## If all event categories are not represented in the training set of full observations,
    ## the sampling scheme repeats
    if(any(table(train_full$event) <= 1) | length(table(train_full$event)) <= (K-1)){
      while(any(table(train_full$event) <= 1 | length(table(train_full$event)) <= (K-1))){
        test_index <- sample(1:nrow(Y), size = Ntest)
        
        testing <- Y[test_index,]
        training <- Y[-test_index, ]
        train_full_index <- sample(1:nrow(training), size = Ntrain)
        train_missing <- training[-train_full_index, ]
        train_full <- training[train_full_index, ]
      }
    }
    
    ## the true event category for the testing set is saved as a seperate variable,
    ## and deleted from the data.frame containing the data
    test_truth <- testing$event
    testing$event <- NULL
    
    ## Entries are randomly selected for deletion from the testing set.
    ## This scheme ensures a single discriminant for each observation
    ## not deleted in order to reduce computation time.
    
    abs_present <- sample(size = Ntest, 1:p, replace = TRUE)
    missing_pool <- matrix(1:p, ncol = p, nrow = Ntest, byrow = TRUE)
    
    missing_pool <- t(apply(cbind(missing_pool, abs_present), 1, function(X,pp){
      X[-c(X[pp + 1], pp +1)]
    }, pp = p))
    
    missing_pool_save <- missing_pool
    frac_missing <- (p*tst_missing)/(p-1)
    
    # sample which of the remaining elements will be missing
    missing_sample <- sample(1:(nrow(missing_pool)*ncol(missing_pool)), 
                             size = floor(nrow(missing_pool)*ncol(missing_pool)*(frac_missing)), 
                             replace = FALSE)
    
    missing_pool_save[missing_sample] <- NA
    
    saved_data <- apply(cbind(missing_pool_save, unname(abs_present)), 1, function(X){
      X[-which(is.na(X))]
    })
    
    for(j in 1:nrow(testing)){
      testing[j,-c(saved_data[[j]])] <- NA
    }
    
    ## Entries are randomly selected for deletion from the training set.
    ## This scheme ensures a single discriminant for each observation
    ## not deleted in order to reduce computation time.
    
    abs_present <- sample(size = Ntrain_missing, 1:p, replace = TRUE)
    abs_missing <- matrix(1:p, ncol = p, nrow = Ntrain_missing, byrow = TRUE)
    
    abs_missing <- t(apply(cbind(abs_missing, abs_present), 1, function(X,pp){
      X[-c(X[pp + 1], pp +1)]
    }, pp = p))
    
    abs_missing <- apply(abs_missing, 1, function(X){
      sample(X, size = 1)
    })
    
    missing_pool <- matrix(1:p, ncol = p, nrow = Ntrain_missing, byrow = TRUE)
    
    missing_pool <- t(apply(cbind(missing_pool, abs_present, abs_missing), 1, function(X,pp){
      X[-c(X[pp + 1], X[pp + 2], pp +1, pp + 2)]
    }, pp = p))
    
    missing_pool_save <- missing_pool
    frac_missing <- (p*trn_missing - 1)/(p-2)
    
    # sample which of the remaining elements will be missing
    missing_sample <- sample(1:(nrow(missing_pool)*ncol(missing_pool)), size = floor(nrow(missing_pool)*ncol(missing_pool)*(frac_missing)), replace = FALSE)
    
    
    missing_pool_save[missing_sample] <- NA
    
    saved_data <- apply(cbind(missing_pool_save, unname(abs_present)), 1, function(X){
      X[-which(is.na(X))]
    })
    
    for(j in 1:nrow(train_missing)){
      train_missing[j,-c(saved_data[[j]], p+1)] <- NA
    }
    
    return(list(Y = list(train_full = train_full, train_missing = train_missing, 
                         testing = testing, test_truth = test_truth), 
                params = list(mu = mu_use, Sig = S, vic = vic)))
    
}


## -----------------------------------------------------------------------------

## Data Parameters
tst_missing <- 0.5
trn_missing <- 0.5

Ntrain <- 25
Ntest <- 100
Ntrain_missing <- 5 * Ntrain

K <- 3

## -----------------------------------------------------------------------------
P <- c(4,6)
#P <- c(4,6,8,10)

## -----------------------------------------------------------------------------
alphatilde <- 0.05

## -----------------------------------------------------------------------------
mixture_weights <- "training"

## -----------------------------------------------------------------------------
C01 <- matrix(c(0,1,1, 0), ncol = 2, nrow = 2)
C01

## -----------------------------------------------------------------------------
Cfneg <- C01
Cfneg[1,2] <- 2
Cfneg

## -----------------------------------------------------------------------------
Ccat <- matrix(1, ncol = 3, nrow = 3) - diag(3)
Ccat

## -----------------------------------------------------------------------------
iters <- 3#250

## -----------------------------------------------------------------------------
## MCMC parameters
BT <- c(150, 2150)
#BT <- c(500, 50500)

## -----------------------------------------------------------------------------
thinning <- 5

## -----------------------------------------------------------------------------
## Experimental Parameters

verb <- FALSE
#verb <- TRUE

## -----------------------------------------------------------------------------

## Data structures for saving progress

cECM_recfp <- cECM_recfn <- bayes_rec <- cECM_rec <- matrix(NA, ncol = length(P), nrow = iters)
Nvic <- rep(0, times = length(P))

exp_out <- list()
method_template <- data.frame(matrix(NA, ncol = 3, nrow = iters))
names(method_template) <- c("accurate", "FN", "FP")
data_template <- data.frame(matrix(NA, ncol = 2, nrow = iters))
names(data_template) <- c("Ntest", "Nvic")
data_template$Ntest <- Ntest

p_template <- list(cECM = method_template, becm = method_template, 
                   mbecm = method_template, mbecm_Cfn = method_template, 
                   mbecm_cat = method_template, data = data_template)

bayes_rec <- cECM_rec <- matrix(NA, ncol = length(P), nrow = iters)

if(verb){
toc <- Sys.time()
}

## -----------------------------------------------------------------------------
## Iterates over the differing numbers of total discriminants set for the experiment.
for(p in P){
  
  ## Builds the list for saving the results using a template list
  exp_out[[p]] <- p_template
  
  ## Runs each model for `iters` number of independent data sets.
  for(i in 1:iters){
    ## The i^{th} run for p discriminants
    if(verb){    
      ## set the experimental parameter verb <- TRUE to print progress
      print(paste0("i = ", i, ", p = ", p, ", ", round(Sys.time() - toc, digits = 2), " ", units(Sys.time() - toc), " elapsed"))
    }
    
    ## Generate random data set
    
      Ylist <- data_gen(p = p, K = K, Ntest = Ntest, Ntrain = Ntrain, 
                     Ntrain_missing = Ntrain_missing, tst_missing = tst_missing, 
                    trn_missing = trn_missing)
      
      ## Saves the random data information to the environment
      
      train_full<- Ylist$Y$train_full
      train_missing <- Ylist$Y$train_missing
      testing <- Ylist$Y$testing
      test_truth <- Ylist$Y$test_truth
      ## Which category is the important one this time?
      vic <- Ylist$params$vic
    
    
    ## Save the true total number of `vic` events in the testing data to be used
    ## later for analyzing performance.
    
    exp_out[[p]][["data"]]$Nvic[i] <- sum(test_truth == as.character(vic))
    
    ## Fit the classical ECM model, apply the decision framework with the
    ## `cECM_decision()` function, then save the results
    
    cECM <- cECM(x = train_full, transform = TRUE, newdata = testing)

    cECM_out <-  apply(cECM_decision(pval = cECM, alphatilde = alphatilde,
                                 vic = as.character(vic), 
                                 cat_truth = test_truth)$events[,1:3] ,2, 
                       sum, na.rm = TRUE)

    exp_out[[p]][["cECM"]][i,] <- unname(cECM_out)
    
    ## Fit the B-ECM model, using only full p observations
    
    bayes_fit <- BayesECM(Y = train_full)
    
    ## Run the predict function on the testing set.
    ## If there were multiple testing sets, the same model fit could be used on
    ## each one without having to rerun the `BayesECM()` function.  This
    ## functionality is more important when using training data with missing
    ## entries.
    
    bayes_pred <- predict(bayes_fit, Ytilde = testing, 
                          mixture_weights = mixture_weights)
    
    ## The "becm_desision()" function applies the decision theoretic framework
    ## to the training and testing data.  For one training and one testing set,
    ## where the user wants to try different values of `alphatilde` and `C`, it is
    ## not necessary to rerun the `BayesECM()` function or the `predict()`
    ## function.
    
    becm_out <- becm_decision(bayes_pred = bayes_pred, alphatilde = alphatilde,
                                vic = as.character(vic), cat_truth = test_truth, 
                              pn = TRUE, C = C01)
    
    ## Summarize and save the data.
    
    becm_out <- apply(becm_out$results,2, sum, na.rm = TRUE)
    
    exp_out[[p]][["becm"]][i,] <- unname(becm_out)
    
    
    ## Fit and save the B-ECM model that includes missing data
    
    bayes_fit_missing <- BayesECM(Y = rbind(train_full, train_missing), BT = BT, 
                                  verb = verb)
    
    bayes_pred_missing <- predict(bayes_fit_missing, Ytilde = testing, 
                                  thinning = thinning, 
                                  mixture_weights = mixture_weights)
    
    missing_out <- becm_decision(bayes_pred = bayes_pred_missing, alphatilde = alphatilde,
                             vic = as.character(vic), cat_truth = test_truth, 
                             pn = TRUE, C = C01)
    mbecm_out <- apply(missing_out$results,2, sum, na.rm = TRUE)
    
    
    exp_out[[p]][["mbecm"]][i,] <- unname(mbecm_out)
    
    ## The rest of the B-ECM variants are different through decision theory,
    ## not the model fit.  All use partial observations for training.
    ## Note that the rej argument is supplied to becm_decision to reduce computation time
    
    ## Record the decision when the loss matrix is adjusted to target
    ## false negatives.
    
    Cfn_out <- becm_decision(bayes_pred = bayes_pred_missing, alphatilde = alphatilde,
                         vic = as.character(vic), cat_truth = test_truth, 
                         pn = TRUE, C = Cfneg, rej = missing_out$rej)
    becm_Cfn_out <- apply(Cfn_out$results,2, sum, na.rm = TRUE)
    
    
    exp_out[[p]][["mbecm_Cfn"]][i,] <- unname(becm_Cfn_out)
    
    ## Record the decisions when full class (K = 3) categorization is used
    ## instead of binary categorization
    
    cat_out <-  becm_decision(bayes_pred = bayes_pred_missing, alphatilde = alphatilde,
                          vic = as.character(vic), cat_truth = test_truth, 
                          pn = TRUE, C = Ccat, rej = missing_out$rej)
    becm_cat_out <- apply(cat_out$results,2, sum, na.rm = TRUE)
    
    
    exp_out[[p]][["mbecm_cat"]][i,] <- unname(becm_cat_out)

    
  }
  
}


## -----------------------------------------------------------------------------

ECM_boxplot <- function(exp_out, P = P, models = c("cECM", "becm", 
                                                   "mbecm", 
                                                   "mbecm_Cfn",
                                                   "mbecm_cat"),
                        cols = NULL,
                        legend_text =  c("C-ECM", "B-ECM", "M-B-ECM", 
                  bquote("M-B-ECM, " * C["1,2"] == 2), "M-B-ECM Cat"),
                  metric = "accurate"){
  
  if(metric == "accurate"){
    divisor <- function(exp_out, p){
      return(exp_out[[p]]$data$Ntest)
    }
    ylab <- "Model Accuracy"
  }else if(metric == "FN"){
    divisor <- function(exp_out,p){
      return(exp_out[[p]]$data$Nvic)
    }
    ylab <- "False Negative Rate"
  }else if(metric == "FP"){
    divisor <- function(exp_out, p){
      return(exp_out[[p]]$data$Ntest - exp_out[[p]]$data$Nvic)
    }
    ylab <- "False Positive Rate"
  }else{
    stop("Argument 'metric' must be one of the following case sensitive character strings: 'accurate', 'FN', or 'FP'.")
  }
   boxplotdf <- do.call("cbind", lapply(exp_out[[P[1]]][models], function(X, m = metric){
  X[[m]]
}))/divisor(exp_out, p = P[1])
  
   
  for(p in P[-1]){
  boxplotdf <- cbind(boxplotdf,do.call("cbind", lapply(exp_out[[p]][models], function(X, m = metric){
  X[[m]]
}))/divisor(exp_out, p = p))
  }
  
   boxplotdf <- boxplotdf
   
   if(max(boxplotdf) > 65){
     ylim <- c(min(boxplotdf), 1.1)
   }else{
    ylim <- range(boxplotdf) * c(0.9,1.2)
   }
   

if(is.null(cols)){
if(length(models) == 5){
pltcols <- hcl.colors(44, palette = "viridis", rev = TRUE)[c(3, 10, 20, 30, 37)]
}else if(length(models) > 10){
  warning("You should consider recoding this function with a different way to select the colors used for the plot.")
  pltcols <- hcl.colors(44, palette = "viridis", rev = TRUE)[seq(from = 2, to = 38, length.out = length(models))]
}else{
  pltcols <- hcl.colors(44, palette = "viridis", rev = TRUE)[seq(from = 2, to = 38, length.out = length(models))]
}
}else{
  if(length(cols) != length(models)){
    stop("If supplying colors, a vector the same length as the 'models' argument must be used.")
  }
}
   
opar <- par(no.readonly = TRUE)
on.exit(expr = suppressWarnings(par(opar)))
par(mar = c(4.25,3.85,1,0.5))
lmodels <- length(models)
graphics::boxplot(boxplotdf,  
                  at = (1:((lmodels + 1)*length(P)))[-seq(from = lmodels + 1, 
                                                          to = length(P)*(lmodels + 1), 
                                                          by = lmodels + 1)],
         xaxt = "n", yaxt = "n", ylim = ylim, 
        col = pltcols, xlab = "Number of Discriminants", ylab = ylab)
py <- pretty(boxplotdf)
graphics::axis(2, py)
graphics::axis(1, at =seq(from = 1, to = length(P)*(lmodels + 1), by = lmodels + 1) + 1, 
               labels =  paste0("p = ", P) )
graphics::legend("topleft", bty ="n", 
       legend = legend_text,  
       fill = pltcols, horiz = FALSE, ncol = 3, y.intersp = 1.25)
}


## -----------------------------------------------------------------------------
ECM_boxplot(exp_out = exp_out, P = P, metric = "accurate")

## -----------------------------------------------------------------------------
ECM_boxplot(exp_out = exp_out, P = P, metric = "FN")