##' Depth importance measure
##' 
##' This function grows a collection of unpruned trees, each on a random subset
##' of the SNPs, and combines them into a depth importance measure for every SNP.
##' The trees are grown in parallel with package snowfall.
##' 
##' Within a tree, every splitting node at depth 7 or less contributes
##' \code{2^(-depth) * chi}, where \code{chi} is the node statistic reported by
##' the tree; the contributions are summed per splitting SNP. The importance of
##' a SNP is the average of these per-tree sums over the trees in which the SNP
##' was used for at least one such split. SNPs never used for a split get 0.
##' Trees whose construction raises an error or a warning are discarded.
##' 
##' @param pheno A vector indicating case-control status.
##' @param geno A matrix containing SNP data, each column corresponding to a single SNP,
##' with the SNP ID as the column name.
##' @param num.trees Number of trees constructed in the random forest, default number is 1000.
##' @param num.var.boot Number of variables considered in each tree construction,
##' default number is one-fifth of the total number of SNPs (at least 1).
##' @param num.cpus Number of CPUs requested from snowfall for the parallel
##' tree construction, default number is 4.
##' @return A numeric vector giving the depth importance measure of each SNP,
##' in the same order as the columns of \code{geno}.
##' @author Jianchang Hu
##' @examples 
##' set.seed(12345 + 987)
##' n_sample <- 2000
##' n_var <- 200
##' maf <- runif(n_var, 0.05, 0.45)
##' x <- sapply(maf, function(snp.maf){sample(x=c(2,1,0), n_sample, replace=TRUE,
##' prob=c(snp.maf^2, 2* snp.maf *(1-snp.maf), (1-snp.maf)^2))})
##' colnames(x) <- paste0("X_", seq(n_var))
##' 
##' log.odds <- -1 + (x[, 2] > 0)
##' prob.case <- exp(log.odds) / (1 + exp(log.odds))
##' Y <- sapply(prob.case, rbinom, n = 1, size = 1)
##' table(Y)
##' 
##' \dontrun{
##' library(diTARV)
##' di.measure <- di_cal(Y, x)
##' }
##' @export
di_cal <- function(pheno, geno, num.trees = 1000, num.var.boot = NULL,
                   num.cpus = 4) {

  num.snps <- dim(geno)[2]
  if (is.null(num.snps) || num.snps < 1) {
    stop("'geno' must be a matrix (or data frame) with at least one column.",
         call. = FALSE)
  }
  if (NROW(pheno) != nrow(geno)) {
    stop("'pheno' must have one entry per row of 'geno'.", call. = FALSE)
  }
  all.var <- colnames(geno)

  if (is.null(num.var.boot)) {
    # floor(num.snps / 5) is 0 for fewer than 5 SNPs, which would give every
    # tree an empty predictor set; always sample at least one SNP.
    num.var.boot <- max(1, floor(num.snps / 5))
  } else if (length(num.var.boot) != 1 || is.na(num.var.boot) ||
             num.var.boot < 1 || num.var.boot > num.snps) {
    # Fail here with a clear message instead of inside every worker.
    stop("'num.var.boot' must be a single number between 1 and ncol(geno).",
         call. = FALSE)
  }

  # Builds one tree on a random SNP subset and returns a two-column data frame
  # (splitting SNP, summed score), or NULL when the tree contributes nothing.
  # The argument is only the tree index supplied by sfLapply and is unused.
  wrapper <- function(x) {
    max.depth <- 7 # nodes deeper than this are ignored

    # snps to include in this tree (sampled without replacement)
    var.idx <- sample.int(num.snps, num.var.boot, replace = FALSE)
    # drop = FALSE keeps the matrix shape, and thereby the SNP column name,
    # when only a single SNP is sampled.
    geno.boot <- geno[, var.idx, drop = FALSE]

    data.boot <- cbind(pheno, geno.boot)
    colnames(data.boot)[1] <- "affected"

    # build tree w/o pruning; an error or a warning discards the tree
    tree.t <- tryCatch(
      tree_build(data.boot, method = "entropy", is_prune = FALSE),
      error = function(cond) NULL,
      warning = function(cond) NULL
    )

    # NULL marks a failed build. A scalar NA result is also treated as "no
    # tree". Testing is.na() on a whole tree object yields a condition of
    # length > 1, which is an error in R >= 4.2.
    if (is.null(tree.t) ||
        (is.atomic(tree.t) && length(tree.t) == 1L && is.na(tree.t))) {
      return(NULL)
    }

    is.split <- !is.na(tree.t$spvl) # nodes that carry a split value
    depth.t <- depth_find(tree.t)
    di.score.node <- (2^(-depth.t) * tree.t$chi)[is.split] # score for each node
    depth.t <- depth.t[is.split]

    split.var.t <- as.character(split_var_extract(tree.t))
    # NOTE(review): assumes split_var_extract() returns one name per splitting
    # node, in node order -- confirm against its definition.
    stopifnot(length(split.var.t) == length(di.score.node))

    shallow <- !is.na(depth.t) & depth.t <= max.depth
    if (!any(shallow)) {
      return(NULL)
    }
    split.var.t <- split.var.t[shallow]
    di.score.node <- di.score.node[shallow]

    # total score per splitting SNP within this tree
    split.var.uniq <- unique(split.var.t)
    di.score.tree <- vapply(
      split.var.uniq,
      function(v) sum(di.score.node[which(split.var.t == v)]),
      numeric(1),
      USE.NAMES = FALSE
    )

    data.frame(split.var.uniq, di.score.tree, stringsAsFactors = FALSE)
  }

  snowfall::sfInit(parallel = TRUE, cpus = num.cpus)
  # Release the cluster even if exporting or tree construction fails.
  on.exit(snowfall::sfStop(), add = TRUE)
  snowfall::sfExport("num.snps", "num.var.boot", "geno", "pheno")

  start.time <- Sys.time()
  res.test <- snowfall::sfLapply(seq_len(num.trees), wrapper)
  print(Sys.time() - start.time)

  res.test <- Filter(Negate(is.null), res.test)

  di.all <- rep(0, num.snps)
  num.selected <- rep(0, num.snps)

  for (res.tree in res.test) {
    # Map splitting SNP names back to column positions of geno; names that
    # cannot be matched are ignored.
    idx <- match(res.tree[[1]], all.var)
    found <- !is.na(idx)
    idx <- idx[found]
    di.all[idx] <- di.all[idx] + res.tree[[2]][found]
    num.selected[idx] <- num.selected[idx] + 1
  }

  # Average over the trees in which each SNP was used; pmax() avoids 0 / 0
  # for SNPs that were never used.
  di.all / pmax(1, num.selected)
}
