##' Build a tree
##' 
##' This function builds a classification tree and outputs the chi-square
##' statistic for each node. It depends on an Rcpp wrapper, "cctree", and on
##' the R function "trim_ctree".
##' 
##' @param data The input data for the classification tree: a matrix or data
##' frame. The first column gives the class label and the remaining columns
##' are covariates. Data-frame columns that are factors are reported to
##' "cctree" together with their number of distinct observed values.
##' @param method Criterion for measuring the impurity of nodes in the tree,
##' either "entropy" (the default) or "gini".
##' @param alpha A parameter used for tree construction (passed on to "cctree").
##' @param cost A parameter used for making predictions with the tree model.
##' When supplied, its length must equal the number of distinct class labels
##' in the first column of \code{data}. Note that this function only validates
##' \code{cost}; it is not passed on to "cctree" here.
##' @param is_prune A binary indicator of whether the tree will be pruned.
##' @return A ctree class object containing the tree information returned by
##' "trim_ctree" (including the chi-square statistic for each node in
##' \code{chi}), plus the elements \code{method}, \code{call} and
##' \code{learning.data}.
##' @author Jianchang Hu
##' @examples
##' set.seed(12345 + 987)
##' n_sample <- 2000
##' n_var <- 200
##' maf <- runif(n_var, 0.05, 0.45)
##' x <- sapply(maf, function(snp.maf){sample(x=c(2,1,0), n_sample, replace=TRUE, 
##' prob=c(snp.maf^2, 2* snp.maf *(1-snp.maf), (1-snp.maf)^2))})
##' colnames(x) <- paste0("X_", seq(n_var))
##' 
##' log.odds <- -1 + (x[, 2] > 0)
##' prob.case <- exp(log.odds) / (1 + exp(log.odds))
##' Y <- sapply(prob.case, rbinom, n = 1, size = 1)
##' 
##' data <- cbind(Y, x)
##' test.tree <- tree_build(data, method = "entropy") # build the tree for a data set
##' test.chi <- test.tree$chi # extract the chi-square stat for each node
##' @export
tree_build <- function(data, method = c("entropy", "gini"), alpha = 0.01,
                       cost = NULL, is_prune = TRUE)
{
  cl <- match.call()
  if (missing(data)) stop("'data' argument is required")

  n_col <- ncol(data)
  if (is.null(n_col) || n_col < 1L) {
    stop("'data' must be a matrix or data frame with the class label ",
         "in the first column")
  }

  # Resolve the impurity criterion. The untouched default vector (whether left
  # missing or passed explicitly) collapses to its first entry; anything that
  # is not a single known name is rejected up front so that `if` never sees a
  # condition of length != 1 or NA.
  known_methods <- c("entropy", "gini")
  if (identical(method, known_methods)) method <- known_methods[1L]
  if (!is.character(method) || length(method) != 1L || is.na(method) ||
      !(method %in% known_methods)) {
    stop("wrong 'method'")
  }
  srule <- if (method == "entropy") 1 else 2

  # Pull out one column as a plain vector. `[[` is used for data frames so
  # that tibbles (whose `[, j]` stays a data frame) behave like base ones.
  is_df <- is.data.frame(data)
  get_col <- function(j) if (is_df) data[[j]] else data[, j]

  # Only data-frame columns can be factors; a matrix never holds any, so there
  # is no need to inspect it cell by cell. Ordered factors count as factors.
  factor_cols <- if (is_df) {
    unname(which(vapply(data, is.factor, logical(1L))))
  } else {
    integer(0L)
  }

  # Column descriptor handed to cctree: -1 marks the label column, 1 a
  # non-factor covariate, and a factor column carries its number of distinct
  # observed values.
  # NOTE(review): a factor label column overwrites the -1 marker below, as in
  # the previous implementation -- confirm that this is what cctree expects.
  colname <- rep(1, n_col)
  colname[1] <- -1
  for (j in factor_cols) {
    colname[j] <- length(unique(get_col(j)))
  }

  n_class <- length(unique(get_col(1L)))
  if (is.null(cost)) {
    cost <- rep(1, n_class)
  } else if (length(cost) != n_class) {
    stop("the length of 'cost' does not match the input data")
  }
  # NOTE(review): 'cost' is validated above but is not forwarded to cctree.

  output <- cctree(data, colname, srule, alpha, is_prune)
  out <- trim_ctree(output)

  # Label the per-node results with the indices 1..nnd; seq_len() keeps this
  # well defined even when nnd is 0.
  node_ids <- seq_len(out$nnd)
  names(out$dt) <- node_ids
  names(out$pt) <- node_ids
  names(out$spv) <- node_ids
  names(out$chi) <- node_ids
  colnames(out$spvl) <- node_ids
  colnames(out$final_counts) <- paste0("count_",
                                       seq_len(ncol(out$final_counts)))
  out$final_counts <- data.frame(out$final_counts)
  names(out$varcatg) <- seq_along(out$varcatg)

  out <- c(out, method = list(method), call = list(cl),
           learning.data = list(data))
  class(out) <- "ctree"
  out
}
