#super scalable short segment (4S) detection algorithm
 
# Detect short segments of x whose measurements cluster beyond a threshold.
#
# Arguments:
#   x          numeric vector of measurements.
#   center     if TRUE, median(x) is subtracted from x before thresholding.
#   thresh     cutoff applied to abs(x); when NULL, the 95% quantile of abs(x)
#              is used. Supplying an explicit threshold is recommended.
#   distance   two consecutive above-threshold positions are linked into the
#              same segment when their index difference is <= distance + 1.
#   minlength  when > 1, only segments whose length is strictly greater than
#              minlength are kept.
#   inference  if TRUE, a p-value is computed for each segment via Bappr4().
#   pvthresh   when inference is TRUE and pvthresh is not NULL, only segments
#              with p-value < pvthresh are kept.
#
# Returns a data.frame with one row per segment and columns startpoint,
# endpoint, length, mean (mean of x over the segment), bk (number of
# above-threshold positions in the segment) and, when inference is TRUE,
# pvalue. Returns NULL when no segment is found or none survives filtering.
SSSS <- function(x, center = FALSE, thresh = NULL, distance = 9,
                 minlength = 3, inference = FALSE, pvthresh = 0.05) {
  if (center) x <- x - median(x)
  if (is.null(thresh)) thresh <- quantile(abs(x), 0.95)
  n <- length(x)

  # Step 1: locations whose magnitude exceeds the threshold.
  ind <- which(abs(x) > thresh)
  indsize <- length(ind)

  # Step 2: keep the gaps between consecutive exceedances that are small
  # enough (the `+ 1` keeps the rule consistent with the paper).
  indind <- which(diff(ind) <= distance + 1)

  # Fewer than two exceedances, or none close together: nothing to cluster.
  # This must be checked here because the clustering helper cannot be relied
  # on to accept an empty index set.
  if (length(indind) == 0L) return(NULL)

  # Step 2 (cont.): cluster the retained gaps into runs of indices into `ind`.
  subint <- partition2(indind)
  if (is.null(subint) || length(subint) == 0L) return(NULL)

  # Summarise every cluster.
  positions <- lapply(subint, function(k) ind[k])
  startpoint <- vapply(positions, min, numeric(1))
  endpoint <- vapply(positions, max, numeric(1))
  CNVlength <- endpoint + 1 - startpoint
  Mean <- vapply(
    seq_along(positions),
    function(i) mean(x[startpoint[i]:endpoint[i]]),
    numeric(1)
  )
  bk <- vapply(positions, length, numeric(1))

  object <- data.frame(
    startpoint = startpoint, endpoint = endpoint,
    length = CNVlength, mean = Mean, bk = bk
  )

  # Step 3: drop segments that are too short.
  if (minlength > 1) {
    object <- object[object$length > minlength, ]
    # Every segment was too short: report "nothing found" instead of failing
    # while renumbering an empty data.frame.
    if (nrow(object) == 0L) return(NULL)
    rownames(object) <- seq_len(nrow(object))
  }

  # Additional step: p-value for each remaining segment.
  if (inference) {
    object$pvalue <- vapply(
      seq_len(nrow(object)),
      function(i) Bappr4(n, indsize, object$length[i], object$bk[i]),
      numeric(1)
    )
    if (!is.null(pvthresh)) {
      keep <- which(object$pvalue < pvthresh)
      if (length(keep) == 0L) return(NULL)
      object <- object[keep, ]
      rownames(object) <- seq_len(nrow(object))
    }
  }
  object
}

 
 
# Cluster a set of integer indices into maximal runs of consecutive values.
#
# Each run a, a+1, ..., b found in `indind` is returned as the sequence
# a:(b + 1), i.e. extended by one element on the right.
# Example: input c(1:4, 6:7) gives list(1:5, 6:8).
#
# Arguments:
#   indind  vector of positive integer indices (order and duplicates are
#           irrelevant; the input is treated as a set).
#
# Returns an unnamed list of integer vectors, one per run, in increasing
# order; NULL when `indind` is empty.
partition2 <- function(indind) {
  if (length(indind) == 0L) return(NULL)

  runs <- sort(unique(indind))
  # Positions after which the next value is not consecutive: run boundaries.
  breaks <- which(diff(runs) != 1)
  starts <- runs[c(1L, breaks + 1L)]
  # Each run is extended by one on the right.
  ends <- runs[c(breaks, length(runs))] + 1L

  lapply(seq_along(starts), function(k) starts[k]:ends[k])
}
 
# Approximate p-value used for a detected segment.
#
# Computes m times the hypergeometric upper-tail probability P(X > t - 1.1),
# where X is the number of white balls in s - 1 draws from an urn holding
# m - 1 white and n - m black balls, and caps the result at 1.
# For integer t the 1.1 offset makes the tail equal to P(X >= t - 1).
Bappr4 <- function(n, m, s, t) {
  upper_tail <- phyper(
    t - 1.1, m - 1, n - m, s - 1,
    lower.tail = FALSE, log.p = FALSE
  )
  scaled <- m * upper_tail
  min(1, scaled)
}


#################################################################################
# find threshold percentile
#################################################################################
# Search for a threshold percentile by bisection.
#
# Starting from the interval [0.6, 1], performs exactly 10 halving steps.
# At each step the midpoint percentile pc is turned into a count of
# exceedances m = n - floor(pc * n) and scored with Bappr4(n, m, s, t).
# If the score is below pv the upper bound moves to the midpoint,
# otherwise the lower bound does.
#
# Arguments:
#   n   total number of measurements.
#   s   segment length passed to Bappr4.
#   t   third count argument passed to Bappr4.
#   pv  target p-value.
#
# Returns the midpoint evaluated in the final (10th) step.
threshper <- function(n, s = 10, t = 6, pv = 0.05) {
  lower <- 0.6
  upper <- 1
  midpoint <- NULL
  for (step in seq_len(10)) {
    midpoint <- (lower + upper) / 2
    exceed_count <- n - floor(midpoint * n)
    if (Bappr4(n, exceed_count, s, t) < pv) {
      upper <- midpoint
    } else {
      lower <- midpoint
    }
  }
  midpoint
}
 



