################################################################################
# TODO LIST
# TODO: ...

# NOTE: Column names used for calculations with data.table is declared
# in globals.R to avoid NOTES in R CMD CHECK.

################################################################################
# CHANGE LOG (last 20 changes)
# 03.05.2015: First version.

#' @title Calculate Analytical Threshold
#'
#' @description
#' Calculate analytical thresholds estimates.
#'
#' @details
#' Calculate the analytical threshold (AT) according to method 1, 2, and 4 as
#' recommended in the reference by analysing the background signal (noise).
#' Method 1: The average signal + 'k' * the standard deviation.
#' Method 2: The percentile rank method. The percentage of noise peaks below 'rank.t'.
#' Method 4: Utilize the mean and standard deviation and the critical value obtained 
#' from the t-distribution for confidence interval 'alpha' (one-sided) and observed
#' peaks analysed (i.e. not blocked) minus one as degrees of freedom, and the number
#' of samples.
#' If samples containing DNA are used a range around the allelic peaks can be blocked
#' from the analysis to discard peaks higher than the noise. Blocking can be within
#' each dye or across all dye channels.
#' Similarily a range around the peaks of the internal lane standard (ILS) can be 
#' blocked across all dye channels. Which can bleed-through in week samples
#' (i.e. negative controls)
#' 
#' @param data a data frame containing at least 'Dye.Sample.Peak',
#'  'Sample.File.Name', 'Marker', 'Allele', 'Height', and 'Data.Point'.
#' @param ref a data frame containing at least
#'  'Sample.Name', 'Marker', 'Allele'.
#' @param block.height logical to indicate if high peaks should be blocked.
#' @param height integer for global lower peak height threshold for peaks
#' to be excluded from the analysis. Active if 'block.peak=TRUE.
#' @param block.sample logical to indicate if sample allelic peaks should be blocked.
#' @param per.dye logical TRUE if sample peaks should be blocked per dye channel.
#' FALSE if sample peaks should be blocked globally across dye channels.
#' @param range.sample integer to specify the blocking range in (+/-) data points.
#' Active if block.sample=TRUE.
#' @param block.ils logical to indicate if internal lane standard peaks should be blocked.
#' @param range.ils integer to specify the blocking range in (+/-) data points.
#' Active if block.ils=TRUE.
#' @param k numeric factor for the desired confidence level (method AT1).
#' @param alpha numeric one-sided confidence interval to obtain the
#' critical value from the t-distribution (method AT4).
#' @param rank.t numeric percentile rank threshold (method AT2).
#' @param ignore.case logical to indicate if sample matching should ignore case.
#' @param word logical to indicate if word boundaries should be added before sample matching.
#' @param debug logical to indicate if debug information should be printed.
#' 
#' @return list of two data frames. The first with result per dye per sample,
#'  per sample, and all data for each method. The second is the complete percentile
#'  rank list.
#' 
#' @export
#' 
#' @seealso \code{\link{blockAT}}, \code{\link{checkSubset}}
#' 
#' @references
#'  J. Bregu et.al.,
#'   Analytical thresholds and sensitivity: establishing RFU thresholds for
#'   forensic DNA analysis, J. Forensic Sci. 58 (1) (2013) 120-129,
#'   ISSN 1556-4029, DOI: 10.1111/1556-4029.12008.
#' \url{http://onlinelibrary.wiley.com/doi/10.1111/1556-4029.12008/abstract}
#' 
#' 

calculateAT <- function(data, ref=NULL, block.height=TRUE, height=500,
                        block.sample=TRUE, per.dye = TRUE, range.sample=20,
                        block.ils=TRUE, range.ils=10,
                        k=3, rank.t=0.99, alpha=0.01,
                        ignore.case=TRUE, word=FALSE, debug=FALSE){
  
  if(debug){
    print(paste("IN:", match.call()[[1]]))
    print("Parameters:")
    print("data")
    print(str(data))
    print("ref")
    print(str(ref))
    print("block.height")
    print(block.height)
    print("height")
    print(height)
    print("block.sample")
    print(block.sample)
    print("per.dye")
    print(per.dye)
    print("range.sample")
    print(range.sample)
    print("block.ils")
    print(block.ils)
    print("range.ils")
    print(range.ils)
    print("k")
    print(k)
    print("rank.t")
    print(rank.t)
    print("alpha")
    print(alpha)
    print("ignore.case")
    print(ignore.case)
    print("word")
    print(word)
  }
  
  # Check data ----------------------------------------------------------------
  
  if(is.null(data$Dye.Sample.Peak)){
    stop("'data' must contain a column 'Dye.Sample.Peak'")
  }
  
  if(is.null(data$Sample.File.Name)){
    stop("'data' must contain a column 'Sample.File.Name'")
  }

  if(is.null(data$Marker)){
    stop("'data' must contain a column 'Marker'")
  }
  
  if(is.null(data$Allele)){
    stop("'data' must contain a column 'Allele'")
  }
  
  if(is.null(data$Height)){
    stop("'data' must contain a column 'Height'")
  }

  if(is.null(data$Data.Point)){
    stop("'data' must contain a column 'Data.Point'")
  }
  
  # Check if slim format.  
  if(sum(grepl("Allele", names(data))) > 1){
    stop("'data' must be in 'slim' format",
         call. = TRUE)
  }
  
  if(sum(grepl("Height", names(data))) > 1){
    stop("'data' must be in 'slim' format",
         call. = TRUE)
  }

  if(sum(grepl("Data.Point", names(data))) > 1){
    stop("'data' must be in 'slim' format",
         call. = TRUE)
  }
  
  if(!is.null(ref)){

    if(is.null(ref$Sample.Name)){
      stop("'ref' must contain a column 'Sample.Name'")
    }
    
    if(is.null(ref$Marker)){
      stop("'ref' must contain a column 'Marker'")
    }
    
    if(is.null(ref$Allele)){
      stop("'ref' must contain a column 'Allele'")
    }
    
    # Check if slim format.  
    if(sum(grepl("Allele", names(ref))) > 1){
      stop("'ref' must be in 'slim' format",
           call. = TRUE)
    }
    
  }
  
  # Check parameters.  
  if(!is.logical(block.height)){
    stop("'block.height' must be logical",
         call. = TRUE)
  }
  
  if(!is.numeric(height)){
    stop("'height' must be numeric",
         call. = TRUE)
  }
  
  if(!is.logical(block.sample)){
    stop("'block.sample' must be logical",
         call. = TRUE)
  }
  
  if(!is.logical(per.dye)){
    stop("'per.dye' must be logical",
         call. = TRUE)
  }
  
  if(!is.numeric(range.sample)){
    stop("'range.sample' must be numeric",
         call. = TRUE)
  }
  
  if(!is.logical(block.ils)){
    stop("'block.ils' must be logical",
         call. = TRUE)
  }
  
  if(!is.numeric(range.ils)){
    stop("'range.ils' must be numeric",
         call. = TRUE)
  }

  if(!is.numeric(k)){
    stop("'k' must be numeric",
         call. = TRUE)
  }
  
  if(!is.numeric(rank.t)){
    stop("'rank.t' must be numeric",
         call. = TRUE)
  }

  if(!is.numeric(alpha)){
    stop("'alpha' must be numeric",
         call. = TRUE)
  }
  
  if(!is.logical(ignore.case)){
    stop("'ignore.case' must be logical",
         call. = TRUE)
  }
  
  if(!is.logical(word)){
    stop("'word' must be logical",
         call. = TRUE)
  }
  
  if(!is.logical(debug)){
    stop("'debug' must be logical",
         call. = TRUE)
  }
  
  # Prepare -------------------------------------------------------------------

  if(!all(c("Blocked", "Dye") %in% names(data))){
    # Block data for AT calculation
    # (need to be separate function to enable control plots in GUI).
    data <- blockAT(data=data, ref=ref,block.height=block.height, height=height,
                    block.sample=block.sample, per.dye=per.dye, range.sample=range.sample,
                    block.ils=block.ils, range.ils=range.ils,
                    ignore.case=ignore.case, word=word, debug=debug)
    
  }
  
  # Get all dyes.
  dyes <- as.character(unique(data$Dye))
  colorsKit <- unique(getKit("ESX17", what="Color")$Color)
  dyesKit <- addColor(colorsKit, have="Color", need="Dye")
  dyeILS <- setdiff(dyes, dyesKit)

  # Get number of samples.
  nSamples <- length(unique(data$Sample.File.Name))

  # Internal functions --------------------------------------------------------
  
  # Function to calculate the percentile rank .
  percentileRank <- function(x) trunc(rank(x))/length(x)
  
  # Function to get height above a percentile.
  rankThreshold <- function(x, t) min(x[percentileRank(x) > t])

  # Convert -------------------------------------------------------------------
  
  # Strip blocked data, and ILS channel.
  dt <- data[data$Blocked==FALSE & data$Dye!=dyeILS,]
  
  # Convert to data.table for performance.
  dt <- data.table::data.table(dt)
  
  # Analyse1 ------------------------------------------------------------------
  
  # Calculate for sample per dye.
  res1 <- dt[, list(Mean=mean(Height, na.rm=TRUE),
                    Sd=sd(Height, na.rm=TRUE),
                    Peaks=sum(Blocked==FALSE),
                    AT2=rankThreshold(Height, rank.t)),
             by=list(Sample.File.Name, Dye)]

  # Calculate for sample.
  at.sample <- dt[, list(Mean=mean(Height, na.rm=TRUE),
                         Sd=sd(Height, na.rm=TRUE),
                         Peaks=sum(Blocked==FALSE),
                         AT2=rankThreshold(Height, rank.t)),
                  by=list(Sample.File.Name)]

  # Join the result.
  res1$Sample.Mean <- rep(at.sample$Mean, each=length(dyesKit))
  res1$Sample.Sd <- rep(at.sample$Sd, each=length(dyesKit))
  res1$Sample.Peaks <- rep(at.sample$Peaks, each=length(dyesKit))
  res1$Sample.AT2 <- rep(at.sample$AT2, each=length(dyesKit))

  # Calculate globally for all data.
  at.global <- dt[, list(Mean=mean(Height, na.rm=TRUE),
                         Sd=sd(Height, na.rm=TRUE),
                         Peaks=sum(Blocked==FALSE),
                         AT2=rankThreshold(Height, rank.t))]
  
  # Join the result.
  res1$Global.Mean <- rep(at.global$Mean, nrow(res1))
  res1$Global.Sd <- rep(at.global$Sd, nrow(res1))
  res1$Global.Peaks <- rep(at.global$Peaks, nrow(res1))
  res1$Global.AT2 <- rep(at.global$AT2, nrow(res1))

  # Calculate AT1.
  res1$AT1 <- res1$Mean + k * res1$Sd
  res1$Sample.AT1 <- res1$Sample.Mean + k * res1$Sample.Sd
  res1$Global.AT1 <- res1$Global.Mean + k * res1$Global.Sd

  # Calculate AT4.
  #Note: Actually no point using t-distribution since degrees of freedom
  # (number of observations - 1) are large (>100).
  res1$AT4 <- res1$Mean + abs(qt(alpha, res1$Peaks - 1)) * (1 + 1 / 1)^0.5 * res1$Sd
  res1$Sample.AT4 <- res1$Sample.Mean + abs(qt(alpha, res1$Sample.Peaks - 1)) * (1 + 1 / 1)^0.5 * res1$Sample.Sd
  res1$Global.AT4 <- res1$Global.Mean + abs(qt(alpha, res1$Global.Peaks - 1)) * (1 + 1 / nSamples)^0.5 * res1$Global.Sd

  # Add number of samples.
  res1$Total.Samples <-  nSamples

  # Add attributes.
  attr(res1, which="k") <- k
  attr(res1, which="rank.t") <- rank.t
  attr(res1, which="alpha") <- alpha
  attr(res1, which="block") <- block.sample
  attr(res1, which="range.sample") <- range.sample
  attr(res1, which="block.ils") <- block.ils
  attr(res1, which="range.ils") <- range.ils
  attr(res1, which="per.dye") <- per.dye
 
  # Re-order columns and convert back to data.frame.
  res1 <- data.frame(setcolorder(res1, c(1:5, 7:9, 11:13, 15:17, 6, 10, 14, 18:21)))
  
  # Analyse2 ------------------------------------------------------------------
  
  # Calculate complete percentile rank list.
  res2 <- data.frame(Height=unique(sort(dt$Height)),
                     Rank=unique(percentileRank(sort(dt$Height))),
                     Observations=as.numeric(table(dt$Height)))

  # Add attributes.
  attr(res2, which="rank.t") <- rank.t
  attr(res2, which="block") <- block.sample
  attr(res2, which="range.sample") <- range.sample
  attr(res2, which="block.ils") <- block.ils
  attr(res2, which="range.ils") <- range.ils
  attr(res2, which="per.dye") <- per.dye
  
  # Convert back to data frame.
  res2 <- data.frame(res2)
  
  if(debug){
    print("str(res1)")
    print(str(res1))
    print("head(res1)")
    print(head(res1))
    print("tail(res1)")
    print(tail(res1))
    print("str(res2)")
    print(str(res2))
    print("head(res2)")
    print(head(res2))
    print("tail(res2)")
    print(tail(res2))
  }

  if(debug){
    print(paste("EXIT:", match.call()[[1]]))
  }
  
  # Return list of the two dataframes.
  res <- list(res1, res2)
  
  # Return result.
  return(res)
  
}