#' Read tabulated text files exported by MaxQuant
#'
#' Quantification results from \href{https://www.maxquant.org/}{MaxQuant} can be read using this function and relevant information extracted. 
#' The final output is a list containing 3 elements: \code{$raw}, \code{$quant} and \code{$annot}, 
#' or a data.frame combining the initial abundance values and the annotation if \code{separateAnnot=FALSE}.
#' This function has been developed using MaxQuant version 1.6.10.x, the format of resulting file 'proteinGroups.txt' is typically well conserved.  
#' 
#' @param path (character) path of file to be read
#' @param fileName (character) name of file to be read (default 'proteinGroups.txt' as typically generated by MaxQuant in txt folder) 
#' @param normalizeMeth (character) normalization method (will be sent to  \code{\link[wrMisc]{normalizeThis}}) 
#' @param quantCol (character or integer) exact col-names, or if length=1 content of \code{quantCol} will be used as pattern to search among column-names for $quant using \code{grep} 
#'   (unless the pattern already ends with a dot, an additional literal dot is required after the pattern)
#' @param uniqPepPat (character, length=1) pattern to search for columns with unique (razor) peptides using \code{grep}, default set to read unique razor-peptides  
#' @param refLi (integer) custom decide which line of data is main species, if single character entry it will be used to choose a group of species (eg 'mainSpe')
#' @param extrColNames (character) annotation column names to be read (1: column name for protein-IDs, default 'Majority.protein.IDs', searched for contaminants and spike-ins; 
#'   2: column name of fasta-headers, default 'Fasta.headers', searched for the main species; 3: column name for number of protein IDs matching, default 'Number.of.proteins')
#' @param specPref (character) prefix to identifiers allowing to separate i) recognize contamination database, ii) species of main identifications and iii) spike-in species (used by position in this order)
#' @param tit (character) custom title to plot
#' @param separateAnnot (logical) if \code{TRUE} output will be organized as list with \code{$annot}, \code{$raw} for initial/raw abundance values and \code{$quant} with final normalized quantitations
#' @param plotGraph (logical) optional plot vioplot of initial and normalized data (using \code{normalizeMeth}); alternatively the argument may contain numeric details that will be passed to \code{layout} when plotting
#' @param silent (logical) suppress messages
#' @param callFrom (character) allow easier tracking of message produced
#' @return list with \code{$raw} for initial abundance values (values <= 0 set to \code{NA}), \code{$quant} with final normalized log2 quantitations and \code{$annot}, 
#'   or returns data.frame with initial abundance values and annot if \code{separateAnnot=FALSE}
#' @seealso \code{\link[utils]{read.table}}, \code{\link[wrMisc]{normalizeThis}}, \code{\link{readProlineFile}} 
#' @examples
#' path1 <- system.file("extdata",package="wrProteo")
#' # Here we'll load a short/trimmed example file (thus not MaxQuant default name) 
#' fiNa <- "proteinGroupsMaxQuantUps1.txt"
#' specPref1 <- c(conta="conta|CON_|LYSC_CHICK", mainSpecies="YEAST",spike="HUMAN_UPS")
#' dataMQ <- readMaxQuantFile(path1, fileName=fiNa, specPref=specPref1)
#' summary(dataMQ$quant)
#' matrixNAinspect(dataMQ$quant, gr=gl(3,3)) 
#' @export
readMaxQuantFile <- function(path,fileName="proteinGroups.txt",normalizeMeth="median", quantCol="LFQ.intensity",uniqPepPat="Razor...unique.peptides",refLi=NULL,
  extrColNames=c("Majority.protein.IDs","Fasta.headers","Number.of.proteins"), specPref=c(conta="conta|CON_|LYSC_CHICK",mainSpecies="OS=Homo sapiens",spike="HUMAN_UPS"),
  tit=NULL, separateAnnot=TRUE, plotGraph=TRUE, silent=FALSE,callFrom=NULL) {
  ## prepare & check arguments (before anything gets read)
  fxNa <- wrMisc::.composeCallName(callFrom,newNa="readMaxQuantFile")
  if(!file.exists(path)) stop(" Can't find path '",path,"'")
  fiPath <- file.path(path,fileName)
  if(!file.exists(fiPath)) stop(" file  ",fileName," was NOT found in path ",path," !")
  if(!any(grepl("\\.txt$",fileName))) message(fxNa," Trouble ahead, this function was designed for reading tabulated text files (as MaxQuant usualy produces) !!")
  if(!requireNamespace("utils",quietly=TRUE)) stop("package 'utils' not found ! Please install first")
  if(length(quantCol) <1) stop(" 'quantCol' must be provided !")

  ## initial read MaxQuant
  tmp <- utils::read.delim(fiPath,stringsAsFactors=FALSE)
  chCol <- extrColNames %in% colnames(tmp)
  if(!any(chCol)) {                        # 2nd try : column names with spaces instead of dots
    extrColNames <- gsub("\\."," ",extrColNames)
    chCol <- extrColNames %in% colnames(tmp) }
  if(!any(chCol)) stop(" Problem locating annotation columns (",wrMisc::pasteC(extrColNames,quoteC="''"),")")
  if(!all(chCol)) stop("Can't find columns ",wrMisc::pasteC(extrColNames[!chCol],quoteC="'"))

  ## further extracting : quantitation
  quantColP <- NULL                        # pattern used for locating quantitation columns (stays NULL with explicit column names)
  if(length(quantCol) >1) {
    abund <- as.matrix(wrMisc::extrColsDeX(tmp,extrCol=quantCol,doExtractCols=TRUE,callFrom=fxNa))
  } else {
    ## single entry is used as pattern; require a literal dot after the prefix unless the pattern already ends with a dot
    endsWithDot <- substr(quantCol,nchar(quantCol),nchar(quantCol)) == "."
    quantColP <- if(endsWithDot) quantCol else paste0(quantCol,"\\.")
    quantCol <- grep(quantColP,colnames(tmp))
    if(length(quantCol) <1) stop("Could not find any of the columns specified in argument 'quantCol' !")
    abund <- as.matrix(tmp[,quantCol,drop=FALSE]) }         # drop=FALSE : keep column names even with single column
  if(!is.numeric(abund)) {
    ## non-numeric content : convert column by column, then make sure result is still a numeric matrix
    abundDimNa <- list(rownames(abund),colnames(abund))
    abundNrow <- nrow(abund)
    abund <- apply(abund,2,wrMisc::convToNum,convert="allChar",callFrom=fxNa)
    if(length(dim(abund)) <2) abund <- matrix(abund,nrow=abundNrow,dimnames=abundDimNa)   # apply() drops dims with single row
    if(!is.numeric(abund)) storage.mode(abund) <- "numeric" }
  ## strip the search pattern (same as used for locating the columns) or trim redundant text from column names
  colnames(abund) <- if(length(quantColP)==1) sub(quantColP,"",colnames(abund)) else wrMisc::.trimFromStart(wrMisc::.trimFromEnd(colnames(abund)))
  ## convert 0 (and negative values) to NA
  abund[which(abund <= 0)] <- NA

  ## further extracting : annotation
  useACol <- unique(wrMisc::naOmit(c(match(extrColNames,colnames(tmp)), grep(uniqPepPat,colnames(tmp)),grep("Peptides\\.",colnames(tmp)))))
  MQann <- as.matrix(tmp[,useACol,drop=FALSE])
  if(length(specPref) <1) warning(fxNa," Trouble ahead: argument 'specPref' missing")
  ## missing (or NA) patterns simply give no hits instead of failing inside grep()
  grepSpec <- function(pat,txt) if(length(pat)==1 && !is.na(pat)) grep(pat,txt) else integer(0)
  specMQ0 <- list(conta=grepSpec(specPref[1],MQann[,extrColNames[1]]),
    mainSpe=grepSpec(specPref[2],MQann[,extrColNames[2]]),
    spike=grepSpec(specPref[3],MQann[,extrColNames[1]]))
  if(length(specMQ0$mainSpe) <1) {
    ## setdiff() instead of negative indexing : a zero-length negative index would select nothing
    specMQ0$mainSpe <- setdiff(seq_len(nrow(tmp)),unlist(specMQ0))
    if(!silent) message(fxNa," Problem with 'mainSpecies' (none found), assuming that all ",length(specMQ0$mainSpe)," non-assigned proteins are 'mainSpecies' ")
  }
  specMQ <- rep(NA_character_,nrow(abund))
  ## later groups overwrite earlier ones (order : conta, mainSpe, spike)
  for(i in seq_along(specMQ0)) if(length(specMQ0[[i]]) >0) specMQ[specMQ0[[i]]] <- names(specMQ0)[i]
  if(!silent) {
    chSp <- sum(is.na(specMQ))
    if(chSp >0) message(fxNa," Note: ",chSp," unrecognized species")
    tab <- table(specMQ)
    if(length(tab) >0) message("   by species : ",paste0(names(tab),": ",tab,"  ",collapse="")) }
  MQann <- cbind(Spec=specMQ,MQann)                                          # better to name column 'species' ?? 

  ## look for unique col from $annot as rownames (among the first 5 columns)
  chAn <- vapply(seq_len(min(ncol(MQann),5)), function(j) sum(duplicated(MQann[,j])), numeric(1))
  rownames(abund) <- rownames(MQann) <- if(any(chAn==0)) MQann[,which(chAn==0)[1]] else wrMisc::correctToUnique(MQann[,which.min(chAn)],callFrom=fxNa)

  ## normalize
  if(is.character(refLi) && length(refLi)==1) {            # may be "mainSpe"
    refGrp <- refLi
    refLi <- which(MQann[,"Spec"]==refGrp)
    if(length(refLi) <1) {
      warning(fxNa," No lines found for refLi='",refGrp,"', using all lines for normalization")
      refLi <- NULL } }
  abundL2 <- log2(abund)
  abundN <- wrMisc::normalizeThis(abundL2,meth=normalizeMeth,refLi=refLi,callFrom=fxNa)

  ## plot distribution of intensities
  custLay <- NULL
  if(is.numeric(plotGraph) && length(plotGraph) >0) {      # numeric content is used as custom layout
    custLay <- plotGraph
    plotGraph <- TRUE
  } else plotGraph <- isTRUE(as.logical(plotGraph[1]))     # NULL or NA means no plot
  if(plotGraph) {
    ## save graphical parameters only when plotting and restore them even if plotting fails
    opar <- graphics::par(no.readonly=TRUE)
    on.exit(graphics::par(opar),add=TRUE)
    if(length(custLay) >0) graphics::layout(custLay) else graphics::layout(1:2)
    graphics::par(mar=c(3, 3, 3, 1))                          # mar: bot,le,top,ri
    misPa <- !c(wrGraph=requireNamespace("wrGraph",quietly=TRUE), sm=requireNamespace("sm",quietly=TRUE))
    useVio <- !any(misPa)
    if(!useVio && !silent) message(fxNa," missing package ",wrMisc::pasteC(names(misPa)[which(misPa)],quoteC="'")," for drawing vioplots")
    ## vioplot if wrGraph and sm are available, otherwise simple boxplot; dashed lines at rounded median -1, 0 and +1
    plotDistr <- function(dat,subTit) {
      if(useVio) wrGraph::vioplotW(dat, tit=paste(tit,subTit)) else graphics::boxplot(dat, main=paste(tit,subTit),las=1,outline=FALSE)
      graphics::abline(h=round(stats::median(dat,na.rm=TRUE))+(-1:1),lty=2,col=grDevices::grey(0.6)) }
    plotDistr(abundL2," MQ (initial)")
    plotDistr(abundN," MQ normalized")
  }

  ## prepare for final output
  ## NOTE(review): the first digit following '_' gets removed from the column names of $quant only (so they may differ from those of $raw) - TODO confirm this is intended
  colnames(abundN) <- sub(paste(extrColNames[1],"\\.",sep="") ,"",sub("_[[:digit:]]","_",colnames(abund)))
  if(separateAnnot) list(raw=abund,quant=abundN,annot=MQann) else data.frame(abund,MQann)
}
  
