
#' Fully process a Full Scan Wiff and scan File
#'
#' A convenient wrapper function to extract all full scan areas from a file
#' without storing the intermediate reader and ejection data structures.
#'
#' @param wiffFile A valid file path to the Sciex data file (extension .wiff)
#' containing the metadata for one or more EchoMS runs
#' @param wiffScanFile An file path the the Sciex scan file (extension
#' .wiff.scan) containing the full raw EchoMS data
#' @param peaks A named list of peak objects of class `rtmsPeak`, often
#' generated by `rtmsPeakList()`
#' @param ... Additional arguments passed to `measureEjections()`
#'
#' @return A list with two objects: `ejections`, a data frame listing all
#' ejections for all samples in the WIFF file, with an additional `wiffSample`
#' column specifying which sample each ejection was extracted from; and
#' `massAreas`, a data frame of the format returned by [getAllFullScanAreas()],
#' also with an additional `wiffSample` column
#'
#' @export
processAllFullScanAreas <- function(wiffFile,wiffScanFile,peaks,...) {
	wiff <- newWiffReader(wiffFile,wiffScanFile)
	shots <- getWiffShots(wiff)
	tics <- getAllTIC(wiff)

	ejectiondf <- data.frame()
	massdf <- data.frame()
	for (si in seq_along(wiff$samples)) {
		cursample <- paste0("Sample",si)
		ejections <- measureEjections(tics[[si]],shots=shots,...)
		ejections$wiffSample <- cursample
		ejectiondf <- rbind(ejectiondf,ejections)

		massarea <- getAllFullScanAreas(wiffScanFile,tics[[si]],wiff$samples[[si]],ejections,peaks)
		massarea$wiffSample <- cursample
		massdf <- rbind(massdf,massarea)
	}

	list(ejections=ejectiondf,massAreas=massdf)
}

#' Get the total area for given masses for all wells
#'
#' @param scanfile A file path to a Sciex raw scan file (extension .wiff.scan)
#' containing the raw data referenced by the .wiff file from which the other
#' parameters were extracted
#' @param tic A total ion chromatogram of the format outputby [getTIC()] or
#' [getAllTIC()]
#' @param sample The particular object from the `samples` field of the
#' `rtmsWiffReader` object, which in this case contains binary offsets into the
#' .wiff.scan file
#' @param ejections An ejection table listing the timing and boundaries of the
#' total ion chromatogram peaks for all shots in the run, as returned by
#' [measureEjections()]
#' @param peaks A named list of peak objects of class `rtmsPeak`
#'
#' @return A data frame containing the total intensity for each `rtmsPeak` given
#' and each ejection; see Details for column specifics.
#'
#' @details
#' The table returned includes a measurement of total area for each of the mass
#' transitions listed in `sample`.  It contains one row for each measured shot
#' each mass transition, with the following columns:
#' * `shotorder`: The order of the peak within the shots fired during the run
#' * `well`: The alphanumeric well name of the well from which the shot was
#' fired
#' * `time`: The time (in seconds) after the beginning of the run at which the
#' intensity from the shot was at its peak
#' * `massindex`: The index of the measured mass transition in the set of masses
#' in the given sample
#' * `mass`: The name of the mass transition measured (often a compound name or
#' id)
#' * `area`: The intensity area (in counts) for that particular mass transition
#' from that ejection
#' @export
getAllFullScanAreas <- function(scanfile,tic,sample,ejections,peaks) {
	scon <- file(scanfile,"rb")
	on.exit(close(scon))

	areadf <- data.frame(
		eindex=rep(seq_len(nrow(ejections)),each=length(peaks)),
		shotorder=rep(ejections$shotorder,each=length(peaks)),
		well=rep(ejections$well,each=length(peaks)),
		time=rep(ejections$time,each=length(peaks)),
		massindex=rep(seq_along(peaks),times=nrow(ejections)),
		mass=rep(names(peaks),times=nrow(ejections)),
		area=NA
	)
	for (eiter in seq_len(nrow(ejections))) {
		ptic <- tic[tic$time >= ejections$minTime[[eiter]] & tic$time<= ejections$maxTime[[eiter]],]
		spectrum <- getFullScanSpectrum_internal(scon,ptic,sample)
		measure <- rtms::measureSample(rtms::getSample(spectrum,peaks,freqSpacing=FALSE),"PeakArea")
		areadf$area[areadf$eindex==eiter] <- measure$value
	}

	areadf$eindex <- NULL
	areadf <- areadf[order(areadf$shotorder,areadf$massindex),]
	areadf
}

#' Get the total area for given masses for a particular well
#'
#' @inheritParams getAllFullScanAreas
#' @param well The well (a string) or well index (an integer) to be measured
#'
#' @return A data frame containing the total intensity for `rtmsPeak` gvien
#' and each ejection; see Details for column specifics.
#'
#' @details
#' The table return includes a measurement of total area for each of the mass
#' transitions listed in `sample`.  It contains one row for each measured shot
#' each mass transition, with the following columns:
#' * `shotorder`: The order of the peak within the shots fired during the run
#' * `well`: The alphanumeric well name of the well from which the shot was
#' fired
#' * `time`: The time (in seconds) after the beginning of the run at which the
#' intensity from the shot was at its peak
#' * `massindex`: The index of the measured mass transition in the set of masses
#' in the given sample
#' * `mass`: The name of the mass transition measured (often a compound name or
#' id)
#' * `area`: The intensity area (in counts) for that particular mass transition
#' from that ejection
#' @export
getFullScanAreas <- function(scanfile,tic,sample,ejections,peaks,well) {
	if (length(well)!=1) {
		stop("Parameter 'well' should be of length 1.")
	}
	if (is.character(well)) {
		well <- which(ejections$well==well)
		if (length(well)==0) {
			stop("Unable to locate well in ejection table.")
		}
	}
	rel_ejections <- ejections[well,]

	getAllFullScanAreas(scanfile,tic,sample,rel_ejections,peaks)
}


#' Extract mass spectra for all wells
#'
#' @inheritParams getAllFullScanAreas
#'
#' @return A list of objects of class `rtmsSpectrum` containing the full
#' extracted mass spectra for each ejection
#' @export
getAllFullScanSpectra <- function(scanfile,tic,sample,ejections) {
	scon <- file(scanfile,"rb")
	on.exit(close(scon))

	spectra <- list()
	for (eiter in seq_len(nrow(ejections))) {
		ptic <- tic[tic$time >= ejections$minTime[[eiter]] & tic$time<= ejections$maxTime[[eiter]],]
		spectra[[eiter]] <- getFullScanSpectrum_internal(scon,ptic,sample)
	}

	spectra
}

#' Extract a full mass spectrum for a given well
#'
#' @inheritParams getAllFullScanAreas
#' @param well The well (a string) or well index (an integer) to be measured
#'
#' @return An object of class `rtmsSpectrum` representing the full mass
#' spectrum for the given well
#'
#' @export
getFullScanSpectrum <- function(scanfile,tic,sample,ejections,well) {
	if (length(well)!=1) {
		stop("Parameter 'well' should be of length 1.")
	}
	if (is.character(well)) {
		well <- which(ejections$well==well)
		if (length(well)==0) {
			stop("Unable to locate well in ejection table.")
		}
	}
	rel_ejections <- ejections[well,]

	getAllFullScanSpectra(scanfile,tic,sample,rel_ejections)[[1]]
}

getFullScanSpectrum_internal <- function(scon,ptic,sample) {
	spectra <- getFullScanData_internal(scon,ptic,sample,removeZeros=FALSE)
	specsum <- data.frame(mz=unique(spectra$mz),area=rep(NA_real_,length(unique(spectra$mz))))
	for (si in seq_len(nrow(specsum))) {
		specsum$area[[si]] <- sum(spectra$intensity[spectra$mz==specsum$mz[[si]]])
	}

	rtms::rtmsSpectrum(specsum$mz,specsum$area)
}

#' Extract raw Wiff full scan data
#'
#' @param scanfile A file path to a Sciex raw scan file (extension .wiff.scan)
#' containing the raw data referenced by the .wiff file from which the other
#' parameters were extracted
#' @param tic A (possibly partial) total ion chromatogram of the format output
#' by [getTIC()] or [getAllTIC()] referring to the range of timepoints to be
#' extracted
#' @param sample The particular object from the `samples` field of the
#' `rtmsWiffReader` object, which in this case contains binary offsets into the
#' .wiff.scan file
#' @param removeZeros If `FALSE` (the default) all intensity measuremens for
#' all time points and m/z values will be extractd and returned; if `TRUE`,
#' only values greater than zero will be returned; this can result in a much
#' smaller output and may be useful for calculating areas
#'
#' @return A data frame containing all raw data for the given range (see
#' Details)
#'
#' @details
#' The data frame output by the function contains all the intensity data
#' compressed into the .wiff.scan file for the given range of times; it has the
#' following columns:
#' * `index`: The particular row of `ptic` the measurement corresponds to
#' * `time`: The time (in seconds) after the beginning of the run at which the
#' measurement was taken
#' * `mz`: The mass to charge value (in m/z) which the measurement corresponds
#' to
#' * `intensity`: The intensity (in counts per second) that was measured for
#' the given time and m/z value
#'
#' @export
getFullScanData <- function(scanfile,tic,sample,removeZeros=FALSE) {
	scon <- file(scanfile,"rb")
	on.exit(close(scon))
	getFullScanData_internal(scon,tic,sample,removeZeros)
}

getFullScanData_internal <- function(scon,ptic,sample,removeZeros=FALSE) {
	rawvecs <- list()
	specs <- list()
	baseoffset <- sample$offset+24
	ends <- ptic$offset+ptic$size
	shifts <- ptic$offset-c(0,ends[-nrow(ptic)])
	seek(scon,baseoffset)

	for (titer in seq_len(nrow(ptic))) {
		if (shifts[[titer]]>0) seek(scon,shifts[[titer]],origin="current")
		rawvec <- readBin(scon,"raw",ptic$size[titer])
		curdf <- wfs_parseSpectrum(rawvec)
		curdf$index <- titer
		curdf$time <- ptic$time[[titer]]
		if (removeZeros) {
			curdf <- curdf[curdf$intensity>0,]
		}
		specs[[titer]] <- curdf
	}

	return(do.call(rbind,specs))
}

wfs_parseSpectrum <- function(rawvec) {
	if (length(rawvec)<=44 || (length(rawvec)%%4)!=0) {
		stop("Compressed spectrum blocks must be a multiple of 4 bytes greater than 44.")
	}
	if (any(readBin(rawvec[1:8],"integer",2,size=4,endian="little")!=c(-2,1))) {
		stop("Compressed spectrum blocks must begin with a -2, 1 marker.")
	}
	params <- readBin(rawvec[9:40],"double",4,size=8,endian="little")
	lower <- params[[1]]
	upper <- params[[2]]
	step <- params[[3]]
	multiple <- params[[4]]
	npeaks <- readBin(rawvec[41:44],"integer",1,size=4,endian="little")

	specdf <- data.frame(mz=seq(lower,upper,by=step),intensity=0)
	if (npeaks==0) { return(specdf) }

	subvec <- rawvec[45:(length(rawvec))]
	valdf <- wfs_extractBlockValues(subvec,npeaks)
	specdf$intensity[(valdf$val1)+1] <- multiple*valdf$val2
	return(specdf)
}

wfs_extractBlockValues <- function(rawvec,npeaks) {
	peaks <- data.frame(val1=rep(1,npeaks),val2=NA)
	peakIndex <- 1
	val1 <- c()
	val2 <- c()
	type1 <- c()

	rawcon <- rawConnection(rawvec,"rb")
	on.exit({ close(rawcon) })

	curval1 <- 0
	curtype1 <- "implicit"

	finished <- FALSE
	while (peakIndex<=npeaks) {
		num <- readBin(rawcon,"integer",1,size=1,signed=FALSE)

		if (num>0) { curstate <- "subbyte" }
		else { curstate <- "fullbyte" }

		while (curstate != "open") {
			if (curstate=="subbyte") {
				seek(rawcon,-1,origin="current")
				allsubs <- c()

				while (!any((allsubs%%8)==0)) {
					num <- readBin(rawcon,"integer",1,size=1,signed=FALSE)
					allsubs <- c(allsubs,wfs_getSubbytes(num))
				}

				for (subindex in seq_along(allsubs)) {
					if ((allsubs[[subindex]]%%8)==0) {
						# A subbyte of 0 or 8 indicates a string of subbyte
						# values is finished. 0 indicates that full bytes
						# are next to be read; 8 indicates that the next stream
						# of bytes is open.
						if (allsubs[[subindex]]==0) { curstate <- "fullbyte" }
						else { curstate <- "open" }
						break
					} else if (allsubs[[subindex]]>8) {
						# A subbyte with value n>8 indicates that (n-8) m/z
						# values are skipped before the next intensity
						peaks$val1[[peakIndex]] <- peaks$val1[[peakIndex]]+allsubs[[subindex]]-8
					} else {
						# A subbyte with value n<8 indicates an intensity of
						# value n; if some m/z values have been skipped this
						# should be tracked; otherwise the m/z is assumed to be
						# immediately after the previous one.
						peaks$val2[[peakIndex]] <- allsubs[[subindex]]
						peakIndex <- peakIndex+1
						if (peakIndex>npeaks) { break }
					}
				}
				if (peakIndex>npeaks) { break }

			} else if (curstate=="fullbyte") {
				num <- readBin(rawcon,"integer",1,size=1,signed=FALSE)

				if (length(num)==0) {
					break
				} else if (num==0) {
					num <- readBin(rawcon,"integer",1,size=4,endian="little")
				}

				peaks$val2[[peakIndex]] <- num
				peakIndex <- peakIndex+1
				if (peakIndex>npeaks) { break }

				num <- readBin(rawcon,"integer",1,size=1,signed=FALSE)

				if (num==128) { curstate <- "open" }
				else if (num==0) { curstate <- "fullbyte" }
				else { curstate <- "subbyte" }
				curtype1 <- "implicit"
			}
		}
		# if (finished) { break }
		if (peakIndex>npeaks) { break }

		num <- readBin(rawcon,"integer",1,size=1,signed=FALSE)
		if (num==0) {
			nextval <- readBin(rawcon,"integer",1,size=4,endian="little")
			peaks$val1[[peakIndex]] <- peaks$val1[[peakIndex]]+nextval
			curtype1 <- "fullint"
		} else {
			peaks$val1[[peakIndex]] <- peaks$val1[[peakIndex]]+num
			curtype1 <- "byte"
		}
	}

	peaks$val1 <- cumsum(peaks$val1)-1
	return(peaks)
}

wfs_getSubbytes <- function(num) {
	return(c(floor(num/16),num%%16))
}
