#' Rank-based Two-Sample Tests on Similarity / Graph-Induced Rank Matrices
#'
#' @description
#' \code{RISE} constructs a nonnegative, symmetric rank/graph matrix \eqn{R} from
#' two samples \eqn{X} and \eqn{Y} (or from a pre-computed similarity matrix \eqn{S}),
#' then computes a Hotelling-type quadratic statistic with an asymptotic
#' chi-square p-value. Optionally, a permutation p-value is returned.
#'
#' @param X Numeric matrix of size \eqn{m \times p} (first sample).
#'   Optional if \code{S} is supplied.
#' @param Y Numeric matrix of size \eqn{n \times p} (second sample).
#'   Optional if \code{S} is supplied.
#' @param S Numeric similarity matrix of size \eqn{N \times N} with \eqn{N=m+n}
#'   (larger values indicate greater similarity). If \code{X} and \code{Y} are provided,
#'   \code{S} is constructed internally as \code{-dist(rbind(X, Y))}.
#' @param sample1ID Integer indices (length \eqn{m}) for sample \eqn{X} in \code{S}.
#'   Ignored if \code{X} and \code{Y} are given.
#' @param sample2ID Integer indices (length \eqn{n}) for sample \eqn{Y} in \code{S}.
#'   Ignored if \code{X} and \code{Y} are given.
#' @param k Positive integer tuning parameter.
#'   For \code{"RgNN"}/\code{"RoNN"}, it is the neighborhood size in the k-nearest-neighbor graph (k-NNG);
#'   for \code{"RgMST"}/\code{"RoMST"}, it controls the number of minimum-spanning-tree layers (k-MST);
#'   for \code{"RoMDP"}, it specifies the number of rounds of minimum-distance non-bipartite matching (k-MDP).
#' @param rank.type Character, one of \code{c("RgNN","RoNN","RgMST","RoMST","RoMDP")}.
#'   Prefix \code{"Rg"} denotes graph-induced ranks; prefix \code{"Ro"}
#'   denotes overall ranks obtained by ordering all selected edges. See the references for precise definitions.
#' @param perm Integer, number of permutations for a permutation p-value (default \code{0}).
#'
#' @return A list with components:
#' \itemize{
#'   \item \code{test.statistic}: quadratic form \eqn{T_R}.
#'   \item \code{pval.approx}: asymptotic p-value (chi-square, df = 2).
#'   \item \code{pval.perm}: permutation p-value (present only if \code{perm > 0}).
#' }
#'
#' @details
#' From \eqn{S} (or from \eqn{X}, \eqn{Y}), the procedure constructs a
#' symmetric matrix \eqn{R} with zero diagonal using one of the supported
#' graph/ranking schemes. It then forms the within-group edge sums
#' \eqn{U_x = \sum_{i,j \in X} R_{ij}} and
#' \eqn{U_y = \sum_{i,j \in Y} R_{ij}}.
#' The expectation vector and covariance matrix of \eqn{(U_x, U_y)} are
#' derived under the permutation null distribution. The test statistic is
#' \deqn{T = ( U_x - \mu_x, U_y - \mu_y ) \Sigma^{-1}
#'        \begin{pmatrix} U_x - \mu_x \\ U_y - \mu_y \end{pmatrix},}
#' where \eqn{\mu_x, \mu_y} are the expected values and \eqn{\Sigma} is the
#' covariance matrix. Under the null hypothesis, \eqn{T} is asymptotically
#' chi-square distributed with 2 degrees of freedom.
#'
#' @references
#' Zhou, D. and Chen, H. (2023).
#' \emph{A new ranking scheme for modern data and its application to two-sample hypothesis testing}.
#' In \emph{Proceedings of the 36th Annual Conference on Learning Theory (COLT 2023)}, PMLR, pp. 3615–3668.
#'
#' @examples
#' set.seed(1)
#' X <- matrix(rnorm(50*100, mean = 0), nrow=50)
#' Y <- matrix(rnorm(50*100, mean = 0.3), nrow=50)
#' # RgNN: graph-induced ranks from the k-nearest-neighbor graph
#' out.RgNN <- RISE(X = X, Y = Y, k = 10, rank.type = "RgNN", perm = 1000)
#' out.RgNN
#'
#' # RoNN: overall ranks obtained by ordering edges from the k-NN graph
#' out.RoNN <- RISE(X = X, Y = Y, k = 10, rank.type = "RoNN", perm = 1000)
#' out.RoNN
#'
#' # RgMST: graph-induced ranks from layered minimum spanning trees
#' \donttest{
#' out.RgMST <- RISE(X = X, Y = Y, k = 10, rank.type = "RgMST", perm = 1000)
#' out.RgMST
#' }
#'
#' # RoMST: overall ranks obtained by ordering edges in the MST
#' \donttest{
#' out.RoMST <- RISE(X = X, Y = Y, k = 10, rank.type = "RoMST", perm = 1000)
#' out.RoMST
#' }
#'
#' # RoMDP: overall ranks obtained by ordering edges from minimum-distance pairings
#' \donttest{
#' out.RoMDP <- RISE(X = X, Y = Y, k = 10, rank.type = "RoMDP", perm = 1000)
#' out.RoMDP
#' }
#'
#' @seealso \code{\link{rTests.base}}, \code{\link{Cov.asy}}
#' @importFrom stats dist pchisq as.dist
#' @export

RISE <- function(X = NULL, Y = NULL, S = NULL, sample1ID = NULL, sample2ID = NULL,
                 k = 10, rank.type = "RgNN", perm = 0) {
  # Entry point: build the rank/graph matrix R for the requested scheme, then
  # delegate the statistic and p-value computation to rTests.base().

  # --- Argument validation ---------------------------------------------------
  rank.types <- c("RgNN", "RoNN", "RgMST", "RoMST", "RoMDP")
  if (!is.character(rank.type) || length(rank.type) != 1L ||
      is.na(rank.type) || !(rank.type %in% rank.types)) {
    # Without this check an unknown type skipped every branch below and failed
    # later with "object 'R' not found".
    stop("`rank.type` must be one of: ", paste(rank.types, collapse = ", "), ".")
  }
  if (!is.numeric(k) || length(k) != 1L || is.na(k) || k < 1 || k != round(k)) {
    stop("`k` must be a positive integer.")
  }

  # --- Similarity matrix and group labels ------------------------------------
  if (!is.null(X) && !is.null(Y)) {
    # Raw samples take precedence: any user-supplied S / sample IDs are replaced.
    X <- as.matrix(X)
    Y <- as.matrix(Y)
    if (ncol(X) != ncol(Y)) {
      stop("`X` and `Y` must have the same number of columns.")
    }
    # Negated Euclidean distance, so that larger values mean "more similar".
    S <- -as.matrix(stats::dist(rbind(X, Y)))
    sample1ID <- seq_len(nrow(X))
    sample2ID <- nrow(X) + seq_len(nrow(Y))
  }

  if (!is.matrix(S) || !is.numeric(S) || nrow(S) != ncol(S)) {
    stop("`S` must be a square similarity matrix.")
  }
  N <- nrow(S)

  if (is.null(sample1ID) || is.null(sample2ID)) {
    stop("`sample1ID` and `sample2ID` are required when `X` and `Y` are not both supplied.")
  }
  ids <- c(sample1ID, sample2ID)
  if (!is.numeric(ids) || anyNA(ids) || any(ids != round(ids)) ||
      any(ids < 1) || any(ids > N)) {
    stop("`sample1ID` and `sample2ID` must be integer indices between 1 and nrow(S).")
  }
  # rTests.base() treats the two groups as a partition of 1..N (its permutation
  # step uses the complement of group 1 as group 2), so enforce that here.
  if (anyDuplicated(ids) > 0 || length(ids) != N) {
    stop("`sample1ID` and `sample2ID` must be disjoint and together index every row of `S`.")
  }
  if (length(sample1ID) < 2 || length(sample2ID) < 2) {
    # With a single observation in a group the covariance matrix built by
    # Cov.asy() has a zero row/column and cannot be inverted.
    stop("Each sample must contain at least 2 observations.")
  }

  # Turn a symmetric 0/positive selection matrix into overall ranks: the
  # selected upper-triangle edges are ranked by similarity, everything else is
  # zeroed, and the result is mirrored to stay symmetric.
  overall_rank <- function(sel.mat) {
    sel <- (sel.mat > 0) & upper.tri(sel.mat)
    sel.mat[sel] <- rank(S[sel])
    sel.mat[lower.tri(sel.mat)] <- 0
    sel.mat + t(sel.mat)
  }

  # --- Rank matrix construction ----------------------------------------------
  if (rank.type %in% c("RgNN", "RoNN")) {
    if (k > N - 1) {
      # For k >= N the shift below would leave a non-zero diagonal in R.
      stop("`k` must not exceed nrow(S) - 1 for rank types 'RgNN' and 'RoNN'.")
    }
    # Push the diagonal below every other entry so a point is never its own
    # neighbour (it always receives the smallest within-row rank).
    diag(S) <- min(S) - 100
    R <- t(apply(S, 1, rank)) - 1
    # Shift so the nearest neighbour scores k, the k-th scores 1, the rest 0.
    R <- R - N + 1 + k
    R[R < 0] <- 0
    R <- (R + t(R)) / 2
    if (rank.type == "RoNN") {
      R <- overall_rank(R)
    }
  } else if (rank.type %in% c("RgMST", "RoMST")) {
    R <- matrix(0, N, N)
    # Convert similarity to a non-negative dissimilarity for the MST routine.
    D <- max(S) - S
    diag(D) <- 0
    D <- stats::as.dist(D)
    if (rank.type == "RgMST") {
      # Each pass adds 1 to every edge returned by mstree(D, j); edges returned
      # for several values of j therefore accumulate a larger weight.
      for (j in seq_len(k)) {
        edges <- ade4::mstree(D, j)
        fwd <- cbind(edges[, 1], edges[, 2])
        bwd <- cbind(edges[, 2], edges[, 1])
        R[fwd] <- R[fwd] + 1
        R[bwd] <- R[bwd] + 1
      }
    } else {
      edges <- ade4::mstree(D, k)
      R[cbind(edges[, 1], edges[, 2])] <- 1
      R[cbind(edges[, 2], edges[, 1])] <- 1
      R <- overall_rank(R)
    }
  } else {
    # rank.type == "RoMDP": k rounds of minimum-distance pairing.
    R <- matrix(0, N, N)
    D <- max(S) - S
    diag(D) <- 0
    maxD <- max(D)
    for (j in seq_len(k)) {
      mdm <- nbpMatching::distancematrix(D)
      MDP <- nbpMatching::nonbimatch(mdm)
      g1 <- MDP$matches$Group1.Row
      g2 <- MDP$matches$Group2.Row
      # NOTE(review): nbpMatching appears to pad odd-sized inputs with an extra
      # (ghost) unit; rows pointing past N are dropped so the indexing below
      # stays in bounds. No-op when every index is <= N. Confirm against the
      # nbpMatching documentation.
      keep <- g1 <= N & g2 <= N
      pairs <- cbind(g1[keep], g2[keep])
      R[pairs] <- 1
      # Make already-used pairs very expensive so later rounds pick new ones.
      D[pairs] <- 100 * maxD
    }
    R <- overall_rank(R)
  }

  rTests.base(R, sample1ID, sample2ID, perm)
}



#' Core Test on a Given Rank/Graph Matrix R
#'
#' @description
#' Internal function used by \code{\link{RISE}} once a rank/graph matrix \eqn{R} is constructed.
#' It computes the within-group sums, asymptotic covariance, quadratic statistic,
#' and (optionally) permutation-based p-value.
#'
#' @details
#' The observed pair \eqn{(U_x, U_y)} from \code{\link{r.stat}} is centred at
#' \code{c(m*(m-1), n*(n-1)) * sum(R)/(N*(N-1))} and combined with the inverse of
#' the matrix returned by \code{\link{Cov.asy}} into a quadratic form. The
#' approximate p-value is the upper tail of a chi-square distribution with 2
#' degrees of freedom, evaluated with \code{lower.tail = FALSE} so that very
#' small p-values are not rounded to zero. When \code{perm > 0}, group 1 is
#' redrawn as a random size-\eqn{m} subset of \code{1:N} (group 2 being its
#' complement) and the permutation p-value is the fraction of permuted
#' statistics strictly greater than the observed one.
#'
#' @param R Symmetric nonnegative matrix (N x N) with zero diagonal.
#' @param sample1ID Integer indices for group X.
#' @param sample2ID Integer indices for group Y.
#' @param perm Integer permutations for permutation p-value (default 0).
#' @return A list with \code{test.statistic}, \code{pval.approx} and, when
#'   \code{perm > 0}, \code{pval.perm}. See \code{RISE()}.
#' @keywords internal
#' @export
rTests.base <- function(R, sample1ID, sample2ID, perm = 0) {
  N <- nrow(R)
  m <- length(sample1ID)
  n <- length(sample2ID)

  observed <- r.stat(R, sample1ID, sample2ID)
  Cov <- Cov.asy(R, m, n)
  # Invert once; the same inverse serves the observed and permuted statistics.
  Cov.inv <- solve(Cov)

  # Null expectation of (Ux, Uy): number of ordered within-group pairs times
  # the average off-diagonal entry of R.
  r0 <- sum(R) / (N * (N - 1))
  mu <- c(m * (m - 1), n * (n - 1)) * r0

  dev <- observed - mu
  TR <- (t(dev) %*% Cov.inv %*% dev)[1, 1]
  # Upper tail computed directly: 1 - pchisq() underflows to exactly 0 for
  # large statistics.
  p <- stats::pchisq(TR, df = 2, lower.tail = FALSE)
  test.asy <- list("test.statistic" = TR, "pval.approx" = p)

  if (perm > 0) {
    # Same count as the former `1:perm`: fractional values are truncated and
    # any positive `perm` yields at least one permutation.
    n.perm <- max(1, floor(perm))
    BR <- t(vapply(seq_len(n.perm), function(i) {
      ID1 <- sample.int(N, size = m, replace = FALSE)
      ID2 <- setdiff(seq_len(N), ID1)
      r.stat(R, ID1, ID2)
    }, numeric(2)))
    BR0 <- scale(BR, center = mu, scale = FALSE)
    # Row-wise quadratic forms without building the full n.perm x n.perm product.
    T2B.all <- rowSums((BR0 %*% Cov.inv) * BR0)
    # NOTE(review): strict inequality with no +1 correction, so the estimate
    # can be exactly 0; kept as-is to preserve existing results.
    pval.perm <- mean(TR < T2B.all)
    test.asy <- c(test.asy, list("pval.perm" = pval.perm))
  }
  test.asy
}
#' Within-Group Sums (Ux, Uy)
#'
#' @description
#' Helper for \code{\link{RISE}}. For each of the two index sets it sums the
#' square sub-block of \eqn{R} obtained by selecting those rows and the same
#' columns, i.e. the total of all entries linking members of the same group.
#'
#' @param R Symmetric nonnegative matrix (N x N) with zero diagonal.
#' @param sample1ID Indices for group X.
#' @param sample2ID Indices for group Y.
#' @return Numeric vector \code{c(Ux, Uy)}: the block sum for group X followed
#'   by the block sum for group Y.
#' @keywords internal
#' @export
r.stat <- function(R, sample1ID, sample2ID) {
  # Sum of the sub-matrix of R restricted to one group's rows and columns.
  block_total <- function(ids) {
    sum(R[ids, ids])
  }
  c(block_total(sample1ID), block_total(sample2ID))
}

#' Asymptotic Covariance of (Ux, Uy) under Permutation Null
#'
#' @description
#' Supporting function for \code{\link{RISE}} and \code{\link{rTests.base}}.
#' Computes the 2x2 asymptotic covariance matrix of \eqn{(U_x, U_y)} under the
#' permutation null distribution.
#'
#' @details
#' The matrix is assembled from three summaries of \eqn{R}: the average
#' off-diagonal entry \code{r0 = sum(R)/(N*(N-1))}, the entry-level spread
#' \code{Vd = sum(R^2)/(N*(N-1)) - r0^2}, and the row-level spread
#' \code{V1 = mean(rowSums(R)^2)/(N-1)^2 - r0^2}. All three terms share the
#' factor \code{1/((N-2)*(N-3))}, so at least 4 rows are required; an error is
#' raised otherwise instead of returning non-finite values.
#'
#' @param R Symmetric nonnegative matrix (N x N) with zero diagonal and
#'   \eqn{N \ge 4}.
#' @param m Sample size of X.
#' @param n Sample size of Y.
#' @return 2x2 covariance matrix, rows/columns ordered as \eqn{(U_x, U_y)}.
#' @keywords internal
#' @export
Cov.asy <- function(R, m, n) {
  N <- nrow(R)
  if (is.null(N) || N < 4) {
    # (N - 2) * (N - 3) is zero for N = 2 or 3, which silently produced
    # Inf/NaN entries before.
    stop("`R` must be a matrix with at least 4 rows.")
  }
  Ris <- rowSums(R)

  # Average off-diagonal entry of R.
  r0 <- sum(Ris) / (N * (N - 1))
  # Second moments, kept on the squared scale (no sqrt()/^2 round-trip).
  r1.sq <- mean(Ris^2) / (N - 1)^2
  rd.sq <- sum(R * R) / (N * (N - 1))

  Vd <- rd.sq - r0^2
  V1 <- r1.sq - r0^2

  denom <- (N - 2) * (N - 3)
  Vx <- 2 * m * n * (m - 1) / denom * ((n - 1) * Vd + 2 * (m - 2) * (N - 1) * V1)
  Vy <- 2 * m * n * (n - 1) / denom * ((m - 1) * Vd + 2 * (n - 2) * (N - 1) * V1)
  Vxy <- 2 * m * (m - 1) * n * (n - 1) / denom * (Vd - 2 * (N - 1) * V1)

  matrix(c(Vx, Vxy, Vxy, Vy), nrow = 2)
}
