
#' @title Test multiple linear relationships between probability vectors in factorial designs
#' @description Perform an optimal transport based HSD test to deal with multiple comparisons simultaneously.
#' @param test a `FDOTT` object, i.e., output of [`FDOTT`].
#' @param weights weight vector of length \eqn{K}. `weights = NULL` means that no weights are used. For `weights = TRUE` the standard weighting is used.
#' @param group.sizes integer vector summing to the number of comparisons \eqn{M}. Used to split the null hypothesis into sub-hypotheses of the
#' specified sizes. The default `group.sizes = TRUE` extracts these sizes from `test`. For `group.sizes = NULL`, each equation is its own group.
#' @returns A `FDOTT_HSD` object containing:
#' \tabular{ll}{
#'  `p.value`      \tab the \eqn{p}-values \cr
#'  `statistic`    \tab the values of the test statistics \cr
#'  `null.samples` \tab samples drawn from the null distribution \cr
#' }
#' @details Let \eqn{H_0^L : L\mu = 0} be the null hypothesis of `test`. In the case of rejection, it is of interest to
#' find out exactly which row-equations are not satisfied with statistical significance. To this end, \eqn{L\mu = 0} can be split into
#' a number of sub-hypotheses which are tested simultaneously via an approach inspired by Tukey's HSD test, see Groppe et al. (2025) for more details.
#' @example examples/hsd.R
#' @references M. Groppe, L. Niemöller, S. Hundrieser, D. Ventzke, A. Blob, S. Köster and A. Munk (2025). Optimal Transport Based Testing in Factorial Design.
#' arXiv preprint. \doi{10.48550/arXiv.2509.13970}.
#' @seealso [`FDOTT`]
#' @export
FDOTT_HSD <- \(test, weights = NULL, group.sizes = TRUE) {
    stopifnot(methods::is(test, "FDOTT"))
    M <- nrow(test$L)

    if (isTRUE(weights)) {
        delta <- limit_coeffs(test$n)$delta
        weights <- 1 / sqrt(c(abs(test$L) %*% delta))
        weights <- weights / sum(weights)
    } else if (isFALSE(weights) || is.null(weights)) {
        weights <- rep(1, M)
    } else {
        stopifnot(is_prob_vec(weights, TRUE) && length(weights) == M)
    }

    if (isTRUE(group.sizes)) {
        group.sizes <- attr(test$L, "size")
        names(group.sizes) <- attr(test$L, "effect")
        if (length(group.sizes) == 1) {
            group.sizes <- NULL
            # better formatting for one-way layout
            if (attr(test$L, "effect") == "=") {
                L <- test$L
                name <- rownames(test$mu)
                i <- apply(L ==  1, 1, which)
                j <- apply(L == -1, 1, which)
                group.sizes <- rep(1, length(i))
                names(group.sizes) <- paste0(name[i], " = ", name[j])
            }
        }
    } else if (isFALSE(group.sizes)) {
        group.sizes <- NULL
    }

    tstat.m <- test$diff * weights
    ls <- sweep(test$null.samples.diff, 1, weights, "*")

    if (is.null(group.sizes)) {
        group.sizes <- rep(1, M)
    } else {
        stopifnot(
            is_num_vec(group.sizes),
            is_wholenumber(group.sizes),
            sum(group.sizes) == M
        )

        idx <- group_idx(group.sizes)
        tstat.m <- sapply(idx, \(i) sum(tstat.m[i]))
        ls <- do.call(rbind, lapply(idx, \(i) colSums(ls[i, , drop = FALSE])))
    }
    ls <- apply(ls, 2, max)

    p <- sapply(tstat.m, \(tstat) mean(ls >= tstat))
    names(p) <- names(group.sizes)
    if (is.null(names(p))) {
        names(p) <- paste0("group", seq_along(p))
    }

    list(
        test         = test,
        p.value      = p,
        statistic    = tstat.m,
        null.samples = ls,
        weights      = weights,
        group.sizes  = group.sizes
    ) |> structure(class = "FDOTT_HSD")
}

#' @export
print.FDOTT_HSD <- \(x, ...) {
    lvls <- x$test$fac.lvls
    weighted <- isFALSE(all.equal(x$weights, rep(1, nrow(x$test$null.samples.diff)), check.attributes = FALSE))
    cat("\n")
    catf("FDOTT_HSD: Optimal Transport Based %sHSD Test in Factorial Design\n", if (weighted) "Weighted " else "")
    cat("\n")
    catf("data: %d-way layout with factor%s %s of size %s and on ground space of size %d\n",
         length(lvls), if (length(lvls) == 1) "" else "s",
         paste0(names(lvls), collapse = ", "), paste0(lvls, collapse = ", "), nrow(x$test$costm))
    if (isTRUE(all.equal(x$group.sizes, attr(x$test$L, "size"), check.attributes = FALSE))) {
        catf("null hypotheses: %s\n", paste(attr(x$test$L, "effect"), paste0("[", attr(x$test$L, "size"), " eqs]"), collapse = ", "))
    } else if (isTRUE(attr(x$test$L, "effect") == "=") && all(x$group.sizes == 1)) {
        catf("null hypotheses: all %d pairwise comparisons between the %d probability vectors\n", nrow(x$test$L), prod(lvls))
    } else {
        catf("null hypotheses: %s\n", paste(attr(x$test$L, "effect"), paste0("[", attr(x$test$L, "size"), " eqs]"), collapse = " & "))
        catf("                 splitted into %d groups of size %s\n", length(x$group.sizes), paste0(x$group.sizes, collapse = ", "))
    }
    cat("p-values:\n")
    print(x$p.value)
    cat("\n")
    invisible(x)
}
