% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cluster.R
\name{pdkMeans}
\alias{pdkMeans}
\title{K-means clustering for HPD matrices}
\usage{
pdkMeans(
  X,
  K,
  metric = "Riemannian",
  m = 1,
  eps = 1e-05,
  max_iter = 100,
  centroids
)
}
\arguments{
\item{X}{a (\eqn{d,d,S})-dimensional array of (\eqn{d,d})-dimensional HPD matrices for \eqn{S}
different subjects. Also accepts a (\eqn{d,d,n,S})-dimensional array, which is understood to be an array of
\eqn{n}-dimensional sequences of (\eqn{d,d})-dimensional HPD matrices for \eqn{S} different subjects.}

\item{K}{the number of clusters, a positive integer larger than 1.}

\item{metric}{the metric that the space of HPD matrices is equipped with. The default choice is \code{"Riemannian"},
but this can also be one of: \code{"logEuclidean"}, \code{"Cholesky"}, \code{"rootEuclidean"} or
\code{"Euclidean"}. Additional details are given below.}

\item{m}{a fuzziness parameter larger than or equal to \eqn{1}. If \eqn{m = 1} the cluster assignments are no longer fuzzy,
i.e., the procedure performs hard clustering. Defaults to \code{m = 1}.}

\item{eps}{an optional tolerance parameter determining the stopping criterion. The k-means algorithm
terminates if the intrinsic distance between cluster centers is smaller than \code{eps}. Defaults to \code{eps = 1e-05}.}

\item{max_iter}{an optional parameter tuning the maximum number of iterations in the
k-means algorithm, defaults to \code{max_iter = 100}.}

\item{centroids}{an optional (\eqn{d,d,K})- or (\eqn{d,d,n,K})-dimensional array, depending on the input array \code{X},
specifying the initial cluster centroids. If not specified, \code{K} initial cluster centroids are randomly sampled without
replacement from the input array \code{X}.}
}
\value{
Returns a list with two components:
\describe{
  \item{cl.assignments }{ an (\eqn{S,K})-dimensional matrix, where the value at position (\eqn{s,k}) in the
  matrix corresponds to the (probabilistic or binary) cluster membership assignment of subject \eqn{s} with respect
  to cluster \eqn{k}.}
  \item{cl.centroids }{ either a (\eqn{d,d,K})- or (\eqn{d,d,n,K})-dimensional array, depending on the input array \code{X},
  corresponding respectively to the \code{K} (\eqn{d,d})- or (\eqn{d,d,n})-dimensional final cluster centroids.
  }
}
}
\description{
\code{pdkMeans} performs (fuzzy) k-means clustering for collections of HPD matrices, such as covariance or
spectral density matrices, based on a number of different metrics in the space of HPD matrices.
}
\details{
The input array \code{X} corresponds to a collection of \eqn{(d,d)}-dimensional HPD matrices
for \eqn{S} different subjects. If the fuzziness parameter satisfies \code{m > 1}, the \eqn{S} subjects are assigned to
\eqn{K} different clusters in a probabilistic fashion according to a fuzzy k-means algorithm as detailed in classical texts,
such as \insertCite{BE81}{pdSpecEst}. If \code{m = 1}, the \eqn{S} subjects are assigned to the \eqn{K} clusters in a non-probabilistic
fashion according to a standard (hard) k-means algorithm. If not specified by the user, the \eqn{K} cluster
centers are initialized by random sampling without replacement from the input array of HPD matrices \code{X}.
The distance measure in the (fuzzy) k-means algorithm is induced by the metric on the space of HPD matrices specified by the user.
By default, the space of HPD matrices is equipped with (i) the affine-invariant Riemannian metric (\code{metric = 'Riemannian'})
as detailed in, e.g., \insertCite{B09}{pdSpecEst}[Chapter 6] or \insertCite{PFA05}{pdSpecEst}. Instead, this can also be one of:
(ii) the log-Euclidean metric (\code{metric = 'logEuclidean'}), the Euclidean inner product between matrix logarithms;
(iii) the Cholesky metric (\code{metric = 'Cholesky'}), the Euclidean inner product between Cholesky decompositions; (iv) the
Euclidean metric (\code{metric = 'Euclidean'}); or (v) the root-Euclidean metric (\code{metric = 'rootEuclidean'}). The default
choice of metric (affine-invariant Riemannian) satisfies several useful properties not shared by the other metrics, see e.g.,
\insertCite{C18}{pdSpecEst} for more details. Note that this comes at the cost of increased computation time in comparison to one
of the other metrics.
}
\examples{
## Generate 20 random HPD matrices in 2 groups
## Helper: draws a random 3 x 3 complex matrix x whose real and imaginary parts
## are rescale times standard normal draws, and returns the conjugate transpose
## of x multiplied by x.
m <- function(rescale){
 x <- matrix(complex(real = rescale * rnorm(9), imaginary = rescale * rnorm(9)), nrow = 3)
 t(Conj(x)) \%*\% x
}
## Stack 10 matrices drawn with rescale = 0.25 and 10 drawn with rescale = 1
## into a single (3,3,20)-dimensional array, one matrix per subject
X <- array(c(replicate(10, m(0.25)), replicate(10, m(1))), dim = c(3, 3, 20))

## Compute fuzzy k-means cluster assignments
## (m = 2 is larger than 1, so the clustering is fuzzy; only the
## cl.assignments component of the returned list is kept)
cl <- pdkMeans(X, K = 2, m = 2)$cl.assignments

}
\references{
\insertAllCited{}
}
\seealso{
\code{\link{pdDist}}, \code{\link{pdSpecClust1D}}, \code{\link{pdSpecClust2D}}
}
