% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/statistical.R
\name{statistical}
\alias{statistical}
\alias{statistical.default}
\alias{statistical.formula}
\title{Statistical meta-features}
\usage{
statistical(...)

\method{statistical}{default}(x, y, features = "all", summary = c("mean",
  "sd"), by.class = FALSE, transform = TRUE, ...)

\method{statistical}{formula}(formula, data, features = "all",
  summary = c("mean", "sd"), by.class = FALSE, transform = TRUE, ...)
}
\arguments{
\item{...}{Further arguments passed to the summarization functions.}

\item{x}{A data.frame containing only the input attributes.}

\item{y}{A factor response vector with one label for each row/component of x.}

\item{features}{A list of feature names or \code{"all"} to include all of them.
The details section describes the valid values for this group.}

\item{summary}{A list of summarization functions or empty for all values. See
\link{post.processing} method for more information. (Default: 
\code{c("mean", "sd")})}

\item{by.class}{A logical value indicating if the meta-features must be
computed for each group of samples belonging to different output classes.
(Default: FALSE)}

\item{transform}{A logical value indicating if the categorical attributes
should be transformed. If \code{FALSE} they will be ignored. (Default: 
\code{TRUE})}

\item{formula}{A formula to define the class column.}

\item{data}{A data.frame dataset containing the input attributes and the
class column.}
}
\value{
A list named by the requested meta-features.
}
\description{
Statistical meta-features are the standard statistical measures to describe
the numerical properties of a distribution of data. As it requires only
numerical attributes, the categorical data are transformed to numerical.
}
\details{
The following features are allowed for this method:
 \describe{
   \item{"canCor"}{Canonical correlations between the predictive attributes 
   and the class (multi-valued).}
   \item{"gravity"}{Center of gravity, which is the distance between the 
   instance in the center of the majority class and the instance-center of 
   the minority class.}
   \item{"cor"}{Absolute attributes correlation, which measures the 
   correlation between each pair of the numeric attributes in the dataset 
   (multi-valued). This measure accepts an extra argument called 
   \code{method = c("pearson", "kendall", "spearman")}. See 
   \code{\link[stats]{cor}} for more details.}
   \item{"cov"}{Absolute attributes covariance, which measures the covariance 
   between each pair of the numeric attributes in the dataset 
   (multi-valued).}
   \item{"nrDisc"}{Number of the discriminant functions.}
   \item{"eigenvalues"}{Eigenvalues of the covariance matrix (multi-valued).}
   \item{"gMean"}{Geometric mean of attributes (multi-valued).}
   \item{"hMean"}{Harmonic mean of attributes (multi-valued).}
   \item{"iqRange"}{Interquartile range of attributes (multi-valued).}
   \item{"kurtosis"}{Kurtosis of attributes (multi-valued).}
   \item{"mad"}{Median absolute deviation of attributes (multi-valued).}
   \item{"max"}{Maximum value of attributes (multi-valued).}
   \item{"mean"}{Mean value of attributes (multi-valued).}
   \item{"median"}{Median value of attributes (multi-valued).}
   \item{"min"}{Minimum value of attributes (multi-valued).}
   \item{"nrCorAttr"}{Number of attribute pairs with high correlation 
   (multi-valued when \code{by.class=TRUE}).}
   \item{"nrNorm"}{Number of attributes with normal distribution. The 
   Shapiro-Wilk Normality Test is used to assess if an attribute is or is not
   normally distributed (multi-valued only when \code{by.class=TRUE}).}
   \item{"nrOutliers"}{Number of attributes with outlier values. 
   Tukey's boxplot algorithm is used to compute if an attribute has or does 
   not have outliers (multi-valued only when \code{by.class=TRUE}).}
   \item{"range"}{Range of attributes (multi-valued).}
   \item{"sd"}{Standard deviation of the attributes (multi-valued).}
   \item{"sdRatio"}{Statistic test for homogeneity of covariances.}
   \item{"skewness"}{Skewness of attributes (multi-valued).}
   \item{"sparsity"}{Attributes sparsity, which represents the degree of 
   discreteness of each attribute in the dataset (multi-valued).}
   \item{"tMean"}{Trimmed mean of attributes (multi-valued). It is the 
   arithmetic mean excluding the lowest and highest 20\% of the instances.}
   \item{"var"}{Attributes variance (multi-valued).}
   \item{"wLambda"}{Wilks Lambda.}
 }
 This method uses simple binarization to transform the categorical attributes
 when \code{transform=TRUE}.
}
\examples{
## Extract all meta-features
statistical(Species ~ ., iris)

## Extract some meta-features
statistical(iris[1:4], iris[5], c("cor", "nrNorm"))

## Extract all meta-features without summarizing the results
statistical(Species ~ ., iris, summary=c())

## Use another summarization function
statistical(Species ~ ., iris, summary=c("min", "median", "max"))

## Extract statistical measures using by.class approach
statistical(Species ~ ., iris, by.class=TRUE)

## Do not transform the data (using only numerical attributes)
statistical(Species ~ ., iris, transform=FALSE)
}
\references{
Ciro Castiello, Giovanna Castellano, and Anna M. Fanelli. Meta-data: 
 Characterization of input features for meta-learning. In 2nd International 
 Conference on Modeling Decisions for Artificial Intelligence (MDAI), 
 pages 457 - 468, 2005.

 Shawkat Ali, and Kate A. Smith. On learning algorithm selection for 
 classification. Applied Soft Computing, volume 6, pages 119 - 138, 2006.
}
\seealso{
Other meta-features: \code{\link{general}},
  \code{\link{infotheo}}, \code{\link{landmarking}},
  \code{\link{model.based}}
}
