% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/inTrainingSample.R
\name{inTrainingSample}
\alias{inTrainingSample}
\title{Function to define a learning sample based on balanced sampling}
\usage{
inTrainingSample(
  y,
  propTraining = 2/3,
  classdist = c("balanced", "unbalanced")
)
}
\arguments{
\item{y}{factor with the class labels for the total data set}

\item{propTraining}{proportion of the data that should be in a training set;
the default value is 2/3.}

\item{classdist}{distribution of classes; indicates whether the class
distribution is 'balanced' or 'unbalanced'. The sampling strategy for each run
is adapted accordingly.}
}
\value{
logical vector indicating for each observation in \code{y} whether
the observation is in the learning sample (\code{TRUE}) or not
(\code{FALSE})
}
\description{
This function takes in a factor with class labels of the total dataset,
draws a sample (balanced with respect to the different levels of the factor)
and returns a logical vector indicating whether the observation is in the
learning sample (\code{TRUE}) or not (\code{FALSE}).
}
\examples{

  ### this example demonstrates the sampling logic in the case of an unbalanced distribution of classes
  y <- factor(c(rep("A", 21), rep("B", 80)))
  
  nlcv:::inTrainingSample(y, 2/3, "unbalanced") 
  table(y[nlcv:::inTrainingSample(y, 2/3, "unbalanced")])  # should be 14, 14 (for A, B resp.)
  table(y[!nlcv:::inTrainingSample(y, 2/3, "unbalanced")]) # should be 7, 66  (for A, B resp.) 

}
\author{
Willem Talloen and Tobias Verbeke
}
\keyword{manip}
