% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/miceRanger.R
\name{miceRanger}
\alias{miceRanger}
\title{miceRanger: Fast Imputation with Random Forests}
\usage{
miceRanger(
  data,
  m = 5,
  maxiter = 5,
  vars,
  valueSelector = c("meanMatch", "value"),
  meanMatchCandidates = pmax(round(nrow(data) * 0.01), 5),
  returnModels = FALSE,
  parallel = FALSE,
  verbose = TRUE,
  ...
)
}
\arguments{
\item{data}{The data to be imputed.}

\item{m}{The number of datasets to produce.}

\item{maxiter}{The number of iterations to run for each dataset.}

\item{vars}{Specifies which variables should be imputed, and how. Can be specified in 3 different ways:
\itemize{
  \item {<missing>} If not provided, all columns will be imputed using all columns. If
  a column contains no missing values, it will still be used as a feature to impute the columns that do.
  \item {<character vector>} If a character vector of column names is passed, these columns will
  be imputed using all available columns in the dataset. The order of this vector will determine the 
  order in which the variables are imputed.
 \item {<named list of character vectors>} Predictors can be specified for each variable with a named list. 
 List names are the variables to impute. Elements in the vectors should be features used to 
 impute that variable. The order of this list will determine the order in which the variables are imputed.
}}

\item{valueSelector}{How to select the value to be imputed from the model predictions.
Can be "meanMatch", "value", or a named vector containing a mixture of those values.
If a named vector is passed, the names must equal the variables to be imputed specified in \code{vars}.}

\item{meanMatchCandidates}{Specifies the number of candidate values to select from in the
mean matching algorithm. Can be specified either as an integer or as a named integer vector giving different
values by variable. If a named integer vector is passed, the names of the vector must contain, at a
minimum, the names of the numeric variables imputed using \code{valueSelector = "meanMatch"}.}

\item{returnModels}{Logical. Should the final model for each variable be returned? Set to \code{TRUE}
to use the \code{impute} function, which allows imputing new samples without having to run \code{miceRanger} again.
Setting to TRUE can cause the returned \code{miceDefs} object to take up a lot of memory. Use only if
you plan on using the \code{impute} function.}

\item{parallel}{Should the process run in parallel? Usually not necessary. This process will 
take advantage of any cluster set up when \code{miceRanger} is called.}

\item{verbose}{Should progress be printed?}

\item{...}{Other parameters passed to \code{ranger()} to control forest growth.}
}
\value{
A \code{miceDefs} object, containing the following:
\item{callParams}{The parameters of the object.}
\item{data}{The original data provided by the user.}
\item{naWhere}{Logical index of missing data, having the same dimensions as \code{data}.}
\item{missingCounts}{The number of missing values for each variable.}
\item{rawClasses}{The original classes provided in \code{data}.}
\item{newClasses}{The new classes of the returned data.}
\item{allImps}{The imputations of all variables at each iteration, for each dataset.}
\item{allImport}{The variable importance metrics at each iteration, for each dataset.}
\item{allError}{The OOB model error for all variables at each iteration, for each dataset.}
\item{finalImps}{The final imputations for each dataset.}
\item{finalImport}{The final variable importance metrics for each dataset.}
\item{finalError}{The final model error for each variable in every dataset.}
\item{finalModels}{Only returned if \code{returnModels = TRUE}. A list of \code{ranger} random forests for each dataset/variable.}
\item{imputationTime}{The total time in seconds taken to create the imputations for the 
  specified datasets and iterations. Does not include any setup time.}
}
\description{
Performs multiple imputation by chained random forests.
Returns a miceDefs object, which contains information about the imputation process.
}
\section{Vignettes}{


It is highly recommended to visit the \href{https://github.com/farrellday/miceRanger}{GitHub README} 
for a thorough walkthrough of miceRanger's capabilities, as well as performance benchmarks.

Several vignettes are also available on \href{https://cran.r-project.org/package=miceRanger}{miceRanger's listing}
on the CRAN website.
\enumerate{
  \item \href{https://cran.r-project.org/package=miceRanger/vignettes/miceAlgorithm.html}{The MICE Algorithm}
  \item \href{https://cran.r-project.org/package=miceRanger/vignettes/usingMiceRanger.html}{Imputing Missing Data with miceRanger}
  \item \href{https://cran.r-project.org/package=miceRanger/vignettes/diagnosticPlotting.html}{Diagnostic Plotting}
}
}

\examples{
## ---------------------------------------------------------------
## Simple example: one dataset, one iteration, a very small forest

# Data to impute: iris passed through amputeData().
data(iris)
ampIris <- amputeData(iris)

# num.threads and num.trees are not named miceRanger arguments; they
# go through the ... argument, which is passed on to ranger().
miceObj <- miceRanger(
  ampIris,
  m = 1,
  maxiter = 1,
  verbose = FALSE,
  num.threads = 1,
  num.trees = 5
)

\donttest{
## ---------------------------------------------------------------
## Run in parallel on a two-worker cluster

data(iris)
ampIris <- amputeData(iris)

# Create two workers and register them before calling miceRanger.
library(doParallel)
cl <- makeCluster(2)
registerDoParallel(cl)

# Two datasets, two iterations each, with parallel = TRUE.
miceObjPar <- miceRanger(
  ampIris,
  m = 2,
  maxiter = 2,
  parallel = TRUE,
  verbose = FALSE
)

# Stop the workers and switch back to sequential execution.
stopCluster(cl)
registerDoSEQ()


## ---------------------------------------------------------------
## Complex imputation schema: per-variable predictors and settings

data(iris)
ampIris <- amputeData(iris)

# Variables to impute (list names) and the predictors used for each
# (list elements). The order of the list is the imputation order.
v <- list(
  Sepal.Width = c("Sepal.Length", "Petal.Width", "Species"),
  Sepal.Length = c("Sepal.Width", "Petal.Width"),
  Species = c("Sepal.Width")
)

# Value selection method for each imputed variable.
vs <- c(
  Sepal.Width = "meanMatch",
  Sepal.Length = "value",
  Species = "meanMatch"
)

# Number of mean matching candidates, by variable.
mmc <- c(
  Sepal.Width = 4,
  Species = 10
)

miceObjCustom <- miceRanger(
  ampIris,
  m = 1,
  maxiter = 1,
  vars = v,
  valueSelector = vs,
  meanMatchCandidates = mmc,
  verbose = FALSE
)
}
}
