% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/haldensify.R
\name{haldensify}
\alias{haldensify}
\title{Cross-validated HAL Conditional Density Estimation}
\usage{
haldensify(
  A,
  W,
  wts = rep(1, length(A)),
  grid_type = "equal_range",
  n_bins = round(c(0.5, 1, 1.5, 2) * sqrt(length(A))),
  cv_folds = 5L,
  lambda_seq = exp(seq(-1, -13, length = 1000L)),
  smoothness_orders = 0L,
  hal_basis_list = NULL,
  ...
)
}
\arguments{
\item{A}{A \code{numeric} vector of observed values.}

\item{W}{A \code{data.frame}, \code{matrix}, or similar giving the values of
baseline covariates (potential confounders) for the observed units. These
make up the conditioning set for the density estimate. For estimation of a
marginal density, specify a constant \code{numeric} vector or \code{NULL}.}

\item{wts}{A \code{numeric} vector of observation-level weights. The default
is to weight all observations equally.}

\item{grid_type}{A \code{character} indicating the strategy to be used in
creating bins along the observed support of \code{A}. For bins of equal
range, use \code{"equal_range"}; consult the documentation of
\code{\link[ggplot2]{cut_interval}} for more information. To ensure each
bin has the same number of observations, use \code{"equal_mass"}; consult
the documentation of \code{\link[ggplot2]{cut_number}} for details. The
default is \code{"equal_range"} since this has been found to provide better
performance in simulation experiments; however, both types may be specified
(i.e., \code{c("equal_range", "equal_mass")}) together, in which case
cross-validation will be used to select the optimal binning strategy.}

\item{n_bins}{This \code{numeric} value indicates the number(s) of bins into
which the support of \code{A} is to be divided. As with \code{grid_type},
multiple values may be specified, in which case cross-validation will be
used to choose the optimal number of bins. The default sets the candidate
choices of the number of bins based on heuristics tested in simulation.}

\item{cv_folds}{A \code{numeric} indicating the number of cross-validation
folds to be used in fitting the sequence of HAL conditional density models.}

\item{lambda_seq}{A \code{numeric} sequence of values of the regularization
parameter of Lasso regression; passed to \code{\link[hal9001]{fit_hal}} via
its argument \code{lambda}, itself passed to \code{\link[glmnet]{glmnet}}.}

\item{smoothness_orders}{An \code{integer} indicating the smoothness of the
HAL basis functions; passed to \code{\link[hal9001]{fit_hal}}. The default
is set to zero, for indicator basis functions.}

\item{hal_basis_list}{A \code{list} consisting of a preconstructed set of
HAL basis functions, as produced by \code{\link[hal9001]{fit_hal}}. The
default of \code{NULL} results in creating such a set of basis functions.
When specified, this is passed directly to the HAL model fitted upon the
augmented (repeated measures) data structure, resulting in a much lowered
computational cost. This is useful, for example, in fitting HAL conditional
density estimates with external cross-validation or bootstrap samples.}

\item{...}{Additional (optional) arguments of \code{\link[hal9001]{fit_hal}}
that may be used to control fitting of the HAL regression model. Possible
choices include \code{use_min}, \code{reduce_basis}, \code{return_lasso},
and \code{return_x_basis}, but this list is not exhaustive. Consult the
documentation of \code{\link[hal9001]{fit_hal}} for complete details.}
}
\value{
Object of class \code{haldensify}, containing a fitted
 \code{hal9001} object; a vector of break points used in binning \code{A}
 over its support; sizes of the bins used in each fit; the tuning
 parameters selected by cross-validation; the full sequence (in lambda) of
 HAL models for the CV-selected number of bins and binning strategy; and
 the range of \code{A}.
}
\description{
Cross-validated HAL Conditional Density Estimation
}
\details{
Estimation of the conditional density A|W by using the highly
 adaptive lasso to estimate the conditional hazard of failure in a given
 bin over the support of A. Cross-validation is used to select the optimal
 value of the penalization parameter, based on minimization of the weighted
 log-likelihood loss for a density.
}
\note{
Parallel evaluation of the cross-validation procedure to select tuning
 parameters for density estimation may be invoked via the framework exposed
 in the \pkg{future} ecosystem. Specifically, set \code{\link[future]{plan}}
 for \code{\link[future.apply]{future_mapply}} to be used internally.
}
\examples{
# simulate data: W ~ U[-4, 4] and A|W ~ N(mu = W, sd = 0.5)
set.seed(429153)
n_train <- 50
w <- runif(n_train, -4, 4)
a <- rnorm(n_train, w, 0.5)
# learn relationship A|W using HAL-based density estimation procedure
haldensify_fit <- haldensify(
  A = a, W = w, n_bins = 10L, lambda_seq = exp(seq(-1, -10, length = 100)),
  # the following arguments are passed to hal9001::fit_hal()
  max_degree = 3, reduce_basis = 1 / sqrt(length(a))
)
}
