% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ltmle.R
\name{ltmle}
\alias{ltmle}
\alias{ltmleMSM}
\title{Longitudinal Targeted Maximum Likelihood Estimation}
\usage{
ltmle(
  data,
  Anodes,
  Cnodes = NULL,
  Lnodes = NULL,
  Ynodes,
  survivalOutcome = NULL,
  Qform = NULL,
  gform = NULL,
  abar,
  rule = NULL,
  gbounds = c(0.01, 1),
  Yrange = NULL,
  deterministic.g.function = NULL,
  stratify = FALSE,
  SL.library = "glm",
  SL.cvControl = list(),
  estimate.time = TRUE,
  gcomp = FALSE,
  iptw.only = FALSE,
  deterministic.Q.function = NULL,
  variance.method = "tmle",
  observation.weights = NULL,
  id = NULL
)

ltmleMSM(
  data,
  Anodes,
  Cnodes = NULL,
  Lnodes = NULL,
  Ynodes,
  survivalOutcome = NULL,
  Qform = NULL,
  gform = NULL,
  gbounds = c(0.01, 1),
  Yrange = NULL,
  deterministic.g.function = NULL,
  SL.library = "glm",
  SL.cvControl = list(),
  regimes,
  working.msm,
  summary.measures,
  final.Ynodes = NULL,
  stratify = FALSE,
  msm.weights = "empirical",
  estimate.time = TRUE,
  gcomp = FALSE,
  iptw.only = FALSE,
  deterministic.Q.function = NULL,
  variance.method = "tmle",
  observation.weights = NULL,
  id = NULL
)
}
\arguments{
\item{data}{data frame following the time-ordering of the nodes. See
'Details'.}

\item{Anodes}{column names or indices in \code{data} of treatment nodes}

\item{Cnodes}{column names or indices in \code{data} of censoring nodes}

\item{Lnodes}{column names or indices in \code{data} of time-dependent
covariate nodes}

\item{Ynodes}{column names or indices in \code{data} of outcome nodes}

\item{survivalOutcome}{If \code{TRUE}, then Y nodes are indicators of an
event, and if Y at some time point is 1, then all following should be 1.
Required to be \code{TRUE} or \code{FALSE} if outcomes are binary and there
are multiple Ynodes.}

\item{Qform}{character vector of regression formulas for \eqn{Q}. See
'Details'.}

\item{gform}{character vector of regression formulas for \eqn{g} or a
matrix/array of prob(A=1). See 'Details'.}

\item{abar}{binary vector (numAnodes x 1) or matrix (n x numAnodes) of
counterfactual treatment or a list of length 2. See 'Details'.}

\item{rule}{a function to be applied to each row (a named vector) of
\code{data} that returns a numeric vector of length numAnodes. See 'Details'.}

\item{gbounds}{lower and upper bounds on estimated cumulative probabilities
for g-factors. Vector of length 2, order unimportant.}

\item{Yrange}{NULL or a numerical vector where the min and max of
\code{Yrange} specify the range of all Y nodes. See 'Details'.}

\item{deterministic.g.function}{optional information on A and C nodes that
are given deterministically. See 'Details'. Default \code{NULL} indicates no
deterministic links.}

\item{stratify}{if \code{TRUE} stratify on following \code{abar} when
estimating Q and g. If \code{FALSE}, pool over \code{abar}.}

\item{SL.library}{optional character vector of libraries to pass to
\code{\link[SuperLearner:SuperLearner]{SuperLearner}}. \code{NULL} indicates
\link{glm} should be called instead of
\code{\link[SuperLearner:SuperLearner]{SuperLearner}}. '\code{default}'
indicates a standard set of libraries. May be separately specified for
\eqn{Q} and \eqn{g}. See 'Details'.}

\item{SL.cvControl}{optional list to be passed as \code{cvControl} to \code{\link[SuperLearner:SuperLearner]{SuperLearner}}}

\item{estimate.time}{if \code{TRUE}, run an initial estimate using only 50
observations and use this to print a very rough estimate of the total time
to completion. No action if there are fewer than 50 observations.}

\item{gcomp}{if \code{TRUE}, run the maximum likelihood based G-computation
estimate \emph{instead} of TMLE}

\item{iptw.only}{by default (\code{iptw.only = FALSE}), both TMLE and IPTW
are run in \code{ltmle} and \code{ltmleMSM}. If \code{iptw.only = TRUE},
only IPTW is run, which is faster.}

\item{deterministic.Q.function}{optional information on Q given
deterministically. See 'Details'. Default \code{NULL} indicates no
deterministic links.}

\item{variance.method}{Method for estimating variance of TMLE.
One of "ic", "tmle", "iptw". If "tmle", compute both the robust variance
estimate using TMLE and the influence curve based variance estimate (use the
larger of the two). If "iptw", compute both the robust variance
estimate using IPTW and the influence curve based variance estimate (use the
larger of the two). If "ic", only compute the influence curve based
variance estimate. "ic" is fastest, but may be substantially
anti-conservative if there are positivity violations or rare outcomes. "tmle" is
slowest but most robust if there are positivity violations or rare outcomes.
"iptw" is a compromise between speed and robustness.
variance.method="tmle" or "iptw" are not yet available with non-binary outcomes,
gcomp=TRUE, stratify=TRUE, or deterministic.Q.function.}

\item{observation.weights}{observation (sampling) weights. Vector of length
n. If \code{NULL}, assumed to be all 1.}

\item{id}{Household or subject identifiers. Vector of length n or \code{NULL}.
Integer, factor, or character recommended, but any type that can be coerced
to factor will work. \code{NULL} means all distinct ids.}

\item{regimes}{binary array: n x numAnodes x numRegimes of counterfactual
treatment or a list of 'rule' functions}

\item{working.msm}{character formula for the working marginal structural
model}

\item{summary.measures}{array: num.regimes x num.summary.measures x
num.final.Ynodes - measures summarizing the regimes that will be used on the
right hand side of \code{working.msm} (baseline covariates may also be used
in the right hand side of \code{working.msm} and do not need to be included
in \code{summary.measures})}

\item{final.Ynodes}{vector subset of Ynodes - used in MSM to pool over a set
of outcome nodes}

\item{msm.weights}{projection weights for the working MSM. If "empirical",
weight by empirical proportions of rows matching each regime for each
final.Ynode, with duplicate regimes given zero weight. If \code{NULL}, no
weights. Or an array of user-supplied weights with dimensions c(n,
num.regimes, num.final.Ynodes) or c(num.regimes, num.final.Ynodes).}
}
\value{
\code{ltmle} returns an object of class "\code{ltmle}" (unless
\code{abar} or \code{rule} is a list, in which case it returns an object of
class \code{ltmleSummaryMeasures}, which has the same components as
\code{ltmleMSM}.) The function \code{\link{summary}} (i.e.
\code{\link{summary.ltmle}}) can be used to obtain or print a summary of the
results. An object of class "\code{ltmle}" is a list containing the
following components: \item{estimates}{a named vector of length 4 with
elements, each an estimate of \eqn{E[Y_{\bar{a}}]}: \itemize{ \item
\code{tmle} - Targeted Maximum Likelihood Estimate [NULL if \code{gcomp} is
\code{TRUE}] \item \code{iptw} - Inverse Probability of Treatment/Censoring
Weighted estimate \item \code{gcomp} - maximum likelihood based
G-computation estimate [NULL if \code{gcomp} is \code{FALSE}] } }
\item{IC}{a list with the following components of Influence Curve values}
\itemize{ \item \code{tmle} - vector of influence curve values for Targeted
Maximum Likelihood Estimate [NULL if \code{gcomp} is \code{TRUE}] \item
\code{iptw} - vector of influence curve values for Inverse Probability of
Treatment/Censoring Weighted estimate \item \code{gcomp} - vector of
influence curve values for Targeted Maximum Likelihood Estimate without
updating [NULL if \code{gcomp} is \code{FALSE}] } \item{cum.g}{cumulative g,
after bounding: for ltmle, n x numACnodes, for ltmleMSM, n x numACnodes x
num.regimes} \item{cum.g.unbounded}{cumulative g, before bounding: for
ltmle, n x numACnodes, for ltmleMSM, n x numACnodes x num.regimes}
\item{cum.g.used}{binary - TRUE if an entry of cum.g was used in the updating
step (note: even if cum.g.used is FALSE, a small value of cum.g.unbounded may
still indicate a positivity problem): for ltmle, n x numACnodes,
for ltmleMSM, n x numACnodes x num.regimes}
\item{call}{the matched call} \item{gcomp}{the \code{gcomp} input}
\item{formulas}{a \code{list} with elements \code{Qform} and \code{gform}}
\item{fit}{a list with the following components} \itemize{ \item \code{g} -
list of length numACnodes - \code{glm} or \code{SuperLearner} (see Details)
return objects from fitting g regressions
\item \code{Q} - list of length numLYnodes - \code{glm} or \code{SuperLearner}
(see Details) return objects from fitting Q regressions
\item \code{Qstar} - list of length numLYnodes - \code{glm} (or numerical
optimization if \code{glm} fails to solve the score equation) return objects
from updating the Q fit }

\code{ltmleMSM} returns an object of class "\code{ltmleMSM}". The function
\code{\link{summary}} (i.e. \code{\link{summary.ltmleMSM}}) can be used to
obtain or print a summary of the results. An object of class
"\code{ltmleMSM}" is a list containing the following components:
\item{beta}{parameter estimates for working.msm using TMLE (GCOMP if
\code{gcomp} input is \code{TRUE})} \item{beta.iptw}{parameter estimates for
working.msm using IPTW} \item{IC}{matrix, n x numBetas - influence curve
values for TMLE (without updating if \code{gcomp} input is \code{TRUE})}
\item{IC.iptw}{matrix, n x numBetas - influence curve values for IPTW}
\item{msm}{object of class glm - the result of fitting the working.msm}
\item{cum.g}{array, n x numACnodes x numRegimes - cumulative g, after
bounding} \item{cum.g.unbounded}{array, n x numACnodes x numRegimes -
cumulative g, before bounding} \item{call}{the matched call}
\item{gcomp}{the \code{gcomp} input} \item{formulas}{a \code{list} with
elements \code{Qform} and \code{gform}}
\item{fit}{a list with the following components}
\itemize{ \item \code{g} - list of length numRegimes of list of length
numACnodes - \code{glm} or \code{SuperLearner} (see Details) return objects from
fitting g regressions \item \code{Q} - list of length numLYnodes -
\code{glm} or \code{SuperLearner} (see Details) return objects from fitting Q
regressions
\item \code{Qstar} - list of length numLYnodes - \code{glm} (or numerical
optimization if \code{glm} fails to solve the score equation) return objects
from updating the Q fit }
}
\description{
\code{ltmle} is Targeted Maximum Likelihood Estimation (TMLE) of
treatment/censoring specific mean outcome for point-treatment and
longitudinal data. \code{ltmleMSM} adds Marginal Structural Models. Both
always provide Inverse Probability of Treatment/Censoring Weighted estimate
(IPTW) as well. Maximum likelihood based G-computation estimate (G-comp) can
be obtained instead of TMLE. \code{ltmle} can be used to calculate additive
treatment effect, risk ratio, and odds ratio.
}
\details{
The estimates returned by \code{ltmle} are of a treatment specific mean,
\eqn{E[Y_{\bar{a}}]}, the mean of the final outcome node, where all
treatment nodes, \eqn{A}, are set to \eqn{\bar{a}} (\code{abar}) and all
censoring nodes \eqn{C} are set to 1 (uncensored). The estimates returned by
\code{ltmleMSM} are similar but are the parameters in a working marginal
structural model.

\code{data} should be a data frame where the order of the columns
corresponds to the time-ordering of the model.  \itemize{ \item in censoring
columns (Cnodes): factor with two levels: "censored" and "uncensored". The
helper function \code{BinaryToCensoring} can be used to create these
factors.  \item in treatment columns (Anodes): 1 = treated, 0 = untreated
(must be binary) \item in event columns (Ynodes): If \code{survivalOutcome}
is \code{TRUE}, then Y nodes are treated as indicators of a one-time event.
See details for \code{survivalOutcome}. If \code{survivalOutcome} is
\code{FALSE}, Y nodes are treated as binary if all values are 0 or 1, and
are treated as continuous otherwise. If Y nodes are continuous, they may be
automatically scaled. See details for \code{Yrange}.  \item time-dependent
covariate columns (Lnodes): can be any numeric data \item Data in
\code{Cnodes}, \code{Anodes}, \code{Lnodes} and \code{Ynodes} are not used
after (to the right of) censoring (or an event when
\code{survivalOutcome==TRUE}) and may be coded as \code{NA} or any other
value.  \item Columns in \code{data} that are before (to the left of) the
first of \code{Cnodes} or \code{Anodes} are treated as baseline variables,
even if they are specified as \code{Lnodes}.  \item After the first of
\code{Cnodes}, \code{Anodes}, \code{Ynodes}, or \code{Lnodes}, every column
must be in one of \code{Cnodes}, \code{Anodes}, \code{Ynodes}, or
\code{Lnodes}.  }

If \code{survivalOutcome} is \code{TRUE}, all Y values are indicators of an
event (e.g. death) at or before the current time, where 1 = event and 0 = no
event. The events in Ynodes must be of the form where once Y jumps to 1, Y
remains 1 at subsequent nodes.

For continuous outcomes (\code{survivalOutcome==FALSE} and some Y nodes are
not 0 or 1), Y values are truncated at the minimum and maximum of
\code{Yrange} if specified, and then transformed and scaled to be in [0,1].
That is, transformed to \code{(Y-min(Yrange))/(max(Yrange)-min(Yrange))}. If
\code{Yrange} is \code{NULL}, it is set to the range of all Y nodes. In that
case, Y nodes are only scaled if any values fall outside of [0,1]. For
intervention specific means (\code{ltmle}), parameter estimates are
transformed back based on \code{Yrange}.

\code{Qform} should be \code{NULL}, in which case all parent nodes of each L
and Y node will be used as regressors, or a named character vector that can
be coerced to class "\code{\link{formula}}". The length of \code{Qform} must
be equal to \code{length(Lnodes) + length(Ynodes)}** and the names and order
of the formulas must be the same as the names and order of the L and Y nodes
in \code{data}. The left hand side of each formula should be
"\code{Q.kplus1}". If \code{SL.library} is \code{NULL}, \code{glm} will be
called using the elements of \code{Qform}. If \code{SL.library} is
specified, \code{\link[SuperLearner:SuperLearner]{SuperLearner}} will be
called after a design matrix is created using \code{Qform}.

** If there is a "block" of L and Y nodes not separated by A or C nodes,
only one regression is required at the first L/Y node in a block. You can
pass regression formulas for the other L/Y nodes, but they will be ignored
(with a message). See example 5.

\code{gform} should be \code{NULL}, in which case all parent nodes of each A
and C node will be used as regressors, or a character vector that can be
coerced to class "\code{\link{formula}}", or a matrix/array of Prob(A=1). If
\code{gform} is a character vector, the length of \code{gform} must be equal
to \code{length(Anodes) + length(Cnodes)} and the order of the formulas must
be the same as the order the A and C nodes appear in \code{data}. The left
hand side of each formula should be the name of the Anode or Cnode. If
\code{SL.library} is \code{NULL}, \code{glm} will be called using the
elements of \code{gform}. If \code{SL.library} is specified,
\code{\link[SuperLearner:SuperLearner]{SuperLearner}} will be called after a
design matrix is created using \code{gform}.

In \code{ltmle}, \code{gform} can also be a n x numACnodes matrix where
entry (i, j) is the probability that the ith observation of the jth A/C node
is 1 (if an Anode) or uncensored (if a Cnode), conditional on following abar
up to that node. In \code{ltmleMSM}, \code{gform} can similarly be a n x
numACnodes x numRegimes array, where entry (i, j, k) is the probability that
the ith observation of the jth A/C node is 1 (if an Anode) or uncensored (if
a Cnode), conditional on following regime k up to that node. If \code{gform}
is a matrix/array, \code{deterministic.g.function} will not be used and
should be \code{NULL}.

\code{abar} specifies the counterfactual values of the Anodes, using the
order they appear in \code{data} and should have the same length (if abar is
a vector) or number of columns (if abar is a matrix) as \code{Anodes}.

\code{rule} can be used to specify a dynamic treatment rule. \code{rule} is
a function applied to each row of \code{data} which returns a numeric
vector of the same length as \code{Anodes}.

\code{abar} and \code{rule} cannot both be specified. If one of them is a
list of length 2, additive treatment effect, risk ratio, and odds ratio can
be computed using \code{\link{summary.ltmleEffectMeasures}}.

\code{regimes} can be a binary array: n x numAnodes x numRegimes of
counterfactual treatment or a list of 'rule' functions as described above
for the \code{rule} argument for the \code{ltmle} function.

\code{deterministic.g.function} can be a function used to specify model
knowledge about the values of Anodes and/or Cnodes that are set
deterministically. For example, it may be the case that once a patient
starts treatment, they always stay on treatment. For details on the form of
the function and examples, see
\code{\link{deterministic.g.function_template}}.

\code{deterministic.Q.function} can be a function used to specify model
knowledge about the final event state. For example, it may be the case that
a patient can complete the study at some intermediate time point, in which
case the probability of death is 0 (assuming they have not died already).
For details on the form of the function and examples, see
\code{\link{deterministic.Q.function_template}}.

\code{SL.library} may be a character vector of libraries (or '\code{glm}' or
'\code{default}'), in which case these libraries are used to estimate both
\eqn{Q} and \eqn{g} OR a list with two components, \code{Q} and \code{g},
where each is a character vector of libraries (or '\code{glm}' or
'\code{default}').  '\code{glm}' indicates \link{glm} should be called
instead of \code{\link[SuperLearner:SuperLearner]{SuperLearner}}. If
\code{SL.library} is the string '\code{default}', \code{SL.library} is set
to \code{list("SL.glm", "SL.stepAIC", "SL.bayesglm", c("SL.glm",
"screen.corP"), c("SL.step", "screen.corP"), c("SL.step.forward",
"screen.corP"), c("SL.stepAIC", "screen.corP"), c("SL.step.interaction",
"screen.corP"), c("SL.bayesglm", "screen.corP"))}.  Note that the default set
of libraries consists of main terms models. It may be advisable to include
squared terms, interaction terms, etc. in \code{gform} and \code{Qform} or
include libraries that consider non-linear terms.

If \code{attr(SL.library, "return.fit") == TRUE}, then \code{fit$g} and
\code{fit$Q} will return full \code{SuperLearner} or \code{glm} objects.
If not, only a summary matrix will be returned to save memory.

The print method for \code{ltmle} objects only prints the tmle estimates.
}
\section{Functions}{
\itemize{
\item \code{ltmleMSM()}: Longitudinal Targeted Maximum Likelihood Estimation for a Marginal Structural Model

}}
\examples{

# See \url{http://joshuaschwab.github.io/ltmle/} for more examples.

rexpit <- function(x) rbinom(n=length(x), size=1, prob=plogis(x))

# Single time point Example
n <- 1000
W <- rnorm(n)
A <- rexpit(-1 + 2 * W)
Y <- rexpit(W + A)
data <- data.frame(W, A, Y)

result1 <- ltmle(data, Anodes="A", Ynodes="Y", abar=1)
summary(result1)
summary(result1, estimator="iptw")
# MSM Example
# Given data over 3 time points where A switches to 1 once and then stays 1. We want to know
# how death varies as a function of gender, time and an indicator of whether a patient's
# intended regime was to switch before time.
# Note that working.msm includes time and switch.time, which are columns of
# summary.measures; working.msm also includes male, which is ok because it is a baseline
# covariate (it comes before any A/C/L/Y nodes).
data(sampleDataForLtmleMSM)
Anodes <- grep("^A", names(sampleDataForLtmleMSM$data))
Lnodes <- c("CD4_1", "CD4_2")
Ynodes <- grep("^Y", names(sampleDataForLtmleMSM$data))
msm.weights <- matrix(1:12, nrow=4, ncol=3) #just an example (can also use a 200x3x4 array),
                                            #or NULL (for no weights), or "empirical" (the default)

result2 <- ltmleMSM(sampleDataForLtmleMSM$data, Anodes=Anodes, Lnodes=Lnodes, Ynodes=Ynodes,
                   survivalOutcome=TRUE,
                   regimes=sampleDataForLtmleMSM$regimes,
                   summary.measures=sampleDataForLtmleMSM$summary.measures, final.Ynodes=Ynodes,
                   working.msm="Y ~ male + time + I(pmax(time - switch.time, 0))",
                   msm.weights=msm.weights, estimate.time=FALSE)
print(summary(result2))

}
\seealso{
\code{\link{summary.ltmle}}, \code{\link{summary.ltmleMSM}},
\code{\link[SuperLearner:SuperLearner]{SuperLearner}},
\code{\link{deterministic.g.function_template}},
\code{\link{deterministic.Q.function_template}}
}
\author{
Joshua Schwab \email{jschwab77@berkeley.edu}, Samuel Lendle, Maya
Petersen, and Mark van der Laan
}
