% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.emfrail.R
\name{predict.emfrail}
\alias{predict.emfrail}
\title{Predicted hazard and survival curves from an \code{emfrail} object}
\usage{
\method{predict}{emfrail}(object, lp = c(0), newdata = NULL,
  quantity = c("cumhaz", "survival"), type = c("conditional", "marginal"),
  conf_int = c("regular", "adjusted"), ...)
}
\arguments{
\item{object}{An \code{emfrail} fit object}

\item{lp}{A vector of linear predictor values at which to calculate the curves. Default is 0 (baseline).}

\item{newdata}{A data frame with the same variable names as those that appear in the \code{emfrail} formula, used to calculate the \code{lp} (optional).}

\item{quantity}{Can be \code{"cumhaz"} and/or \code{"survival"}. The quantity to be calculated for the values of \code{lp}.}

\item{type}{Can be \code{"conditional"} and/or \code{"marginal"}. The type of the quantity to be calculated.}

\item{conf_int}{Can be \code{"regular"} and/or \code{"adjusted"}. The type of confidence interval to be calculated.}

\item{...}{Ignored}
}
\value{
A data frame with the column \code{time} and several other columns according to the input.
By default, for each \code{lp} it will give the following columns: \code{cumhaz}, \code{survival},
\code{cumhaz_m}, \code{survival_m} for the cumulative hazard and survival, conditional and marginal.
}
\description{
Predicted hazard and survival curves from an \code{emfrail} object
}
\details{
There are two ways of specifying for which individuals to calculate the cumulative hazard or survival curve.
One way is directly, for certain values of the linear predictor, via \code{lp},
and the second way is via \code{newdata}, a \code{data.frame} where the column names are the same as in the original data.
If \code{newdata} is specified, then the \code{lp} argument is ignored.

The names of the columns in the returned object are as follows: \code{time} represents the unique event time points
from the data set, \code{lp} is the value of the linear predictor (as specified in the input or as calculated from the rows of \code{newdata}). If
\code{newdata} is specified, columns repeating the corresponding row of \code{newdata} are also added.
 The two "quantities" that can be returned are
named \code{cumhaz} and \code{survival}. If we denote each quantity with \code{q}, then the columns with the marginal estimates
are named \code{q_m}. The confidence interval columns are named after the quantity (conditional or marginal) followed by \code{_l} or \code{_r} for
the lower and upper bound. The bounds calculated with the adjusted standard errors have the name of the regular bounds followed by
\code{_a}. For example, the adjusted lower bound for the marginal survival is in the column named \code{survival_m_l_a}.

The \code{emfrail} fit only gives the Breslow estimates of the baseline hazard \eqn{\lambda_0(t)} at the
event time points, conditional on the frailty. Let \eqn{\lambda(t)} be the conditional hazard for a linear predictor of interest.
The estimated conditional cumulative hazard is then
\eqn{\Lambda(t) = \sum_{s = 0}^t \lambda(s)}. The variance of \eqn{\Lambda(t)} can be calculated from the (possibly adjusted)
variance-covariance matrix.

The conditional survival is obtained by the usual expression \eqn{S(t) = \exp(-\Lambda(t))}. The marginal survival
is given by
\deqn{\bar S(t) = E \left[\exp(-\Lambda(t)) \right] = \mathcal{L}(\Lambda(t)),}
i.e. the Laplace transform of the frailty distribution evaluated at \eqn{\Lambda(t)}.

The marginal cumulative hazard is obtained as \deqn{\bar \Lambda(t) = - \log \bar S(t).}

The only standard errors that are available from \code{emfrail} are those for \eqn{\lambda_0(t)}. From this,
standard errors of \eqn{\Lambda(t)} may be calculated. They have the following two issues: (1) the linear predictor is taken as fixed,
i.e. the variability in the estimation of the regression coefficient is not taken into account and (2) the confidence intervals
are based on asymptotic normality and are symmetric, which may lead in some situations to confidence intervals containing negative values.
In this case, the lower bound for the cumulative hazard (or upper bound, for the survival) is truncated at 0 (or 1, for the survival).
}
\examples{
# Recode sex: the value 1 becomes "male", everything else "female"
kidney$sex <- ifelse(kidney$sex == 1, "male", "female")
m1 <- emfrail(.data =  kidney,
              .formula = Surv(time, status) ~  sex + age  + cluster(id))

# With the default lp = 0 the predictions are for the baseline
pred <- predict(m1)

names(pred)

# Plot baseline cumulative hazard: note that this is for someone aged 0!
plot_pred(m1)

# More realistic:
plot_pred(m1, newdata = data.frame(sex = "female", age = mean(kidney$age)))

# Plot survival
plot_pred(m1,
          newdata = data.frame(sex = "female", age = mean(kidney$age)),
          quantity = "survival", conf_int = "none")


# Plot cumulative hazard with confidence intervals, ggplot2
# (one ribbon for the regular bounds, one for the adjusted bounds)
library(ggplot2)
ggplot(pred, aes(x = time, y = cumhaz)) +
  geom_step() +
  geom_ribbon(aes(ymin = cumhaz_l, ymax = cumhaz_r), alpha = 0.2) +
  geom_ribbon(aes(ymin = cumhaz_l_a, ymax = cumhaz_r_a), alpha = 0.2) +
  ggtitle("Baseline cumulative hazard with confidence intervals")

# For two individuals of the same age: one female and one male
pred2 <- predict(m1, newdata = data.frame(sex = c("female", "male"), age = c(44, 44)))
# Plot the conditional & marginal survival of the two individuals
ggplot(pred2, aes(x = time, y = survival, group = sex)) +
  geom_step(aes(colour = sex)) + ggtitle("Conditional survival")

ggplot(pred2, aes(x = time, y = survival_m, group = sex)) +
  geom_step(aes(colour = sex)) + ggtitle("Marginal survival")

# Plot the conditional and the marginal survival in the same place
library(dplyr)
library(tidyr)
pred2 \%>\%
  gather(key = variable, value = survival, survival, survival_m) \%>\%
  mutate(variable = ifelse(variable == "survival", "conditional", "marginal")) \%>\%
  ggplot(aes(x = time, y = survival, colour = sex, linetype = variable)) +
  geom_step() + ggtitle("Survival by sex")

# The hazard ratio, computed here as the ratio of the cumulative hazards
is_female <- pred2$sex == "female"
is_male <- pred2$sex == "male"
hr_conditional <- pred2$cumhaz[is_female] / pred2$cumhaz[is_male]
hr_marginal <- pred2$cumhaz_m[is_female] / pred2$cumhaz_m[is_male]
time <- pred2$time[is_male]

plot(time, hr_marginal, type = "s", col = 2, main = "Hazard ratio female vs male")
lines(time, hr_conditional, type = "s")
# Name the legend argument explicitly instead of relying on positional matching
legend("bottomleft", legend = c("conditional", "marginal"), col = c(1, 2), lty = 1)
# The marginal hazard ratio in the case of gamma frailty shrinks towards 1
# With positive stable, this plot would be two parallel lines

# Or easier, in this way:
plot_hr(m1, newdata = data.frame(sex = c("female", "male"), age = c(44, 44)))
}
\seealso{
\code{\link{plot_pred}}, \code{\link{plot_hr}}
}
