% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fusedTree.R
\name{PenOpt}
\alias{PenOpt}
\title{Tuning of the penalty parameters of fusedTree using cross-validation}
\usage{
PenOpt(
  Tree,
  X,
  Y,
  Z,
  model = NULL,
  lambdaInit = 10,
  alphaInit = 10,
  folds = CVfoldsTree(Y = Y, Tree = Tree, Z = Z, model = model),
  loss = "loglik",
  multistart = FALSE,
  maxIter = 30,
  LinVars = FALSE
)
}
\arguments{
\item{Tree}{The fitted tree. Currently this should be a tree fitted using
\code{rpart}. Trees fitted by other R packages (e.g., \code{partykit}) may
be allowed in the future.}

\item{X}{The original omics data matrix. Has dimensions (sample size × number
of omics variables). Should be a matrix.}

\item{Y}{The response; should be either numeric, binary (encoded by 0 and 1),
or a survival object created by \code{Surv()} from the \code{survival} package.
Only right-censored survival data is allowed.}

\item{Z}{The original clinical data matrix, which was used to fit the tree.
Should be a \code{data.frame}.}

\item{model}{Character. Specifies the outcome model. One of \code{"linear"},
\code{"logistic"}, or \code{"cox"}.}

\item{lambdaInit}{Numeric. Initial value for the standard ridge (L2) penalty
\code{lambda}. Must be greater than zero. Defaults to 10.}

\item{alphaInit}{Numeric. Initial value for the fusion penalty \code{alpha}.
If set to 0, fusion is omitted and only \code{lambda} is tuned. Must be zero
or greater. Defaults to 10.}

\item{folds}{List. Each element contains the indices of the test samples for
a fold. It is advisable to balance the samples with respect to the outcome
(for binary and survival models) and the tree structure. If not provided,
folds are generated internally by \code{CVfoldsTree()}.}

\item{loss}{Character. The loss function to optimize in cross-validation.
For binary and survival outcomes, only \code{"loglik"} (cross-validated
likelihood) is supported. For continuous outcomes, an alternative is
\code{"sos"} (sum of squares loss). Defaults to \code{"loglik"}.}

\item{multistart}{Logical. Whether to initialize with different starting values when
optimizing the cross-validated likelihood. Can help with stability when both
\code{lambda} and \code{alpha} are tuned, at the cost of longer runtime.
Defaults to \code{FALSE}.}

\item{maxIter}{Integer. Maximum number of iterations for the IRLS (iteratively
reweighted least squares) algorithm. Used only for logistic and Cox models.
Defaults to 30.}

\item{LinVars}{Logical. Whether to include continuous clinical variables
linearly in the model (in addition to the tree structure). Can be helpful
since trees may not capture linear effects well. Defaults to \code{FALSE}.}
}
\value{
A numeric vector with the tuned values of the penalties:
\itemize{
\item \code{lambda}: standard ridge (L2) penalty.
\item \code{alpha}: fusion penalty (only if \code{alphaInit > 0}).
}
If \code{alphaInit = 0}, only the tuned \code{lambda} is returned.
}
\description{
Tuning is conducted by optimizing a cross-validated loss (by default the
cross-validated likelihood; see \code{loss}).
Users can either include the fusion penalty (by specifying \code{alphaInit > 0}),
or omit the fusion penalty (by specifying \code{alphaInit = 0}). If
\code{alphaInit = 0}, only the standard ridge penalty \code{lambda} is tuned.
Note that \code{Dat_Tree()} is called internally, so please provide the
original data as input arguments.
}
\details{
The cross-validated likelihood is optimized using the \code{Nelder-Mead}
method from \code{stats::optim()}. When tuning both \code{lambda} and
\code{alpha}, the objective function can be noisy. Setting
\code{multistart = TRUE} performs optimization from several starting values
to improve robustness. This is only applicable when \code{alphaInit > 0}.
}
\examples{
# Problem dimensions (kept small so the example runs quickly)
p <- 5       # number of omics variables
p_Clin <- 5  # number of clinical variables
N <- 100     # sample size

# Friedman-like nonlinear effect of the clinical covariates
g <- function(z) {
  sinTerm <- 15 * sin(pi * z[, 1] * z[, 2])
  sqTerm <- 10 * (z[, 3] - 0.5)^2
  sinTerm + sqTerm + 2 * exp(z[, 4]) + 2 * z[, 5]
}

# Simulate clinical data, omics data, and a continuous outcome
set.seed(11)
Z <- as.data.frame(matrix(runif(N * p_Clin), nrow = N))  # clinical data
X <- matrix(rnorm(N * p), nrow = N)                      # omics data
betas <- c(1, -1, 3, 4, 2)                               # omics effects
noise <- rnorm(N)
Y <- as.vector(g(Z) + X \%*\% betas + noise)             # continuous outcome

# Fit a regression tree on the clinical data only
dat <- cbind.data.frame(Y, Z)  # outcome and clinical covariates in one data.frame for rpart
rp <- rpart::rpart(
  Y ~ .,
  data = dat,
  control = rpart::rpart.control(xval = 5, minbucket = 10),
  model = TRUE
)

# Prune at the complexity parameter with the lowest cross-validated error
cpTable <- rp$cptable
cp <- cpTable[, "CP"][which.min(cpTable[, "xerror"])]
Treefit <- rpart::prune(rp, cp = cp)
plot(Treefit)

# Build the cross-validation folds, then tune both penalties
folds <- CVfoldsTree(Y = Y, Tree = Treefit, Z = Z, model = "linear")
optPenalties <- PenOpt(
  Tree = Treefit, X = X, Y = Y, Z = Z,
  model = "linear",
  lambdaInit = 10, alphaInit = 10,
  folds = folds,
  loss = "loglik",
  multistart = FALSE,
  LinVars = FALSE
)
optPenalties
}
\references{
\CRANpkg{porridge}
}
