% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RPtest.R
\name{RPtest}
\alias{RPtest}
\title{Goodness of fit tests for potentially high-dimensional linear models}
\usage{
RPtest(x, y, resid_type = c("Lasso", "OLS"), test = c("nonlin", "group",
  "hetero"), x_alt, RPfunction = NULL, B = 49L, rand_gen = rnorm,
  mc.cores = 1L, nfolds = 5L, nperms = 2L, beta_est, resid_only = FALSE,
  output_all = FALSE)
}
\arguments{
\item{x}{Input matrix with \code{nobs} rows, each an observation vector.}

\item{y}{Response vector.}

\item{resid_type}{Type of residuals used for the test (see details below).
Use \code{Lasso} when the null model is high-dimensional; otherwise use
\code{OLS}.}

\item{test}{Type of departure from the linear model to test for (see details
below). Ignored if \code{RPfunction} is given.}

\item{x_alt}{If \code{test} is \code{group}, this gives the set of variables
whose significance we wish to ascertain, after controlling for those in
\code{x}. If \code{RPfunction} is given, it is the input matrix passed to
the function \code{RPfunction}.}

\item{RPfunction}{A residual prediction (RP) function that must permit
calling as \code{RPfunction(x_alt, resid)} where \code{resid} is a numeric
vector with \code{nobs} components. The output must be either a single
number or a numeric vector (in the latter case \code{RPfunction} would
encode a number of RP functions).}

\item{B}{The number of bootstrap samples to use - note the p-value produced
will always be at least 1/B.}

\item{rand_gen}{A function to generate the simulated errors up to an unknown
scale factor. It must permit calling as \code{rand_gen(nobs*B)}. Determines
the form of errors in the null model. The default \code{rnorm} equates to a
null of a (sparse) Gaussian linear model. Setting \code{rand_gen=NULL}
resamples residuals to generate simulated errors and approximates a null of
i.i.d. errors with unknown distribution.}

\item{mc.cores}{The number of cores to use. Will always be 1 on Windows.}

\item{nfolds}{Number of folds to use when performing cross-validation to
obtain \code{beta_est}, the initial estimate of the vector of regression
coefficients, via Lasso estimation.}

\item{nperms}{Number of permutations of the data for which \code{nfolds}
cross-validation is to be performed. Thus in total prediction errors on
\code{nfolds*nperms} folds are averaged over.}

\item{beta_est}{An optional user-supplied estimate of the vector of
regression coefficients.}

\item{resid_only}{If \code{TRUE} only outputs the residuals without applying
an RP function.}

\item{output_all}{If \code{TRUE}, gives further output in addition to the
p-value (see Value below).}
}
\value{
When \code{resid_only=FALSE} and \code{output_all=FALSE}, the output
  is a single p-value. Otherwise, a list with some of the following
  components is returned (\code{resid_only=TRUE} causes the last two
  components to be omitted):
  \describe{
    \item{\code{p-value}}{p-value}
    \item{\code{beta_est}}{estimated vector of regression coefficients
    \code{beta_est}}
    \item{\code{sigma_est}}{set to 1 when \code{resid_type=OLS};
    otherwise the normalised root-RSS derived from
    \code{beta_est} used in generating the simulated errors}
    \item{\code{resid}}{scaled residuals}
    \item{\code{resid_sim}}{simulated scaled residuals}
    \item{\code{test}}{the test statistic(s) - may be a vector if multiple RP
    functions are being used such as when \code{test=group}}
    \item{\code{test_sim}}{a list of simulated test statistics}
  }
}
\description{
Can test for the significance of (potentially large) groups of predictors and
the presence of nonlinearity or heteroscedasticity in the context of both low
and high-dimensional linear models. Outputs a p-value. Also allows for the
calibration of arbitrary goodness of fit tests via specification of
\code{RPfunction}.
}
\details{
The function works by first computing residuals from a regression of
  y on x. Next \code{B} sets of errors generated through \code{rand_gen} are
  added to a signal derived from \code{beta_est} and artificial residuals
  are computed. The option \code{resid_only=TRUE} then outputs these
  residuals along with the original residuals, scaled to have l_2-norm
  squared equal to \code{nobs}. The residuals in question are OLS residuals
  when \code{resid_type=OLS} (case (a) - for use when the null hypothesis is
  low-dimensional so the number of columns of \code{x} is smaller than
  \code{nobs-1}), and square-root / scaled Lasso residuals otherwise (case
  (b)). The options for \code{test} then apply different functions to the
  residuals as described below.
  \describe{
    \item{\code{nonlin}}{In case (a), the test statistic is the RSS (residual
    sum of squares) of a \code{\link[randomForest]{randomForest}} fit from
    regressing the residuals on to \code{x}; case (b) is similar but the OOB
    error is used and the regression is carried out on the equicorrelation set
    rather than all of \code{x}.}
    \item{\code{group}}{\code{x_alt} is first residualised with
    respect to \code{x} by (a) OLS or (b) \code{\link{sparse_proj}}. Then the
    RSS from Lasso fits from regressions of the residuals on to \code{x_alt}
    are used.}
    \item{\code{hetero}}{Uses the RSS from Lasso fits from
    regressions of the squared residuals on to the equicorrelation set (b) or
    all of \code{x} (a).}
  }
}
\examples{
# Testing for nonlinearity
# Fix the RNG seed so that the simulated data below are reproducible
set.seed(1)
# 100 observations on 200 standardised predictors (more columns than rows)
x <- scale(matrix(runif(100*200), 100, 200))
# The response depends on the first predictor through a quartic term as well
# as linearly, so a model that is purely linear in x is misspecified
y <- x[, 1] + x[, 1]^4 + rnorm(nrow(x))
# B=9L is well below the default of 49L (presumably to keep the example fast)
out <- RPtest(x, y, test="nonlin", B=9L, nperms=2, resid_type = "Lasso")

# Testing significance of a group
# The response depends on columns 1:5 and on column 151; column 151 is left
# out of the null model x[, 1:150] and is among the candidates in x_alt
y <- x[, 1:5] \%*\% rep(1, 5) + x[, 151] + rnorm(nrow(x))
# The outer parentheses print the result as well as assigning it to out
(out <- RPtest(x[, 1:150], y, test="group", x_alt = x[, 151:200], B=9L, nperms=1))

# Testing for heteroscedasticity
# Fresh design: 250 observations on 100 standardised predictors
x <- scale(matrix(runif(250*100), 250, 100))
# The noise variance is driven by the sum of the first two predictors
hetero_sig <- x[, 1] + x[, 2]
# Shift so that every variance is strictly positive (the smallest is 0.01)
var_vec <- hetero_sig - min(hetero_sig) + 0.01
# Rescale so that the variances average to 1
var_vec <- var_vec / mean(var_vec)
sd_vec <- sqrt(var_vec)
# Linear signal in the first five predictors; each observation's noise is
# scaled by its own standard deviation
y <- x[, 1:5] \%*\% rep(1, 5) + sd_vec*rnorm(nrow(x))
(out <- RPtest(x, y, test="hetero", B=9L, nperms=1))
}
\references{
Shah, R. D., Buhlmann, P. (2016) \emph{Goodness of fit tests for
  high-dimensional linear models} \url{http://arxiv.org/abs/1511.03334}
}
\seealso{
\code{\link{RPtest_single}} and \code{\link{sqrt_lasso}}
}

