% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GPModel.R
\name{GPModel}
\alias{GPModel}
\title{Create a \code{GPModel} object}
\usage{
GPModel(likelihood = "gaussian", group_data = NULL,
  group_rand_coef_data = NULL, ind_effect_group_rand_coef = NULL,
  drop_intercept_group_rand_effect = NULL, gp_coords = NULL,
  gp_rand_coef_data = NULL, cov_function = "matern", cov_fct_shape = 1.5,
  gp_approx = "none", num_parallel_threads = NULL,
  cov_fct_taper_range = 1, cov_fct_taper_shape = 1, num_neighbors = 20L,
  vecchia_ordering = "random", ind_points_selection = "kmeans++",
  num_ind_points = 500L, cover_tree_radius = 1,
  matrix_inversion_method = "cholesky", seed = 0L, cluster_ids = NULL,
  likelihood_additional_param = NULL, free_raw_data = FALSE,
  vecchia_approx = NULL, vecchia_pred_type = NULL,
  num_neighbors_pred = NULL)
}
\arguments{
\item{likelihood}{A \code{string} specifying the likelihood function (distribution) of the response variable. 
Available options:
\itemize{
\item{ "gaussian" }
\item{ "bernoulli_probit": binary data with Bernoulli likelihood and a probit link function }
\item{ "bernoulli_logit": binary data with Bernoulli likelihood and a logit link function }
\item{ "gamma": gamma distribution with a with log link function }
\item{ "poisson": Poisson distribution with a with log link function }
\item{ "negative_binomial": negative binomial distribution with a with log link function }
\item{ "t": t-distribution (e.g., for robust regression) }
\item{ "t_fix_df": t-distribution with the degrees-of-freedom (df) held fixed and not estimated. 
The df can be set via the \code{likelihood_additional_param} parameter }
\item{ Note: other likelihoods could be implemented upon request }
}}

\item{group_data}{A \code{vector} or \code{matrix} whose columns are categorical grouping variables. 
The elements being group levels defining grouped random effects.
The elements of 'group_data' can be integer, double, or character.
The number of columns corresponds to the number of grouped (intercept) random effects}

\item{group_rand_coef_data}{A \code{vector} or \code{matrix} with numeric covariate data 
for grouped random coefficients}

\item{ind_effect_group_rand_coef}{A \code{vector} with integer indices that 
indicate the corresponding categorical grouping variable (=columns) in 'group_data' for 
every covariate in 'group_rand_coef_data'. Counting starts at 1.
The length of this index vector must equal the number of covariates in 'group_rand_coef_data'.
For instance, c(1,1,2) means that the first two covariates (=first two columns) in 'group_rand_coef_data'
have random coefficients corresponding to the first categorical grouping variable (=first column) in 'group_data',
and the third covariate (=third column) in 'group_rand_coef_data' has a random coefficient
corresponding to the second grouping variable (=second column) in 'group_data'}

\item{drop_intercept_group_rand_effect}{A \code{vector} of type \code{logical} (boolean). 
Indicates whether intercept random effects are dropped (only for random coefficients). 
If drop_intercept_group_rand_effect[k] is TRUE, the intercept random effect number k is dropped / not included. 
Only random effects with random slopes can be dropped.}

\item{gp_coords}{A \code{matrix} with numeric coordinates (= inputs / features) for defining Gaussian processes}

\item{gp_rand_coef_data}{A \code{vector} or \code{matrix} with numeric covariate data for
Gaussian process random coefficients}

\item{cov_function}{A \code{string} specifying the covariance function for the Gaussian process. 
Available options:
\itemize{
\item{"exponential": Exponential covariance function (using the parametrization of Diggle and Ribeiro, 2007) }
\item{"gaussian": Gaussian, aka squared exponential, covariance function (using the parametrization of Diggle and Ribeiro, 2007) }
\item{ "matern": Matern covariance function with the smoothness specified by 
the \code{cov_fct_shape} parameter (using the parametrization of Rasmussen and Williams, 2006) }
\item{"powered_exponential": powered exponential covariance function with the exponent specified by 
the \code{cov_fct_shape} parameter (using the parametrization of Diggle and Ribeiro, 2007) }
\item{ "wendland": Compactly supported Wendland covariance function (using the parametrization of Bevilacqua et al., 2019, AOS) }
\item{ "matern_space_time": Spatio-temporal Matern covariance function with different range parameters for space and time. 
Note that the first column in \code{gp_coords} must correspond to the time dimension }
\item{ "matern_ard": anisotropic Matern covariance function with Automatic Relevance Determination (ARD), 
i.e., with a different range parameter for every coordinate dimension / column of \code{gp_coords} }
\item{ "gaussian_ard": anisotropic Gaussian, aka squared exponential, covariance function with Automatic Relevance Determination (ARD), 
i.e., with a different range parameter for every coordinate dimension / column of \code{gp_coords} }
}}

\item{cov_fct_shape}{A \code{numeric} specifying the shape parameter of the covariance function 
(=smoothness parameter for Matern covariance)  
This parameter is irrelevant for some covariance functions such as the exponential or Gaussian}

\item{gp_approx}{A \code{string} specifying the large data approximation
for Gaussian processes. Available options: 
\itemize{
\item{"none": No approximation }
\item{"vecchia": A Vecchia approximation; see Sigrist (2022, JMLR) for more details }
\item{"tapering": The covariance function is multiplied by 
a compactly supported Wendland correlation function }
\item{"fitc": Fully Independent Training Conditional approximation aka 
modified predictive process approximation; see Gyger, Furrer, and Sigrist (2024) for more details }
\item{"full_scale_tapering": A full scale approximation combining an 
inducing point / predictive process approximation with tapering on the residual process; 
see Gyger, Furrer, and Sigrist (2024) for more details }
\item{"vecchia_latent": similar as "vecchia" but a Vecchia approximation is applied to the latent Gaussian process 
for likelihood == "gaussian". For likelihood != "gaussian", "vecchia" and "vecchia_latent" are equivalent }
}}

\item{num_parallel_threads}{An \code{integer} specifying the number of parallel threads for OMP. 
If num_parallel_threads = NULL, all available threads are used}

\item{cov_fct_taper_range}{A \code{numeric} specifying the range parameter 
of the Wendland covariance function and Wendland correlation taper function. 
We follow the notation of Bevilacqua et al. (2019, AOS)}

\item{cov_fct_taper_shape}{A \code{numeric} specifying the shape (=smoothness) parameter 
of the Wendland covariance function and Wendland correlation taper function. 
We follow the notation of Bevilacqua et al. (2019, AOS)}

\item{num_neighbors}{An \code{integer} specifying the number of neighbors for 
the Vecchia approximation. Note: for prediction, the number of neighbors can 
be set through the 'num_neighbors_pred' parameter in the 'set_prediction_data'
function. By default, num_neighbors_pred = 2 * num_neighbors. Further, 
the type of Vecchia approximation used for making predictions is set through  
the 'vecchia_pred_type' parameter in the 'set_prediction_data' function}

\item{vecchia_ordering}{A \code{string} specifying the ordering used in 
the Vecchia approximation. Available options:
\itemize{
\item{"none": the default ordering in the data is used }
\item{"random": a random ordering }
\item{"time": ordering accorrding to time (only for space-time models) }
\item{"time_random_space": ordering according to time and randomly for all 
spatial points with the same time points (only for space-time models) }
}}

\item{ind_points_selection}{A \code{string} specifying the method for choosing inducing points
Available options:
\itemize{
\item{"kmeans++: the k-means++ algorithm }
\item{"cover_tree": the cover tree algorithm }
\item{"random": random selection from data points }
}}

\item{num_ind_points}{An \code{integer} specifying the number of inducing 
points / knots for, e.g., a predictive process approximation}

\item{cover_tree_radius}{A \code{numeric} specifying the radius (= "spatial resolution") 
for the cover tree algorithm}

\item{matrix_inversion_method}{A \code{string} specifying the method used for inverting covariance matrices. 
Available options:
\itemize{
\item{"cholesky": Cholesky factorization }
\item{"iterative": iterative methods. A combination of conjugate gradient, Lanczos algorithm, and other methods. 

This is currently only supported for the following cases: 
\itemize{
\item{likelihood != "gaussian" and gp_approx == "vecchia" (non-Gaussian likelihoods with a Vecchia-Laplace approximation) }
\item{likelihood == "gaussian" and gp_approx == "full_scale_tapering" (Gaussian likelihood with a full-scale tapering approximation) }
}
}
}}

\item{seed}{An \code{integer} specifying the seed used for model creation 
(e.g., random ordering in Vecchia approximation)}

\item{cluster_ids}{A \code{vector} with elements indicating independent realizations of 
random effects / Gaussian processes (same values = same process realization).
The elements of 'cluster_ids' can be integer, double, or character.}

\item{likelihood_additional_param}{A \code{numeric} specifying an additional parameter for the \code{likelihood} 
which cannot be estimated for this \code{likelihood} (e.g., degrees of freedom for \code{likelihood = "t_fix_df"}). 
This is not to be confused with any auxiliary parameters that can be estimated and accessed through 
the function \code{get_aux_pars} after estimation.
Note that this \code{likelihood_additional_param} parameter is irrelevant for many likelihoods.
If \code{likelihood_additional_param = NULL}, the following internal default values are used:
\itemize{
\item{ df = 2 for likelihood = "t_fix_df"}
}}

\item{free_raw_data}{A \code{boolean}. If TRUE, the data (groups, coordinates, covariate data for random coefficients) 
is freed in R after initialization}

\item{vecchia_approx}{Discontinued. Use the argument \code{gp_approx} instead}

\item{vecchia_pred_type}{A \code{string} specifying the type of Vecchia approximation used for making predictions.
This is discontinued here. Use the function 'set_prediction_data' to specify this}

\item{num_neighbors_pred}{an \code{integer} specifying the number of neighbors for making predictions.
This is discontinued here. Use the function 'set_prediction_data' to specify this}
}
\value{
A \code{GPModel} containing ontains a Gaussian process and / or mixed effects model with grouped random effects
}
\description{
Create a \code{GPModel} which contains a Gaussian process and / or mixed effects model with grouped random effects
}
\examples{
# See https://github.com/fabsig/GPBoost/tree/master/R-package for more examples

data(GPBoost_data, package = "gpboost")

#--------------------Grouped random effects model: single-level random effect----------------
gp_model <- GPModel(group_data = group_data[,1], likelihood="gaussian")

#--------------------Gaussian process model----------------
gp_model <- GPModel(gp_coords = coords, cov_function = "matern", cov_fct_shape = 1.5,
                    likelihood="gaussian")

#--------------------Combine Gaussian process with grouped random effects----------------
gp_model <- GPModel(group_data = group_data,
                    gp_coords = coords, cov_function = "matern", cov_fct_shape = 1.5,
                    likelihood="gaussian")
}
\author{
Fabio Sigrist
}
