% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/train_model.R
\name{predict.forecast_model}
\alias{predict.forecast_model}
\title{Predict on validation datasets or forecast}
\usage{
\method{predict}{forecast_model}(..., prediction_function = list(NULL),
  data)
}
\arguments{
\item{...}{One or more trained models from \code{train_model()}.}

\item{prediction_function}{A list of user-defined prediction functions with length equal to
the number of models supplied in \code{...}. The prediction functions
take 2 required positional arguments--(1) a 'forecast_model' object from \code{train_model()} and (2) a
data.frame of model features from \code{create_lagged_df()}--and return a 1- or 3-column data.frame
of model predictions. If the prediction function returns a 1-column data.frame, point forecasts are assumed.
If the prediction function returns a 3-column data.frame, lower and upper forecast bounds are assumed (the
order and names of the 3 columns do not matter). See the example below for details.}

\item{data}{If \code{data} is a training dataset from \code{create_lagged_df(..., type = "train")}, validation dataset
predictions are returned; else, if \code{data} is a forecasting dataset from \code{create_lagged_df(..., type = "forecast")},
forecasts from horizons 1:h are returned.}
}
\value{
If \code{data = create_lagged_df(..., type = "train")}, an S3 object of class 'training_results'. If
\code{data = create_lagged_df(..., type = "forecast")}, an S3 object of class 'forecast_results'.

  \strong{Columns in returned 'training_results' data.frame:}
    \itemize{
      \item \code{model}: User-supplied model name in \code{train_model()}.
      \item \code{model_forecast_horizon}: The direct-forecasting time horizon that the model was trained on.
      \item \code{window_length}: Validation window length measured in dataset rows.
      \item \code{window_number}: Validation dataset number.
      \item \code{valid_indices}: Validation dataset row names from \code{attributes(create_lagged_df())$row_indices}.
      \item \code{date_indices}: If given, validation dataset date indices from \code{attributes(create_lagged_df())$date_indices}.
      \item \code{"groups"}: If given, the user-supplied groups in \code{create_lagged_df()}.
      \item \code{"outcome_name"}: The target being forecasted.
      \item \code{"outcome_name"_pred}: The model predictions.
      \item \code{"outcome_name"_pred_lower}: If given, the lower prediction bounds returned by the user-supplied prediction function.
      \item \code{"outcome_name"_pred_upper}: If given, the upper prediction bounds returned by the user-supplied prediction function.
   }

   \strong{Columns in returned 'forecast_results' data.frame:}
    \itemize{
      \item \code{model}: User-supplied model name in \code{train_model()}.
      \item \code{model_forecast_horizon}: The direct-forecasting time horizon that the model was trained on.
      \item \code{horizon}: Forecast horizons, 1:h, measured in dataset rows.
      \item \code{window_length}: Validation window length measured in dataset rows.
      \item \code{forecast_period}: The forecast period in row indices or dates. The forecast period starts at either \code{attributes(create_lagged_df())$data_stop + 1} for row indices or \code{attributes(create_lagged_df())$data_stop + 1 * frequency} for date indices.
      \item \code{"groups"}: If given, the user-supplied groups in \code{create_lagged_df()}.
      \item \code{"outcome_name"}: The target being forecasted.
      \item \code{"outcome_name"_pred}: The model forecasts.
      \item \code{"outcome_name"_pred_lower}: If given, the lower forecast bounds returned by the user-supplied prediction function.
      \item \code{"outcome_name"_pred_upper}: If given, the upper forecast bounds returned by
      the user-supplied prediction function.
   }
}
\description{
Predict with a 'forecast_model' object from \code{train_model()}. If \code{data = create_lagged_df(..., type = "train")},
predictions are returned for the outer-loop nested cross-validation datasets.
If \code{data} is an object of class 'lagged_df' from \code{create_lagged_df(..., type = "forecast")},
predictions are returned for the horizons specified in \code{create_lagged_df()}.
}
\examples{
# Sampled Seatbelts data from the R package datasets, loaded here from the
# copy bundled with forecastML.
data("data_seatbelts", package = "forecastML")

# Example - Training data for 2 horizon-specific models w/ common lags per predictor.
# One model is trained per forecast horizon; the same lookback lags are applied
# to every predictor.
horizons <- c(1, 12)
lookback <- 1:15

# Build the lagged training dataset; the outcome is column 1 of data_seatbelts.
data_train <- create_lagged_df(data_seatbelts, type = "train", outcome_col = 1,
                               lookback = lookback, horizon = horizons)

# One custom validation window at the end of the dataset, bounded by
# window_start = 181 and window_stop = 192.
windows <- create_windows(data_train, window_start = 181, window_stop = 192)

# User-defined model - LASSO
# A user-defined wrapper function for model training that takes the following
# arguments: (1) a horizon-specific data.frame made with create_lagged_df(..., type = "train")
# (e.g., my_lagged_df$horizon_h) and, optionally, (2) any number of additional named arguments
# which are passed as '...' in train_model().
library(glmnet)
model_function <- function(data, my_outcome_col) {
  # Fit a cross-validated glmnet model on one horizon-specific training data.frame.
  #
  # data: a horizon-specific data.frame holding the outcome and the model features.
  # my_outcome_col: column position of the outcome in data.
  #
  # Returns the fitted object from glmnet::cv.glmnet().

  # Split the outcome from the features and convert both to matrices;
  # drop = FALSE keeps each piece two-dimensional even with a single column.
  y <- as.matrix(data[, my_outcome_col, drop = FALSE])
  x <- as.matrix(data[, -(my_outcome_col), drop = FALSE])

  # 3-fold cross-validation; the fitted object is the function's return value.
  glmnet::cv.glmnet(x, y, nfolds = 3)
}

# Train the user-defined model with train_model() using the lagged training data
# and the validation windows created above.
# my_outcome_col = 1 is passed in ... but could have been defined in model_function().
model_results <- train_model(data_train, windows, model_name = "LASSO", model_function,
                             my_outcome_col = 1)

# User-defined prediction function - LASSO
# The predict() wrapper takes two positional arguments. First,
# the returned model from the user-defined modeling function (model_function() above).
# Second, a data.frame of predictors--identical to the datasets returned from
# create_lagged_df(..., type = "train"). The function can return a 1- or 3-column data.frame
# with either (a) point forecasts or (b) point forecasts plus lower and upper forecast
# bounds (column order and column names do not matter).
prediction_function <- function(model, data_features) {
  # Produce point predictions from a fitted model.
  #
  # model: the object returned by the user-defined modeling function.
  # data_features: a data.frame of model features.
  #
  # Returns a 1-column data.frame of predictions.

  # predict() is given a matrix of features rather than a data.frame.
  feature_matrix <- as.matrix(data_features)

  # s = "lambda.min" is forwarded to the model's predict() method.
  data.frame("y_pred" = predict(model, feature_matrix, s = "lambda.min"))
}

# Predict on the validation datasets. Passing the training dataset as 'data'
# returns validation dataset predictions. 'prediction_function' is a list with
# one function per model supplied to predict().
data_valid <- predict(model_results, prediction_function = list(prediction_function),
                      data = data_train)

# Forecast. Build the forecasting dataset with the same outcome column, lookback,
# and horizons that were used for the training dataset.
data_forecast <- create_lagged_df(data_seatbelts, type = "forecast", outcome_col = 1,
                                  lookback = lookback, horizon = horizons)

# Passing the forecasting dataset as 'data' returns forecasts instead of
# validation dataset predictions.
data_forecasts <- predict(model_results, prediction_function = list(prediction_function),
                          data = data_forecast)
}
