#' @title Cross-validate time series samples
#' @name sits_kfold_validate
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Splits the set of time series into training and validation
#' subsets and performs k-fold cross-validation.
#' Cross-validation is a technique for assessing how the results
#' of a statistical analysis will generalize to an independent data set.
#' It is mainly used in settings where the goal is prediction,
#' and one wants to estimate how accurately a predictive model will perform.
#' One round of cross-validation involves partitioning a sample of data
#' into complementary subsets, performing the analysis on one subset
#' (called the training set), and validating the analysis on the other subset
#' (called the validation set or testing set).
#'
#' The k-fold cross-validation method involves splitting the dataset
#' into k subsets. Each subset is held out in turn while the model is trained
#' on all other subsets. This process is repeated until a prediction
#' has been obtained for each instance in the dataset, and an overall
#' accuracy estimate is provided.
#'
#' This function returns the confusion matrix and Kappa values.
#'
#' @param samples            Time series.
#' @param folds              Number of partitions to create.
#' @param ml_method          Machine learning method.
#' @param multicores         Number of cores to process in parallel
#'                           (never more than \code{folds} workers are used).
#'
#' @return A \code{caret::confusionMatrix} object, additionally tagged with
#'         class \code{sits_accuracy}, to be used for validation assessment.
#' @note
#' Please refer to the sits documentation available in
#' <https://e-sensing.github.io/sitsbook/> for detailed examples.
#'
#' @examples
#' if (sits_run_examples()) {
#'     # A dataset containing a tibble with time series samples
#'     # for the Mato Grosso state in Brazil
#'     # create a list to store the results
#'     results <- list()
#'
#'     # accuracy assessment lightTAE
#'     acc_ltae <- sits_kfold_validate(
#'         samples_modis_ndvi,
#'         folds = 5,
#'         ml_method = sits_lighttae()
#'     )
#'     # use a name
#'     acc_ltae$name <- "LightTAE"
#'     # put the result in a list
#'     results[[length(results) + 1]] <- acc_ltae
#'
#'     # Machine Learning - Random Forests
#'     acc_rf <- sits_kfold_validate(
#'         samples_modis_ndvi,
#'         folds = 5,
#'         ml_method = sits_rfor()
#'     )
#'     acc_rf$name <- "RandomForests"
#'     # put the result in a list
#'     results[[length(results) + 1]] <- acc_rf
#'     # save to xlsx file
#'     sits_to_xlsx(
#'         results,
#'         file = tempfile("accuracy_mato_grosso_dl_", fileext = ".xlsx")
#'     )
#' }
#'
#' @export
sits_kfold_validate <- function(samples,
                                folds = 5,
                                ml_method = sits_rfor(),
                                multicores = 2) {

    # set caller to show in errors
    .check_set_caller("sits_kfold_validate")

    # caret is needed for fold creation and for the confusion matrix
    .check_require_packages("caret")

    # pre-condition: the machine learning method must be a function
    .check_that(
        inherits(ml_method, "function"),
        local_msg = "ml_method is not a valid sits method",
        msg = "invalid ml_method parameter"
    )

    # pre-condition
    .check_multicores(multicores)

    # For now, torch models do not support multicores on Windows.
    # A method is treated as torch-based when its enclosing environment
    # holds a variable named `optimizer`.
    if (multicores > 1 && .Platform$OS.type == "windows" &&
        "optimizer" %in% ls(environment(ml_method))) {
        multicores <- 1
        if (.check_warnings()) {
            warning("sits_kfold_validate() works only with 1 core in Windows OS.",
                call. = FALSE, immediate. = TRUE
            )
        }
    }

    # Get labels from samples
    labels <- .sits_labels(samples)

    # Is the data labelled?
    .check_that(
        x = !("NoClass" %in% labels),
        msg = "requires labelled set of time series"
    )

    # There is one task per fold, so more workers than folds would be idle
    multicores <- min(multicores, folds)

    # start parallel process and make sure it is stopped on exit;
    # add = TRUE keeps this cleanup even if other exit handlers are registered
    .sits_parallel_start(workers = multicores, log = FALSE)
    on.exit(.sits_parallel_stop(), add = TRUE)

    # Assign each sample to one of the folds (stored in column `folds`)
    samples <- .create_folds(samples, folds = folds)
    # Train and evaluate one model per fold, in parallel
    conf_lst <- .sits_parallel_map(seq_len(folds), function(k) {
        # Split data into training and test data sets
        data_train <- samples[samples$folds != k, ]
        data_test <- samples[samples$folds == k, ]
        # Create a machine learning model
        ml_model <- sits_train(samples = data_train, ml_method = ml_method)
        # Convert samples time series in predictors and preprocess data
        pred_test <- .predictors(samples = data_test, ml_model = ml_model)
        # Get predictors features to classify
        values <- .pred_features(pred_test)
        # Classify the test data
        values <- ml_model(values)
        # Extract classified labels (majority probability)
        values <- labels[C_label_max_prob(as.matrix(values))]
        # Removes 'ml_model' variable
        remove(ml_model)
        return(list(pred = values, ref = .pred_references(pred_test)))
    }, n_retries = 0, progress = FALSE)

    # Concatenate predicted and reference labels of all folds
    pred <- unlist(lapply(conf_lst, function(x) x$pred))
    ref <- unlist(lapply(conf_lst, function(x) x$ref))

    # Both factors share the levels found in the reference labels
    unique_ref <- unique(ref)
    pred_fac <- factor(pred, levels = unique_ref)
    ref_fac <- factor(ref, levels = unique_ref)

    # call caret package to the classification statistics
    acc <- caret::confusionMatrix(pred_fac, ref_fac)

    # tag the result as a sits accuracy object
    class(acc) <- c("sits_accuracy", class(acc))

    return(acc)
}
#' @title Validate time series samples
#' @name sits_validate
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description
#' One round of validation involves partitioning a sample of data
#' into complementary subsets, performing the analysis on one subset
#' (called the training set), and validating the analysis on the other subset
#' (called the validation set or testing set).
#'
#' The function trains a machine learning model on a set of time series
#' and assesses it against a second set of validation samples.
#' If the validation sample set is not provided,
#' the sample dataset is split into two parts, as defined by the parameter
#' \code{validation_split}. The accuracy is determined by the result of
#' the classification of the validation set.
#'
#' This function returns the confusion matrix and Kappa values.
#'
#' @param samples            Time series set used to train the model
#'                           (split into training and validation parts
#'                           if samples_validation is NULL).
#' @param samples_validation Time series set used for validation.
#' @param validation_split   Share of the original time series set to be
#'                           reserved for validation
#'                           (only used if samples_validation is NULL).
#' @param ml_method          Machine learning method.
#'
#' @return A \code{caret::confusionMatrix} object, additionally tagged with
#'         class \code{sits_accuracy}, to be used for validation assessment.
#'
#' @examples
#' if (sits_run_examples()){
#'    conf_matrix <- sits_validate(cerrado_2classes)
#' }
#' @export
sits_validate <- function(samples,
                          samples_validation = NULL,
                          validation_split = 0.2,
                          ml_method = sits_rfor()) {

    # register the caller name shown in error messages
    .check_set_caller("sits_validate")

    # the accuracy assessment relies on caret
    .check_require_packages("caret")

    # the machine learning method must be a function
    .check_that(
        inherits(ml_method, "function"),
        local_msg = "ml_method is not a valid sits method",
        msg = "invalid ml_method parameter"
    )

    # the input must be a valid set of training samples
    .check_samples_train(samples)

    # without an explicit validation set, carve one out of the samples
    if (is.null(samples_validation)) {
        split_samples <- .tibble_samples_split(
            samples = samples,
            validation_split = validation_split
        )
        # rows flagged with train == FALSE are held out for validation
        samples_validation <- dplyr::filter(split_samples, !.data[["train"]])
        # the remaining rows are used to fit the model
        samples <- dplyr::filter(split_samples, .data[["train"]])
    }

    # fit the model on the training samples
    model <- sits_train(samples = samples, ml_method = ml_method)
    # turn the validation time series into predictors for this model
    pred_valid <- .predictors(samples = samples_validation, ml_model = model)
    # classify the predictor features of the validation set
    probs <- model(.pred_features(pred_valid))
    # labels of the training samples give the factor levels
    train_labels <- .sits_labels(samples)
    # index of the most probable label for each validation sample
    best_idx <- C_label_max_prob(as.matrix(probs))
    # predicted and reference labels share the same levels
    predicted <- factor(train_labels[best_idx], levels = train_labels)
    reference <- factor(.pred_references(pred_valid), levels = train_labels)
    # let caret compute the classification statistics
    conf_mat <- caret::confusionMatrix(predicted, reference)
    # tag the result as a sits accuracy object and return it
    .set_class(x = conf_mat, "sits_accuracy", class(conf_mat))
}
#' @title Create partitions of a data set
#' @name  .create_folds
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @author Alexandre Ywata, \email{alexandre.ywata@@ipea.gov.br}
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Assigns each row of a sits tibble to one of k folds,
#' with the fold assignment computed by \code{caret::createFolds}
#' from the \code{label} column.
#'
#' @keywords internal
#' @noRd
#' @param data   A sits tibble to be partitioned.
#' @param folds  Number of folds
#'
#' @return The input data with an additional column \code{folds} holding
#'         the fold assigned to each row.
#'
.create_folds <- function(data, folds = 5) {
    # list = FALSE makes caret return one fold id per row instead of a list
    fold_ids <- caret::createFolds(
        data$label,
        k = folds,
        list = FALSE,
        returnTrain = FALSE
    )
    # store the fold assignment alongside the samples
    data$folds <- fold_ids
    data
}
