## SimInf, a framework for stochastic disease spread simulations
## Copyright (C) 2015 - 2019  Stefan Engblom
## Copyright (C) 2015 - 2019  Stefan Widgren
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <https://www.gnu.org/licenses/>.

## C code: heading
C_heading <- function()
{
    c(sprintf("/* Generated by SimInf (v%s) %s */",
            packageVersion("SimInf"),
            format(Sys.time(), "%Y-%m-%d %H:%M")),
      "")
}

## C code: include directives
C_include <- function()
{
    c("#include <R_ext/Rdynload.h>",
      "#include \"SimInf.h\"",
      "")
}

## C code: transition rate functions.
C_trFun <- function(transitions)
{
    lines <- character(0)
    for (i in seq_len(length(transitions))) {
        lines <- c(lines,
                   sprintf("double trFun%i(", i),
                   "    const int *u,",
                   "    const double *v,",
                   "    const double *ldata,",
                   "    const double *gdata,",
                   "    double t)",
                   "{",
                   sprintf("    return %s;", transitions[[i]]$propensity),
                   "}",
                   "")
    }
    lines
}

## C code: post time step function.
C_ptsFun <- function(pts_fun)
{
    if (is.null(pts_fun))
        pts_fun <- "    return 0;"

    if (!is.character(pts_fun))
        stop("'pts_fun' must be a character vector.")

    f <- textConnection(pts_fun)
    lines <- readLines(f)
    close(f)

    c("int ptsFun(",
      "    double *v_new,",
      "    const int *u,",
      "    const double *v,",
      "    const double *ldata,",
      "    const double *gdata,",
      "    int node,",
      "    double t)",
      "{",
           lines,
      "}",
      "")
}

## C code: SimInf model run function.
C_run <- function(transitions)
{
    c("SEXP SimInf_model_run(SEXP model, SEXP threads, SEXP solver)",
      "{",
      sprintf("    TRFun tr_fun[] = {%s};",
              paste0("&trFun", seq_len(length(transitions)), collapse = ", ")),
      "    DL_FUNC SimInf_run = R_GetCCallable(\"SimInf\", \"SimInf_run\");",
      "    return SimInf_run(model, threads, solver, tr_fun, &ptsFun);",
      "}",
      "")
}

## C code: Generate C code for mparse.
C_code_mparse <- function(transitions, pts_fun)
{
    c(C_heading(),
      C_include(),
      C_trFun(transitions),
      C_ptsFun(pts_fun),
      C_run(transitions))
}

## Split the propensity in order to separate preprocessor and
## punctuator tokens from identifiers, for example:
##
## > tokens(" bR * R ")
## [1] "bR" "*"  "R"
tokens <- function(propensity)
{
    ## List of valid preprocessor operator or punctuator tokens.
    operators <- c("<<=", ">>=", "!=", "%=", "##", "&&", "&=", "*=",
                   "++", "+=", "--", "-=", "->", "/=", "<<", "<=", "==",
                   ">=", ">>", "^=", "|=", "||", "!", "~", "%", "&", "(",
                   ")", "*", "+", ",", "-", "/", ":", ";", "<", "=",
                   ">", "?", "[", "]", "^", "{", "|", "}", "#")

    ## Create a matrix (1 x 2) of the propensity, where the first
    ## column is the token and the second column indicates if the
    ## token is one of the operators (indicated with 'op').
    propensity <- cbind(token = propensity, type = "")

    ## Iterate over each operator and try to split each row in the
    ## propensity in smaller pieces.
    for (op in operators) {
        propensity <- lapply(seq_len(nrow(propensity)), function(i) {
            x <- propensity[i, , drop = FALSE]

            ## Is it a non-operator token that we could split?
            if (nchar(x[1, 2]) == 0) {
                m <- gregexpr(op, x[1, 1], fixed = TRUE)[[1]]
                if (m[1] != -1) {
                    ## The operator exists in the token. Split the
                    ## token in smaller pieces. The cut-points are
                    ## deterimined by the position and length of op
                    ## e.g. "A op B" -> "A", "op", "B".
                    x <- as.character(x[1, 1])
                    j <- 1
                    xx <- NULL
                    for (i in seq_len(length(m))) {
                        if (m[i] > j)
                            xx <- c(xx, substr(x, j, m[i] - 1))
                        j <- m[i] + attr(m, "match.length")[i]
                        xx <- c(xx, substr(x, m[i], j - 1))
                    }

                    ## Make sure last sub-string is copied.
                    if (j <= nchar(x))
                        xx <- c(xx, substr(x, j, nchar(x)))

                    ## Remove leading and trailing whitespace and drop
                    ## empty strings
                    xx <- gsub("(^\\s+)|(\\s+$)", "", xx)
                    xx <- xx[nchar(xx) > 0]

                    ## Create a 2-column matrix from all sub-strings
                    x <- cbind(token = xx, type = ifelse(xx == op, "op", ""))
                }
            }

            x
        })

        propensity <- do.call("rbind", propensity)
    }

    propensity[, 1]
}

## Rewrite propensity
##
## Rewrite the propensity by replacing all compartments by
## \code{u[compartments[j]]} where \code{j} is the numbering in
## compartments. On return, 'depends' contains all compartments upon
## which the propensity depends.
rewrite_propensity <- function(propensity, compartments, ldata_names,
                               gdata_names, v0_names)
{
    propensity <- tokens(propensity)
    G_rowname <- paste0(propensity, collapse = "")
    depends <- integer(length(compartments))

    ## Find compartments in propensity
    i <- match(propensity, compartments)
    propensity <- ifelse(is.na(i), propensity, sprintf("u[%i]", i-1L))
    i <- i[!is.na(i)]
    if (length(i))
        depends[i] <- 1

    ## Find ldata parameters in the propensity
    i <- match(propensity, ldata_names)
    propensity <- ifelse(is.na(i), propensity, sprintf("ldata[%i]", i-1L))

    ## Find gdata parameters in the propensity
    i <- match(propensity, gdata_names)
    propensity <- ifelse(is.na(i), propensity, sprintf("gdata[%i]", i-1L))

    ## Find v0 parameters in the propensity
    i <- match(propensity, v0_names)
    propensity <- ifelse(is.na(i), propensity, sprintf("v[%i]", i-1L))

    list(propensity = paste0(propensity, collapse = ""),
         depends    = depends,
         G_rowname  = G_rowname)
}

## Generate the 'from' or 'dest' labels in the G rownames.
G_label <- function(x)
{
    if (length(x) == 0)
        return("@")

    ## Prefix compartments if more than one unit, e.g., '2*S'.
    lbl <- ifelse(abs(x) > 1, paste0(abs(x), "*"), "")
    lbl <- paste0(lbl, names(x))

    ## Combine all compartments, e.g., 'S + I'
    paste0(lbl, collapse = " + ")
}

## Generate rownames from the parsed transitions
G_rownames <- function(transitions)
{
    as.character(do.call("rbind", lapply(transitions, "[[", "G_rowname")))
}

parse_compartments <- function(x, compartments)
{
    ## Remove spaces and the empty set
    x <- gsub(" ", "", gsub("@", "", x))

    ## Split into 'compartment1 + compartment2 + ..'
    x <- unlist(strsplit(x, "+", fixed = TRUE))

    ## Replace 'n*compartment' with n replicates of 'compartment'
    x <- unlist(sapply(x, function(xx) {
        m <- regexpr("^[[:digit:]]+[*]", xx)
        if (m != 1)
            return(xx)

        ## Determine number of replicates and remove 'n*'
        n <- regmatches(xx, m)
        xx <- sub(n, "", xx, fixed = TRUE)
        n <- as.integer(substr(n, 1, nchar(n) - 1))

        rep(xx, n)
    }))

    ## Assign each compartment into its number according to the
    ## ordering in compartments
    i <- match(x, compartments)
    if (any(is.na(i)))
        stop(sprintf("Unknown compartment: '%s'.", x[is.na(i)]))

    tabulate(i, length(compartments))
}

parse_transitions <- function(transitions, compartments, ldata_names,
                              gdata_names, v0_names)
{
    lapply(strsplit(transitions, "->", fixed = TRUE), function(x) {
        if (length(x) < 3)
            stop("Invalid transition: '", paste0(x, collapse = "->"), "'")

        ## Remove spaces
        propensity <- gsub(" ", "", x[c(-1, -length(x))])
        propensity <- paste0(propensity, collapse = "->")

        ## Determine the corresponding column in the state change
        ## vector S.
        from <- parse_compartments(x[1], compartments)
        dest <- parse_compartments(x[length(x)], compartments)
        S <- dest - from

        propensity <- rewrite_propensity(propensity, compartments,
                                         ldata_names, gdata_names,
                                         v0_names)

        ## Determine the G rowname
        names(from) <- compartments
        names(dest) <- compartments
        from <- G_label(from[which(from > 0)])
        dest <- G_label(dest[which(dest > 0)])
        G_rowname <- paste(from, "->", propensity$G_rowname, "->", dest)

        list(propensity = propensity$propensity,
             depends    = propensity$depends,
             S          = S,
             G_rowname  = G_rowname)
    })
}

##' Extract variable names from data
##'
##' @param x data to extract the variable names from.
##' @param is_vector_ok TRUE if x can be a numeric vector, else FALSE.
##' @noRd
variable_names <- function(x, is_vector_ok) {
    if (is.null(x))
        return(NULL)

    if (is.data.frame(x)) {
        lbl <- colnames(x)
    } else if (isTRUE(is_vector_ok)) {
        if (is.atomic(x) && is.numeric(x)) {
            lbl <- names(x)
        } else {
            stop(paste0("'",
                        as.character(substitute(x)),
                        "' must either be a 'data.frame' or a 'numeric' vector."))
        }
    } else if (is.matrix(x)) {
        lbl <- rownames(x)
    } else {
        stop(paste0("'",
                    as.character(substitute(x)),
                    "' must either be a 'data.frame' or a 'matrix'."))
    }

    if (any(duplicated(lbl)) || any(nchar(lbl) == 0)) {
        stop(paste0("'",
                    as.character(substitute(x)),
                    "' must have non-duplicated parameter names."))
    }

    lbl
}

##' Model parser to define new models to run in \code{SimInf}
##'
##' Describe your model in a logical way in R. \code{mparse} creates a
##' \code{\linkS4class{SimInf_model}} object with your model
##' definition that is ready to \code{\link{run}}.
##' @param transitions character vector containing transitions on the
##'     form \code{"X -> ... -> Y"}. The left (right) side is the
##'     initial (final) state and the propensity is written in between
##'     the \code{->}-signs. The special symbol \code{@} is reserved
##'     for the empty set. For example, \code{transitions =
##'     c("S -> k1*S*I -> I", "I -> k2*I -> R")} expresses a SIR
##'     model.
##' @param compartments contains the names of the involved
##'     compartments, for example, \code{compartments = c("S", "I",
##'     "R")}.
##' @param ldata optional data for the nodes. Can be specified either
##'     as a numeric matrix where column \code{ldata[, j]} contains
##'     the local data vector for the node \code{j} or as a
##'     \code{data.frame} with one row per node. If it's specified as
##'     a matrix, it must have row names to identify the parameters in
##'     the transitions. If it's specified as a data.frame, each
##'     column is one parameter. The local data vector is passed as an
##'     argument to the transition rate functions and the post time
##'     step function.
##' @param gdata optional data that are common to all nodes in the
##'     model. Can be specified either as a named numeric vector or as
##'     as a one-row data.frame. The names are used to identify the
##'     parameters in the transitions. The global data vector is
##'     passed as an argument to the transition rate functions and the
##'     post time step function.
##' @param u0 A \code{data.frame} (or an object that can be coerced to
##'     a \code{data.frame} with \code{as.data.frame}) with the
##'     initial state i.e. the number of individuals in each
##'     compartment in each node when the simulation starts..
##' @param v0 optional data with the initial continuous state in each
##'     node. Can be specified either as a \code{data.frame} with one
##'     row per node or as a numeric matrix where column \code{v0[,
##'     j]} contains the initial state vector for the node
##'     \code{j}. If \code{v0} is specified as a \code{data.frame},
##'     each column is one parameter. If \code{v0} is specified as a
##'     matrix, the row names identify the parameters. The 'v' vector
##'     is passed as an argument to the transition rate functions and
##'     the post time step function. The continuous state can be
##'     updated in the post time step function.
##' @template tspan-param
##' @param events A \code{data.frame} with the scheduled
##'     events. Default is \code{NULL} i.e. no scheduled events in the
##'     model.
##' @param E matrix to handle scheduled events, see
##'     \code{\linkS4class{SimInf_events}}. Default is \code{NULL}
##'     i.e. no scheduled events in the model.
##' @param N matrix to handle scheduled events, see
##'     \code{\linkS4class{SimInf_events}}. Default is \code{NULL}
##'     i.e. no scheduled events in the model.
##' @param pts_fun optional character vector with C code for the post
##'     time step function. The C code should contain only the body of
##'     the function i.e. the code between the opening and closing
##'     curly brackets.
##' @return a \code{\linkS4class{SimInf_model}} object
##' @export
##' @importFrom methods as
##' @importFrom utils packageVersion
##' @template mparse-example
mparse <- function(transitions = NULL, compartments = NULL, ldata = NULL,
                   gdata = NULL, u0 = NULL, v0 = NULL, tspan = NULL,
                   events = NULL, E = NULL, N = NULL, pts_fun = NULL)
{
    ## Check transitions
    if (!is.atomic(transitions) || !is.character(transitions) ||
        any(nchar(transitions) == 0))
        stop("'transitions' must be specified in a character vector.")

    ## Check u0 and compartments
    u0 <- check_u0(u0, compartments)

    ## Extract variable names from data.
    ldata_names <- variable_names(ldata, FALSE)
    gdata_names <- variable_names(gdata, TRUE)
    v0_names <- variable_names(v0, FALSE)

    if (any(duplicated(c(compartments, gdata_names, ldata_names, v0_names))))
        stop("'u0', 'gdata', 'ldata' and 'v0' have names in common.")

    ## Parse transitions
    transitions <- parse_transitions(transitions, compartments,
                                     ldata_names, gdata_names,
                                     v0_names)

    ## Create the state-change matrix S
    S <- do.call("cbind", lapply(transitions, "[[", "S"))
    S <- as(S, "dgCMatrix")
    colnames(S) <- as.character(seq_len(dim(S)[2]))
    rownames(S) <- compartments

    ## Create the dependency graph G
    depends <- do.call("rbind", lapply(transitions, "[[", "depends"))
    G <- as(((depends %*% abs(S)) > 0) * 1, "dgCMatrix")
    colnames(G) <- as.character(seq_len(dim(G)[2]))
    rownames(G) <- G_rownames(transitions)

    SimInf_model(G      = G,
                 S      = S,
                 E      = E,
                 N      = N,
                 tspan  = tspan,
                 events = events,
                 ldata  = ldata,
                 gdata  = gdata,
                 u0     = u0,
                 v0     = v0,
                 C_code = C_code_mparse(transitions, pts_fun))
}

##' Extract the C code from a \code{SimInf_model} object
##'
##' @param model The \code{SimInf_model} object to extract the C code
##'     from.
##' @param pkg Character vector. If the C could should be used in a
##'     package named \code{pkg}, the function modifies the C code to
##'     facilitate adding the code to the package. Default is to not
##'     use this argument and return the C code unmodified.
##' @return Character vector with C code for the model.
##' @export
##' @examples
##' ## Use the model parser to create a 'SimInf_model' object that
##' ## expresses an SIR model, where 'b' is the transmission rate and
##' ## 'g' is the recovery rate.
##' model <- mparse(transitions = c("S -> b*S*I/(S+I+R) -> I", "I -> g*I -> R"),
##'                 compartments = c("S", "I", "R"),
##'                 gdata = c(b = 0.16, g = 0.077),
##'                 u0 = data.frame(S = 99, I = 1, R = 0),
##'                 tspan = 1:10)
##'
##' ## View the C code.
##' C_code(model)
##'
##' ## Modify the C code for a package named "XYZ"
##' C_code(model, "XYZ")
C_code <- function(model, pkg = NULL)
{
    check_model_argument(model)

    if (is.null(pkg))
        return(model@C_code)

    pkg <- as.character(pkg)
    stopifnot(identical(length(pkg), 1L), nchar(pkg[1]) > 0)

    lines <- model@C_code

    lines <- c(
        lines[1:2],
        "#include <R_ext/Visibility.h>",
        lines[-(1:2)],
        "static const R_CallMethodDef callMethods[] =",
        "{",
        "    {\"SimInf_model_run\", (DL_FUNC)&SimInf_model_run, 3},",
        "    {NULL, NULL, 0}",
        "};",
        "",
        paste0("void attribute_visible R_init_", pkg, "(DllInfo *info)"),
        "{",
        "    R_registerRoutines(info, NULL, callMethods, NULL, NULL);",
        "    R_useDynamicSymbols(info, FALSE);",
        "    R_forceSymbols(info, TRUE);",
        "}",
        "")

    lines
}
