#' McCullagh's palindromic symmetry model
#'
#' McCullagh, P. (1978). A class of parametric models for the analysis of
#' square contingency tables with ordered categories. Biometrika, 65(2). 413-416.
#' @param n matrix of observed counts
#' @param max_iter maximum number of iterations to maximize the log(likelihood)
#' @param verbose should cycle-by-cycle info be printed out? Default is FALSE, don't print.
#' @returns a list containing
#'    delta: the value of the asymmetry parameter delta
#'    sigma_delta: SE(delta)
#'    logL: value of log(likelihood) for final estimates
#'    chisq: Pearson chi-square for solution
#'    df: degrees of freedom for solution chisq
#'    psi: matrix of symmetry parameters
#'    alpha:
#'    c: constraint, sum of pi - values
#'    condition: constraint on psi to make model identified, Lagrange multiplier
#'    SE: vector of standard errors for all parameters
#' @export
#' @examples
#' McCullagh_palindromic_symmetry(vision_data)
McCullagh_palindromic_symmetry <- function(n, max_iter=15, verbose=FALSE) {
  M <- nrow(n)

  c_value <- 1.0
  alpha <- rep(1.0, M)
  delta <- McCullagh_initialize_delta(n)
  psi <- McCullagh_initialize_psi(n, delta, alpha, c_value)

  n_psi <- M * (M + 1) / 2
  n_delta <- length(delta)

  pi_orig <- McCullagh_compute_pi(psi, delta, alpha, c_value)
  if (verbose) {
    message(paste("initial log L =", McCullagh_log_L(n, psi, delta, alpha, c_value)))
    message(paste("initial chisq =", pearson_chisq(n, pi_orig)))
  }

  for (iter in 1:max_iter) {
    gradient <- McCullagh_gradient_log_l(n, psi, delta, alpha, c_value)
    hessian <- McCullagh_hessian_log_l(n, psi, delta, alpha, c_value)
    result <- McCullagh_newton_raphson_update(n, gradient, hessian, psi, delta, alpha, c_value)

    psi <- result$psi
    delta <- result$delta
    alpha <- result$alpha
    c_value <- result$c

    if (verbose) {
      message(paste(iter, "log L =", McCullagh_log_L(n, psi, delta, alpha, c_value)))
      pi_new <- McCullagh_compute_pi(psi, delta, alpha, c_value)
      message(paste("chisq =", pearson_chisq(n, pi_new)))
      message(paste("constraint on psi =", McCullagh_compute_condition(psi)))
      message(paste("c =", c_value))
    }
  }
  if (verbose) {
    message(paste("final log L =", McCullagh_log_L(n, psi, delta, alpha, c_value)))
  }
  pi_new <- McCullagh_compute_pi(psi, delta, alpha, c_value)
  if (verbose) {
    message(paste("chisq =", pearson_chisq(n, pi_new), "df =", McCullagh_compute_df(M)))
  }
  std_errors <- -solve(hessian)
  se_names <- McCullagh_generate_names(psi, delta, alpha, c)
  colnames(std_errors) <- se_names
  rownames(std_errors) <- se_names

  if (verbose) {
    se_delta <- sqrt(diag(std_errors)[(n_psi + 1): (n_psi + n_delta)])
    message(paste("delta =", delta, "std_err =", se_delta))
    message("alpha:")
    message(alpha)
    message("psi:")
    message(psi)
    pi <- McCullagh_compute_pi(psi, delta, alpha, c_value)
    message(paste("c =", c_value))
    message(paste("constraint on psi =", McCullagh_compute_condition(psi)))
    message(paste("sum of pi =", sum(pi)))
  }

  se_delta <- sqrt(diag(std_errors)[(n_psi + 1): (n_psi + n_delta)])
  logL <- McCullagh_log_L(n, psi, delta, alpha, c_value)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c_value)
  pearson <- pearson_chisq(n, pi)
  condition <- McCullagh_compute_condition(psi)

  df <- McCullagh_compute_df(M)
  list(delta=delta, sigma_delta=se_delta, logL=logL, chisq=pearson, df=df,
       psi=psi, alpha=alpha[1:(length(alpha)-1)], c=c_value, condition=condition, SE=std_errors)
}


#' Generalized version of palindromic symmetry model
#'
#' delta now is a vector, varying by index
#' McCullagh, P. (1978). A class of parametric models for the analysis of
#' square contingency tables with ordered categories. Biometrika, 65(2). 413-416.
#' @param n matrix of observed counts
#' @param max_iter maximum number of iterations to maximize log(likelihood)
#' @param verbose should cycle-by-cycle information be printed out? Default is FALSE,
#' do not print
#' @param start_values logical should the regular palindomic symmetry model be fit first to
#' get good starting values.  Default is FALSE.
#' @returns a list containing
#' @returns a list containing
#'    delta: the vector of asymmetry parameter delta
#'    sigma_delta: vector of SE(delta)
#'    logL: value of log(likelihood) for final estimates
#'    chisq: Pearson chi-square for solution
#'    df: degrees of freedom for solution chisq
#'    psi: matrix of symmetry parameters
#'    alpha:
#'    c: constraint, sum of pi - values
#'    condition: constraint on psi to make model identified, Lagrange multiplier
#'    SE: vector of standard errors for all parameters
#' @export
#' @examples
#' McCullagh_generalized_palindromic_symmetry(vision_data)
McCullagh_generalized_palindromic_symmetry <- function(n, max_iter=15, verbose=FALSE, start_values=FALSE) {
  if (verbose) {
    message("enter generalized p-symmetry main function")
  }

  M <- nrow(n)

  if (start_values) {
    if (verbose) {
      message("Generate starting values:")
    }
    start_values <- McCullagh_palindromic_symmetry(n)
    delta <- rep(start_values$delta, M - 1)
    if (verbose) {
      message(paste("delta ", delta))
    }
    psi = start_values$psi
    c_value = start_values$c
    alpha = start_values$alpha
  } else {
    c_value <- 1.0
    alpha <- rep(1.0, M)
    delta <- McCullagh_initialize_delta_vec(n)
    psi <- McCullagh_initialize_psi(n, delta, alpha, c_value)
  }

  n_psi <- M * (M + 1) / 2
  n_delta <- length(delta)

  if (verbose) {
    message("Begin iterations:")
    message(paste("initial log L =", McCullagh_log_L(n, psi, delta, alpha, c_value)))
  }
  pi_orig <- McCullagh_compute_generalized_pi(psi, delta, alpha, c_value)
  if (verbose) {
    message(paste("initial chisq =", pearson_chisq(n, pi_orig)))
  }

  for (iter in 1:max_iter) {
    gradient <- McCullagh_gradient_log_l(n, psi, delta, alpha, c_value)
    hessian <- McCullagh_hessian_log_l(n, psi, delta, alpha, c_value)
    result <- McCullagh_newton_raphson_update(n, gradient, hessian, psi, delta, alpha, c_value)
    psi <- result$psi
    delta <- result$delta
    alpha <- result$alpha
    c_value <- result$c

    if (verbose) {
      message(paste(iter, "log L =", McCullagh_log_L(n, psi, delta, alpha, c_value)))
      pi_new <- McCullagh_compute_generalized_pi(psi, delta, alpha, c_value)
      message(paste("chisq =", pearson_chisq(n, pi_new)))
      message(paste("constraint on psi =", McCullagh_compute_condition(psi)))
      message(paste("c =", c_value))
    }
  }
  if (verbose) {
    message(paste("final log L =", McCullagh_log_L(n, psi, delta, alpha, c_value)))
  }

  std_errors <- -solve(hessian)
  se_names <- McCullagh_generate_names(psi, delta, alpha, c_value)
  rownames(std_errors) <- se_names
  colnames(std_errors) <- se_names

  if (verbose) {
    se_delta <- sqrt(diag(std_errors)[(n_psi + 1): (n_psi + n_delta)])
    message(paste("delta =", delta, "std_err =", se_delta))
    message("alpha:")
    message(alpha[1:(length(alpha) - 1)])
    message("psi:")
    message(psi)
    pi <- McCullagh_compute_pi(psi, delta, alpha, c_value)
    message(paste("c =", c_value))
    message(paste("constraint on psi =", McCullagh_compute_condition(psi)))
    message(paste("sum of pi =", sum(pi)))
  }

  se_delta <- sqrt(diag(std_errors)[(n_psi + 1): (n_psi + n_delta)])
  logL <- McCullagh_log_L(n, psi, delta, alpha, c_value)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c_value)
  pearson <- pearson_chisq(n, pi)
  df <- McCullagh_compute_df(M, generalized = TRUE)
  condition <- McCullagh_compute_condition(psi)

  list(delta=delta, sigma_delta=se_delta, logL=logL, chisq=pearson, df=df,
       psi=psi, alpha=alpha[1:(length(alpha) - 1)], c=c_value, constraint=condition, SE=std_errors)
}


#' Computes the degrees of freedom for the model
#'
#' @param M the size of the M X M observed matrix
#' @param generalized is the generalized model being fit? Default is FALSE, regular model
McCullagh_compute_df <- function(M, generalized=FALSE) {
  if (! generalized) {
    (M - 1) * (M - 2) / 2
  } else {
    (M - 1) * (M - 2) / 2 - (M - 2)
  }
}


#' Compute the observed sums Nij
#'
#' @param n the matrix of observed counts
#' @returns a list containing Pij and Qij
McCullagh_compute_Nij <- function(n) {
  M <- nrow(n)
  n_pij <- matrix(0.0, nrow=M, ncol=M)
  n_qij = matrix(0.0, nrow=M, ncol=M)

  for (i in 1:(M - 1)) {
    for (j in (i + 1): M) {
      for (h in 1:i) {
        for (k in j: M) {
          n_pij[i, j] <- n_pij[i, j] + n[h, k]
        }
      }
    }
  }

  for (i in 1:(M - 1)) {
    for (j in (i + 1): M) {
      for (h in 1:i) {
        for (k in j: M) {
          n_qij[i, j] <- n_qij[i, j] + n[k, h]
        }
      }
    }
  }

  list("n_pij"=n_pij, "n_qij"=n_qij)
}


#' Compute initial values for scalar delta
#'
#' @param n matrix of observed counts
#' @returns value of delta
McCullagh_initialize_delta <- function(n) {
  M = nrow(n)
  n_pqij <- McCullagh_compute_Nij(n)
  n_pij <- n_pqij$n_pij
  n_qij <- n_pqij$n_qij

  delta <- 0.0
  for (i in 1:(M - 1)) {
    delta <- delta + n_pij[i, i + 1] / n_qij[i, i + 1]
  }
  delta <- log(delta / (M - 1))
}


#' Initialize vector delta
#'
#' @param n matrix of observed counts
#' @returns vector of delta values
McCullagh_initialize_delta_vec <- function(n) {
  M = nrow(n)
  n_pqij <- McCullagh_compute_Nij(n)
  n_pij <- n_pqij$n_pij
  n_qij <- n_pqij$n_qij

  delta_vec <- rep(0.0, M - 1)
  for (i in 1:(M - 1)) {
    delta_vec[i] <- delta_vec[i] + n_pij[i, i + 1] / n_qij[i, i + 1]
  }
  log(delta_vec)
}


#' Initialize the symmetry matrix psi
#'
#' @param n matrix of observed counts
#' @param delta scalar delta value
#' @param alpha vector of asymmetry parameters
#' @param c normalizing value of pi. Default is 1.0
#' @returns matrix psi
McCullagh_initialize_psi <- function(n, delta, alpha, c=1.0) {
  M <- nrow(n)
  n_pqij <- McCullagh_compute_Nij(n)
  n_pij <- n_pqij$n_pij
  n_qij <- n_pqij$n_qij

  psi <- matrix(0.0, nrow=M, ncol=M)
  for (i in 1:M) {
    psi[i, i] <- 1.0
  }

  N <- sum(n)
  for (i in 1:(M - 1)) {
    for (j in (i + 1):M) {
      psi[i, j] <- (n_pij[i, j] + n_qij[i, j]) / (2.0 * N)
      psi[j, i] <- psi[i, j]
    }
  }

  sum_psi <- 0.0
  for (i in 1:(M - 1)) {
    sum_psi <- sum_psi + psi[i, i + 1] + psi[i + 1, i]
  }

  for (i in 1:(M - 2)) {
    sum_psi <- sum_psi - psi[i, i + 2] - psi[i + 2, i]
  }

  diag <- 1.0 - sum_psi

  sum_psi = 0.0
  for (i in 1:M) {
    sum_psi <- sum_psi + n[i,i] / N
  }

  for (i in 1:M) {
    psi[i, i] <- diag * (n[i,i] / N) / sum_psi
  }

  pi <- McCullagh_compute_pi(psi, delta, alpha, 1.0)
  const <- sum(pi)
  psi / const
}


#' Compute model-based cumulative probabilities
#'
#' @param i row index
#' @param j column index
#' @param psi the symmetry matrix
#' @param delta the asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for pi.  Default is 1.0
#' @returns the model-based cumulative probability pi_ij
McCullagh_pij_qij <- function(i, j, psi, delta, alpha, c=1.0) {
  if (j == i) {
    return(psi[i, j])
  } else if (i < j) {
    if(length(delta) == 1) {
      return(c * exp(0.5 * delta) * psi[i, j] * alpha[i] / alpha[j - 1])
    } else {
      return(c * exp(0.5 * delta[i]) * psi[i, j] * alpha[i] / alpha[j - 1])
    }
  } else {
    if (length(delta) == 1) {
      return(c * exp(-0.5 * delta) * psi[j, i] * alpha[i - 1] / alpha[j])
    } else {
      return(c * exp(-0.5 * delta[j]) * psi[j, i] * alpha[i - 1] / alpha[j])
    }
  }
}


#' Computes culuative model probabilities for the generalized model using vector delta.
#'
#' @param i row index
#' @param j column index
#' @param psi symmetry matrix
#' @param delta_vec vector of delta values
#' @param alpha vector of asymmetry values
#' @param c1 normalizing value for pi. Defaults to 1.0
#' @returns model-based cumulative probability pi_ij
McCullagh_generalized_pij_qij <- function(i, j, psi, delta_vec, alpha, c1=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_pij_qij(i, j, psi, delta_vec, alpha, c1))
  }
  if (j == i) {
    return(psi[i, j])
  } else if (i < j) {
    return(c1 * exp(0.5 * delta_vec[i]) * psi[i, j] * alpha[i] / alpha[j - 1])
  } else {
    return(c1 * exp(-0.5 * delta_vec[j]) * psi[j, i] * alpha[i - 1] / alpha[j])
  }
}


#' Computes the model-based cumulative probability matrices pij and  qij
#'
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns list containing matrices pij and qij
McCullagh_compute_cumulatives <- function(psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  pij <- matrix(0.0, nrow=M, ncol=M)
  qij <- matrix(0.0, nrow=M, ncol=M)

  for (i in 1:(M - 1)) {
    for (j in (i + 1): M) {
      pij[i, j] <- McCullagh_pij_qij(i, j, psi, delta, alpha, c)
      qij[i, j] <- McCullagh_pij_qij(j, i, psi, delta, alpha, c)
    }
  }

  list(pij=pij, qij=qij)
}


#' Coompute the model-based cumulative probabilities pij and qij.
#'
#' @param psi symmetry matrix
#' @param delta_vec vector of asymmetry parameters
#' @param alpha vector of asymmetry parameters
#' @param c normalizing constant so pis sum to 1. Defaults to 1.0
#' @returns matrices of model-based cumulative probabilities pij and qij
McCullagh_compute_generalized_cumulatives <- function(psi, delta_vec, alpha, c=1.0) {
  M <- nrow(psi)
  pij <- matrix(0.0, nrow=M, ncol=M)
  qij <- matrix(0.0, nrow=M, ncol=M)

  for (i in 1:(M - 1)) {
    for (j in (i + 1): M) {
      pij[i, j] <- McCullagh_generalized_pij_qij(i, j, psi, delta_vec, alpha, c)
      qij[i, j] <- McCullagh_generalized_pij_qij(j, i, psi, delta_vec, alpha, c)
    }
  }

  list(pij=pij, qij=qij)
}


#' Compute the regular (non-cumulative) model-based pi values
#'
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns the matrix pi
McCullagh_compute_pi <- function(psi, delta, alpha, c) {
  pqij <- McCullagh_compute_cumulatives(psi, delta, alpha, c)
  pij <- pqij$pij
  qij <- pqij$qij

  M <- nrow(psi)
  bound <- M - 1
  pi <- matrix(0.0, nrow=M, ncol=M)

  for (i in 1:M) {
    for (j in 1:M) {
      if (j == i) {
        pi[i, i] <- psi[i, i]
      } else if (i < j) {
        if (i >= 2 && j <= bound) {
          pi[i, j] <- (pij[i, j] - pij[i - 1, j] - pij[i, j + 1] + pij[i - 1, j + 1])
        } else if (j <= bound) {
          pi[i, j] <- pij[i, j] - pij[i, j + 1]
        } else if (i >= 2) {
          pi[i, j] <- pij[i, j] - pij[i - 1, j]
        } else {
         pi[i, j] <- pij[i, j]
        }
      } else {
        if (j >= 2 && i <= bound) {
          pi[i, j] <- qij[j, i] - qij[j - 1, i] - qij[j, i + 1] + qij[j - 1, i + 1]
        } else if (i <= bound) {
          pi[i, j] <- qij[j, i] - qij[j, i + 1]
        } else if (j >= 2) {
          pi[i, j] <- qij[j, i] - qij[j - 1, i]
        } else {
          # j = 0, i = 3
          pi[i, j] <- qij[j, i]
        }
      }
    }
  }
  pi
}


#' Cpompute matrix pi under generalized model.
#'
#' @param psi the matrix of symmetry parameters
#' @param delta_vec the vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns the matrix pi
McCullagh_compute_generalized_pi <- function(psi, delta_vec, alpha, c=1) {
  pqij <- McCullagh_compute_generalized_cumulatives(psi, delta_vec, alpha, c)
  pij <- pqij$pij
  qij <- pqij$qij

  M <- nrow(psi)
  bound <- M - 1
  pi <- matrix(0.0, nrow=M, ncol=M)

  for (i in 1:M) {
    for (j in 1:M) {
      if (j == i) {
        pi[i, i] <- psi[i, i]
      } else if (i < j) {
        if (i >= 2 && j <= bound) {
          pi[i, j] <- (pij[i, j] - pij[i - 1, j] - pij[i, j + 1] + pij[i - 1, j + 1])
        } else if (j <= bound) {
          pi[i, j] <- pij[i, j] - pij[i, j + 1]
        } else if (i >= 2) {
          pi[i, j] <- pij[i, j] - pij[i - 1, j]
        } else {
          pi[i, j] <- pij[i, j]
        }
      } else {
        if (j >= 2 && i <= bound) {
          pi[i, j] <- qij[j, i] - qij[j - 1, i] - qij[j, i + 1] + qij[j - 1, i + 1]
        } else if (i <= bound) {
          pi[i, j] <- qij[j, i] - qij[j, i + 1]
        } else if (j >= 2) {
          pi[i, j] <- qij[j, i] - qij[j - 1, i]
        } else {
          # j = 0, i = 3
          pi[i, j] <- qij[j, i]
        }
      }
    }
  }
  pi
}


#' Extracts the weights to convert cumulative model-based probabilities to regular probabilities.
#'
#' @param i row index sought
#' @param j column index sought
#' @param M the number of rows/columns in observed matrix
#' @returns a list containing
#'    w_psi for when i == j
#'    w_pij for when i < j
#'    w_qij for when j < i
#'    weight populated with correct entry based on actual i and j
McCullagh_extract_weights <- function(i, j, M) {
  bound <- M - 1

  weight_psi <- matrix(0.0, nrow=M, ncol=M)
  weight_pij <- matrix(0.0, nrow=M, ncol=M)
  weight_qij <- matrix(0.0, nrow=M, ncol=M)

  if (j == i) {
    # pi[i][i] = psi[i][i]
    weight_psi[i, i] <- 1.0
    weight <- weight_psi
  } else if (i < j) {
    if (i >= 2 && j <= bound) {
      weight_pij[i, j] = 1.0
      weight_pij[i - 1, j] = -1.0
      weight_pij[i, j + 1] = -1.0
      weight_pij[i - 1, j + 1] = 1.0
      # pi[i][j] = pij[i][j] - pij[i - i][j] - pij[i][j + 1] + pij[i - 1][j + 1]
    } else if (j <= bound) {
      weight_pij[i, j] = 1.0
      weight_pij[i, j + 1] = -1.0
      # pi[i][j] = pij[i][j] - pij[i][j + 1]
    } else if (i >= 2) {
      weight_pij[i, j] = 1.0
      weight_pij[i - 1, j] = -1.0
      # pi[i][j] = pij[i][j] - pij[i - 1][j]
    } else {
      weight_pij[i, j] = 1.0
      # pi[i][j] = pij[i][j]
    }
    weight <- weight_pij
  } else {
    if (j >= 2 && i <= bound) {
      weight_qij[j, i] = 1.0
      weight_qij[j - 1, i] = -1.0
      weight_qij[j, i + 1] = -1.0
      weight_qij[j - 1, i + 1] = 1.0
      # pi[i][j] = qij[j][i] - qij[j - 1][i] - qij[j][i + 1] + qij[j - 1][i + 1]
    } else if (i <= bound) {
      weight_qij[j, i] = 1.0
      weight_qij[j, i + 1] = -1.0
      # pi[i][j] = qij[j][i] - qij[j][i + 1]
    } else if (j >= 2) {
      weight_qij[j, i] = 1.0
      weight_qij[j - 1, i] = -1.0
      # pi[i][j] = qij[j][i] - qij[j - 1][i]
    } else {
      # j = 1, i = 4
      weight_qij[j, i] = 1.0
      # pi[i][j] = qij[j][i]
    }
    weight <- weight_qij
  }
  list("w_psi"=weight_psi, "w_pij"=weight_pij, "w_qij"=weight_qij, "weight"=weight)
}


#' Derivative of pij[a, b] wrt psi[h, k]
#'
#' @param a row index of pi
#' @param b column index of pi
#' @param h row index of phi
#' @param k column index of phi
#' @param delta scalar or vector version of asymmetry parameters
#' @param alpha vector of asymmetry parameters
#' @param c normalizing constant for to make pi sum to 1. Defaults to 1.0
#' @returns derivative
McCullagh_derivative_pij_wrt_psi <- function(a, b, h, k, delta, alpha, c=1.0) {
  #   :return: d Pij[a, b]
  #           -------------
  #            d psi[h, k]
  #   """
  match = a == h && b == k
  symmetric = a == k && b == h
  if (!(match || symmetric)) {
    return(0.0)
  } else if (a == b) {
    return(1.0)
  } else {
    if (a < b) {
      # Pij
      if (length(delta) == 1) {
        return(c * exp(0.5 * delta) * alpha[a] / alpha[b - 1])
      } else {
        return(c*exp(0.5 * delta[a]) * alpha[a] / alpha[b -1])
      }
    } else if (b < a) {
      # Qij
      if (length(delta) == 1) {
        return(c * exp(-0.5 * delta) * alpha[a - 1] / alpha[b])
      } else {
        return(c * exp(-0.5 * delta[b]) * alpha[a - 1] / alpha[b])
      }
    } else {
      return(0.0)
    }
  }
}


#' Derivative of pij[i, j] wrt scalar delta.
#'
#' @param i row index of pij
#' @param j column index of pij
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing constant so that pi sum to 1.0. Default value is 1.0
#' @returns derivative
McCullagh_derivative_pij_wrt_delta <- function(i, j, psi, delta, alpha, c=1.0) {
  #   :return:  d Pij[i,j]
  #            -----------
  #              d delta
  #   """
  if (i == j) {
    # psi
    return(0.0)
  } else if (i < j) {
    # Pij
    return(c * exp(0.5 * delta) * psi[i, j] * alpha[i] / (2.0 * alpha[j - 1]))
  } else {
    # Qij
    return(-c * exp(-0.5 * delta) * psi[j, i] * alpha[i - 1] / (2.0 * alpha[j]))
  }
}


#' Derivative of pij[i, j] wrt alpha[index]
#'
#' @param i row index of pij
#' @param j column index of pij
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar or vector of asymmetry parameters
#' @param alpha vector of asymmetry parameters
#' @param c normalizing constant to make pi sum to 1.0. Default ot 1.0
#' @returns derivative
McCullagh_derivative_pij_wrt_alpha <- function(i, j, index, psi, delta, alpha, c=1.0) {
  #   :return:    d Pij
  #            ---------------
  #            d alpha[index]
  #   """
  match <- index == i || index == j
  adjacent <- index == i - 1 || index == j - 1
  if (i == j || !(match || adjacent)) {
    return(0.0)
  } else if (i < j) {
    # Pij
    # j > 1 because alpha[0] always = 1.0, so the ratio alpha[0] / alpha[1] can differ from 1
    if (i == j - 1 && j > 1) {
     return(0.0)
    }

    if (index == i) {
      if (length(delta) == 1) {
        return(c * exp(0.5 * delta) * psi[i, j] / alpha[j - 1])
      } else {
        return(c * exp(0.5 * delta[i]) * psi[i, j] / alpha[j - 1])
      }
    } else if (index == j - 1) {
      if (length(delta) == 1) {
        return(-c * exp(0.5 * delta) * psi[i, j] * alpha[i] / (alpha[index]^2))
      } else {
        return(-c * exp(0.5* delta[i]) * psi[i, j] * alpha[i] / alpha[index]^2)
      }
    }
    return(0.0)
  } else {
    # Qij
    # i > 1 because alpha[0] always = 1.0, so the ratio alpha[1] / alpha[0] can differ from 1
    if (j == i - 1 && i > 1) {
      return(0.0)
    }
    if (index == j) {
      if (length(delta) == 1) {
        return(-c * exp(-0.5 * delta) * psi[j, i] * alpha[i - 1] / (alpha[index]^2))
      } else {
        return(-c * exp(-0.5 * delta[j]) * psi[j, i] * alpha[i - 1] / (alpha[index]^2))
      }
    } else if (index == i - 1) {
      if (length(delta) == 1) {
        return(c * exp(-0.5 * delta) * psi[j, i] / alpha[j])
      } else {
        return(c * exp(-0.5 * delta[j]) * psi[j, i] / alpha[j])
      }
    }
  }
  0.0
}


#' Derivative pij[i, j] wrt c.
#'
#' @param i row index of pij
#' @param j column index of pij
#' @param psi matrix of symmetry parameters
#' @param delta scalar or vector of asymmetry parameters
#' @param alpha vector of asymmetry parameters
#' @param c normalizing constant to make pi sum to 1.0
#' @returns derivative
McCullagh_derivative_pij_wrt_c <- function(i, j, psi, delta, alpha, c) {
  M <- nrow(psi)

  derivative <- 0.0
  if (i == j) {
    deriv <- 0.0
  } else if (i < j) {
    if (length(delta) == 1) {
      deriv <- exp(0.5 * delta) * psi[i, j] * alpha[i] / alpha[j - 1]
    } else {
      deriv <- exp(0.5 * delta[i]) * psi[i, j] * alpha[i] / alpha[j - 1]
    }
  } else if (i > j) {
    if (length(delta) == 1) {
      deriv <- exp(-0.5 * delta) * psi[i, j] * alpha[i - 1] / alpha[j]
    } else {
      deriv <- exp(-0.5 * delta[j]) * psi[i, j] * alpha[i -1] / alpha[j]
    }
  }
  derivative <- derivative + deriv
}


#' Derivative pij[i,j] wrt vector delta[k].
#'
#' @param i row index of pij
#' @param j column index of pij
#' @param k index of delta
#' @param psi the matrix of symmetry parameters
#' @param delta_vec the vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns list containing matrices pij and qij
McCullagh_derivative_pij_wrt_delta_vec <- function(i, j, k, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_derivative_pij_wrt_delta(i, j, psi, delta_vec, alpha, c))
  }

  if (i == j) {
    return(0.0)
  } else if (i == k) {
    if (i < j) {
      # Pij
      return(c * exp(0.5 * delta_vec[k]) * psi[i, j] * alpha[i] / (2.0 * alpha[j - 1]))
    } else {
      return(0.0)
    }
  } else if (j == k) {
    if (j < i) {
      # Qij
      return(-c * exp(-0.5 * delta_vec[k]) * psi[j, i] * alpha[i - 1] / (2.0 * alpha[j]))
    } else {
      return(0.0)
    }
  }
  0.0
}


#' Derivative of pi[i, j] wrt psi[i1, j1].
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_derivative_pi_wrt_psi <- function(i, j, i1, j1, psi, delta, alpha, c=1.0) {
  #   :return:    d pi[i][j]
  #            ---------------
  #             d psi[i1][j1]
  #   """
  M <- nrow(psi)
  weights = McCullagh_extract_weights(i, j, M)
  if  (i == j) {
    w <- weights$w_psi
  } else if (i < j) {
    w <- weights$w_pij
  } else {
    w <- weights$w_qij
  }
  derivative = 0.0

  # a and b index Pij/Qij
  for (a in 1:M) {
    for (b in 1:M) {
      if (i == j) {
        # psi
        derivative <- derivative + w[a, b] * McCullagh_derivative_pij_wrt_psi(a, b, i1, j1, delta, alpha, c)
      }
      if (i < j) {
        # Pij
        derivative <- derivative + w[a, b] * McCullagh_derivative_pij_wrt_psi(a, b, i1, j1, delta, alpha, c)
      }
      if (i > j) {
        # Qij
        derivative <- derivative + w[b, a] * McCullagh_derivative_pij_wrt_psi(a, b, i1, j1, delta, alpha, c)
      }
    }
  }
  derivative
}


#' Derivative of pi[i, j] wrt delta.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_derivative_pi_wrt_delta <- function(i, j, psi, delta, alpha, c=1.0) {t
  #   :return:    d pi[i1][j1]
  #              -------------
  #                d delta
  #   """
  M = nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  if (i == j) {
    w <- weights$w_psi
  } else if (i < j) {
    w <- weights$w_pij
  } else if (i > j) {
    w <- weights$w_qij
  }

  derivative <- 0.0
  # a and b index Pij and Qij
  for (a in 1:M) {
    for (b in 1:M) {
      if (i == j) {
        derivative <- derivative + w[a, b] * McCullagh_derivative_pij_wrt_delta(a, b, psi, delta, alpha, c)
       }
      if (i < j) {
        # Pij
        derivative <- derivative +  w[a, b] * McCullagh_derivative_pij_wrt_delta(a, b, psi, delta, alpha, c)
      }
      if (i > j) {
        # Qij
        derivative <- derivative + w[b, a] * McCullagh_derivative_pij_wrt_delta(a, b, psi, delta, alpha, c)
      }
    }
  }
  derivative
}


#' Derivative of pi[i, j] wrt alpha[index].
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param index index of alpha
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_derivative_pi_wrt_alpha <- function(i, j, index, psi, delta, alpha, c=1.0) {
  #   :return:  d pi[i][j]
  #           --------------
  #           d alpha[index]
  #   """
  if (i == j) {
    return(0.0)
  }
  M = nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  if (i == j) {
    w = weights$w_psi
  } else if (i < j) {
    w = weights$w_pij
  } else {
    w = weights$w_qij
  }

  derivative = 0.0
  # a and b index Pij and Qij
  for (a in 1:M) {
    for (b in 1:M) {
      if (i == j) {
        # psi
        derivative <- derivative + w[a, b] * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta, alpha, c)
      } else if (i < j) {
        # Pij
        derivative <- derivative + w[a, b] * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta, alpha, c)
      } else if (i > j) {
        # Qij
        derivative <- derivative +  w[b, a] * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta, alpha, c)
      }
    }
  }

  derivative
}


#' Derivative pi[i, j] wrt c.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar or vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0
#' @returns derivative
McCullagh_derivative_pi_wrt_c <- function(i, j, psi, delta, alpha, c) {
  if (i == j) {
    return(0.0)
  }

  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  derivative <- 0.0
  for (a in 1:M) {
    for (b in 1:M) {
      if (length(delta) == 1) {
        pq_ab <- McCullagh_pij_qij(a, b, psi, delta, alpha, c)
      } else {
        pq_ab <- McCullagh_generalized_pij_qij(a, b, psi, delta, alpha, c)
      }

      if (i < j) {
        wij = w_pij[a, b]
        derivative <- derivative + wij * pq_ab / c
      } else {
        wij = w_qij[b, a]
        derivative <- derivative + wij * pq_ab / c
      }
    }
  }
  derivative
}


#' Derivative pi[i, j] wrt delta[k].
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param k index of delta_vec
#' @param psi the matrix of symmetry parameters
#' @param delta_vec the vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_derivative_pi_wrt_delta_vec <- function(i, j, k, psi, delta_vec, alpha, c=1.0) {
  M <- nrow(psi)

  weights <- McCullagh_extract_weights(i, j, M)
  if (i == j) {
    w <- weights$w_psi
  } else if (i < j) {
    w <- weights$w_pij
  } else if (i > j) {
    w <- weights$w_qij
  }

  derivative <- 0.0
  # a and b index Pij and Qij
  for (a in 1:M) {
    for (b in 1:M) {
      if (i == j) {
        derivative <- derivative + w[a, b] * McCullagh_derivative_pij_wrt_delta_vec(a, b, k, psi, delta_vec, alpha, c)
      }
      if (i < j) {
        # Pij
        derivative <- derivative + w[a, b] * McCullagh_derivative_pij_wrt_delta_vec(a, b, k, psi, delta_vec, alpha, c)
      }
      if (i > j) {
        # Qij
        derivative <- derivative + w[b, a] * McCullagh_derivative_pij_wrt_delta_vec(a, b, k, psi, delta_vec, alpha, c)
      }
    }
  }
  derivative
}


#' Second order derivative wrt psi^2.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param i1 first row index of psi
#' @param j1 first column index of psi
#' @param i2 second row index of psi
#' @param j2 second column index of pis
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_psi_2 <- function(i, j, i1, j1, i2, j2, psi, delta, alpha, c=1.0) {
  # this is NOT an error. The second order partial wrt psi is uniformly 0.0
  0.0
}


#' Second order derivaitve of pi wrt pshi and scalar delta.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_psi_delta <- function(i, j, i1, j1, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  if (i == j) {
    w <- w_psi
  } else if (i < j) {
    w <- w_pij
  } else if (i > j) {
    w <- w_qij
  }
  w <- weights$weight

  derivative <- 0.0

  # a and b index pi
  for (a in 1:M) {
    for (b in 1:M) {
      match <- i1 == a && j1 == b
      symmetric <- i1 == b && j1 == a
      if (!(match || symmetric)) {
          next
      }

      if (i == j) {
        deriv <- w[a, b] * McCullagh_derivative_pij_wrt_delta(a, b, psi, delta, alpha, c) / psi[i1, j1]
      } else if (i < j) {
        deriv <- w[a, b] * McCullagh_derivative_pij_wrt_delta(a, b, psi, delta, alpha, c) / psi[i1, j1]
      } else if (i > j) {
        deriv <- w[b, a] * McCullagh_derivative_pij_wrt_delta(a, b, psi, delta, alpha, c) / psi[i1, j1]
      } else {
        deriv <- 0.0
      }
      derivative <- derivative + deriv
    }
  }
  derivative
}


#' Second order derivative of pi[i, j] wrt psi[i1, j1] and alpha[index].
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param index index of alpha
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_psi_alpha <- function(i, j, i1, j1, index, psi, delta, alpha, c=1.0) {
  #   :return:         d^2 pi[i][j]
  #           ------------------------------
  #            d psi[i1][j1] d alpha[index]
  #   """

  # diagonal elements do not contain alpha
  if (i == j) {
    return(0.0)
  }

  M = nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  if (i == j) {
    w <- w_psi
  } else if (i < j) {
    w <- w_pij
  } else if (i > j) {
    w <- w_qij
  }

  w <- weights$weight

  derivative <- 0.0
  # a and b index pi
  for (a in 1:M) {
    for (b in 1:M) {
      match <- a == i1 && b == j1
      symmetric <- a == j1 && b == i1
      if (!(match || symmetric)) {
        next
      }

      if (i == j) {
        wij <- w[a, b]
        deriv <- wij * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta, alpha, c) / psi[i1, j1]
      } else if (i < j) {
        wij <- w[a, b]
        deriv <- wij * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta, alpha, c) / psi[i1, j1]
      } else if (i > j) {
        wij <- w[b, a]
        deriv <- wij * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta, alpha, c) / psi[i1, j1]
      } else {
        deriv = 0.0
      }
      derivative <- derivative + deriv
    }
  }
  derivative
}


#' Second order derivative of pi[i, j] wrt psi[i1, j1] and c.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_psi_c <- function (i, j, i1, j1, psi, delta, alpha, c) {
  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij
  w <- weights$weight

  derivative = 0.0
  for (a in 1:M) {
    for (b in 1:M) {
      match <- a == i1 && b == j1
      symmetric <- a == j1 && b == i1
      if (!(match || symmetric)) {
        next
      }
      pq_ab <- McCullagh_pij_qij(a, b, psi, delta, alpha, c)
      if (i < j) {
        wij <- w[a, b]
        derivative <- derivative + wij * pq_ab / (c * psi[i1, j1])
      } else {
        wij <- w_qij[b, a]
        derivative <- derivative + wij * pq_ab / (c * psi[i1, j1])
      }
    }
  }
  derivative
}


#' Second order derivative of pi[i, j] wrt scalar delta.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_delta_2 <- function(i, j, psi, delta, alpha, c=1.0) {
  if (i == j) {
    return(0.0)
  }
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  pi[i, j] / 4.0
}


#' Second order deriviative of pi[i, j] wrt scalar delta and alpha[index]
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param index index of alpha
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_delta_alpha <- function(i, j, index, psi, delta, alpha, c=1.0) {
  #   :return:       d^2 pi[i][j]
  #            ------------------------
  #             d alpha[index] d delta
  #   """
  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  if (i == j) {
    w = w_psi
  } else if (i < j) {
    w = w_pij
  } else if (i > j) {
    w = w_qij
  }

  derivative <- 0.0
  for (a in 1:M) {
    for (b in 1:M) {
      if (i == j) {
        wij <- 0.0
      } else if (i < j) {
        wij <- 0.5 * w[a, b]
      } else if (i > j) {
        wij <- -0.5 * w[b, a]
      }
      deriv <- wij * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta, alpha, c)
      derivative <- derivative + deriv
    }
  }
  derivative
}


#' Second order derivative of pi[i, j] wrt scalae delta and c.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_delta_c <- function(i, j, psi, delta, alpha, c) {
  if (i == j) {
    return(0.0)
  }

  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  derivative <- 0.0
  for (a in 1:M) {
    for (b in 1:M) {
      pq_ab = McCullagh_pij_qij(a, b, psi, delta, alpha, c)
      if (i < j) {
        wij <- w_pij[a, b]
        derivative <- derivative + wij * pq_ab / (2.0 * c)
      } else {
        wij <- w_qij[b, a]
        derivative <- derivative - wij * pq_ab / (2.0 * c)
      }
    }
  }
  derivative
}


#' Second derivative of pi[i, j] wrt alpha^2.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param index1 index of first alpha
#' @param index2 index of second aloha
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_alpha_2 <- function(i, j, index1, index2, psi, delta, alpha, c=1.0) {
  if (i == j) {
    return(0.0)
  }

  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  if (i == j) {
    w <- w_psi
  } else if (i < j) {
    w <- w_pij
  } else if (i > j) {
    w <- w_qij
  }

  derivative <- 0.0
  # a and b index Pij and Qij
  for (a in 1:M) {
    for (b in 1:M) {
      P_ab <- 0.0
      Q_ab <- 0.0
      if (a < b) {
        # Computes Pij[a][b]
        P_ab <- McCullagh_pij_qij(a, b, psi, delta, alpha, c)
      } else if (a > b) {
        # Computes Qij[a][b]
        Q_ab <- McCullagh_pij_qij(a, b, psi, delta, alpha, c)
      }

      if (i < j) {
        w_ab <- w[a, b]
      } else if (i > j) {
        w_ab <- w[b, a]
      } else {
        w_ab <- 0.0
      }

      deriv <- 0.0
      if (a == b) {
        deriv <- 0.0
      } else if (a < b) {
        if (index1 == a && index2 == a) {
          deriv <- 0.0
        } else if (index1 == a && index2 == b - 1) {
          deriv <- -P_ab / (alpha[index1] * alpha[index2])
        } else if (index1 == b - 1 && index2 == a) {
          deriv <- -P_ab / (alpha[index1] * alpha[index2])
        } else if (index1 == b - 1 && index2 == b - 1) {
          deriv <- 2.0 * P_ab / alpha[index1]^2
        }
      } else if (a > b) {
        if (index1 == a - 1 && index2 == a - 1) {
          deriv <- 0.0
        } else if (index1 == a - 1 && index2 == b) {
          deriv <- -Q_ab / (alpha[index1] * alpha[index2])
        } else if (index1 == b && index2 == a - 1) {
          deriv <- -Q_ab / (alpha[index1] * alpha[index2])
        } else if (index1 == b && index2 == b) {
          deriv <- 2.0 * Q_ab / alpha[index1]^2
        }
      }
      derivative <- derivative + w_ab * deriv
    }
  }
  derivative
}


#' Second derivaitve of pi[i, j] wrt alpha[index] and c.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param index index of alpha
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_alpha_c <- function(i, j, index, psi, delta, alpha, c) {
  M <- nrow(psi)

  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  derivative <- 0.0
  for (a in 1:M) {
    for (b in 1:M) {
      pq_ab <- McCullagh_pij_qij(a, b, psi, delta, alpha, c)
      if ((a == b) || (abs(a - b) == 1 && a > 1 && b > 1)) {
        next
      }
      if (i < j) {
        wij <- w_pij[a, b]
        if (index == a) {
          derivative <- derivative + wij * pq_ab / (alpha[index] * c)
        } else if (index == b - 1) {
          derivative <- derivative - wij * pq_ab / (alpha[index] * c)
        }
      } else {
        wij <- w_qij[b, a]
        if (index == b) {
          derivative <- derivative - wij * pq_ab / (alpha[index] * c)
        } else if (index == a - 1) {
          derivative <- derivative + wij * pq_ab / (alpha[index] * c)
        }
      }
    }
  }
  derivative
}


#' Second order derivative of pi[i, j] wrt c^2.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param psi the matrix of symmetry parameters
#' @param delta the scalar asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_c_2 <- function(i, j, psi, delta, alpha, c) {
  # this is correct, not a placeholder
  0.0
}


#' Second order derivaitve of pi[i, j] wrt psi[i1, j1] and kelta[k].
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param k index of delta
#' @param psi the matrix of symmetry parameters
#' @param delta_vec the vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_psi_delta_vec <- function(i, j, i1, j1, k, psi, delta_vec, alpha, c=1.0) {
  if (i == j) {
    return(0.0)
  }

  imin <- min(i1, j1)
  if (imin != k) {
    return(0.0)
  }

  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  if (i == j) {
    w <- w_psi
  } else if (i < j) {
    w <- w_pij
  } else if (i > j) {
    w <- w_qij
  }

  derivative <- 0.0

  # a and b index pi
  for (a in 1:M) {
    for (b in 1:M) {
      ab <- min(a, b)
      if (length(delta_vec) > 1 && k != ab) {
        next
      }

      match <- i1 == a && j1 == b
      symmetric <- i1 == b && j1 == a
      if (!(match || symmetric)) {
        next
      }

      if (i == j) {
        deriv <- w[a, b] * McCullagh_derivative_pij_wrt_delta_vec(a, b, k, psi, delta_vec, alpha, c) / psi[i1, j1]
      } else if (i < j) {
        deriv <- w[a, b] * McCullagh_derivative_pij_wrt_delta_vec(a, b, k, psi, delta_vec, alpha, c) / psi[i1, j1]
      } else if (i > j) {
        deriv <- w[b, a] * McCullagh_derivative_pij_wrt_delta_vec(a, b, k, psi, delta_vec, alpha, c) / psi[i1, j1]
      } else {
        deriv <- 0.0
      }
      derivative <- derivative + deriv
    }
  }
  derivative
}


#' Derivative of pi[i, j] wrt delta^2.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param k1 first index of delta
#' @param k2 second index of delta
#' @param psi the matrix of symmetry parameters
#' @param delta_vec the vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_delta_vec_2 <- function(i, j, k1, k2, psi, delta_vec, alpha, c=1.0) {

  if (i == j) {
    return(0.0)
  }

  M <- nrow(psi)

  weights <- McCullagh_extract_weights(i, j, M)
  if (i == j) {
    w <- weights$w_psi
  } else if (i < j) {
    w <- weights$w_pij
  } else {
    w <- weights$w_qij
  }

  derivative <- 0.0
  for(a in 1:M) {
    for (b in 1:M) {
      P_ab <- 0.0
      Q_ab <- 0.0
      if (a < b) {
        # Computes Pij[a][b]
        P_ab <- McCullagh_generalized_pij_qij(a, b, psi, delta_vec, alpha, c)
      } else if (a > b) {
        # Computes Qij[a][b]
        Q_ab <- McCullagh_generalized_pij_qij(a, b, psi, delta_vec, alpha, c)
      }

      if (i < j) {
        w_ab <- w[a, b]
      } else if (i > j) {
        w_ab <- w[b, a]
      } else {
        w_ab <- 0.0
      }

      deriv <- 0.0
      if (a == b) {
        deriv <- 0.0
      } else if (a < b && (k1 == a && k2 == a)) {
        deriv <- P_ab / 4.0
      } else if (b < a && (k1 == b & k2 == b)) {
        deriv <- Q_ab / 4.0
      }
      derivative <- derivative + w_ab * deriv
    }
  }
  derivative
}


#' Second order dertivative of pi[i, j] wrtt delta[k] alpha[index].
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param k index of delta
#' @param index index of alpha
#' @param psi the matrix of symmetry parameters
#' @param delta_vec the vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_delta_vec_alpha <- function(i, j, k, index, psi, delta_vec, alpha, c=1.0) {
  #   :param c: scaling coefficient
  #   :return:       d^2 pi[i][j]
  #            ---------------------------
  #             d alpha[index] d delta[k]
  #   """
  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  if (i == j) {
    w = w_psi
  } else if (i < j) {
    w = w_pij
  } else if (i > j) {
    w = w_qij
  }

  derivative <- 0.0
  for (a in 1:M) {
    for (b in 1:M) {
      ab <- min(a, b)
      if (ab != k) {
        next
      }
      if (i == j) {
        wij <- 0.0
      } else if (i < j) {
        wij <- 0.5 * w[a, b]
      } else if (i > j) {
        wij <- -0.5 * w[b, a]
      }
      deriv <- wij * McCullagh_derivative_pij_wrt_alpha(a, b, index, psi, delta_vec, alpha, c)
      derivative <- derivative + deriv
    }
  }
  derivative
}


#' Second derivative of pi[i, j] wrt delta[k] and c.
#'
#' @param i row index of pi
#' @param j column index of pi
#' @param k index of delta
#' @param psi the matrix of symmetry parameters
#' @param delta_vec the vector asymmetry parameter
#' @param alpha the vector of asymmetry parameters
#' @param c the normalizing constant for the pis to sum to 1.0  Default value is 1.0
#' @returns derivative
McCullagh_second_order_pi_wrt_delta_vec_c <- function(i, j, k, psi, delta_vec, alpha, c) {
  if (i == j) {
    return(0.0)
  }

  M <- nrow(psi)
  weights <- McCullagh_extract_weights(i, j, M)
  w_psi <- weights$w_psi
  w_pij <- weights$w_pij
  w_qij <- weights$w_qij

  derivative <- 0.0
  for (a in 1:M) {
    for (b in 1:M) {
      ab = min(a, b)
      if (ab != k) {
        next
      }
      pq_ab = McCullagh_generalized_pij_qij(a, b, psi, delta_vec, alpha, c)
      if (i < j) {
        wij <- w_pij[a, b]
        derivative <- derivative + wij * pq_ab / (2.0 * c)
      } else {
        wij <- w_qij[b, a]
        derivative <- derivative - wij * pq_ab / (2.0 * c)
      }
    }
  }
  derivative
}


#' Compute the linear constraint on psi elements for identifiablity.
#'
#' @param psi symmetry matrix
#' @returns value of the constraint
McCullagh_compute_condition <- function(psi) {
  M <- nrow(psi)

  condition <- 0.0
  for (i in 1:M) {
    condition <- condition + psi[i, i]
  }

  if (1 < M) {
    for (i in 1:(M - 1)) {
      condition <- condition + psi[i, i + 1] + psi[i + 1, i]
    }
  }

  if (2 < M) {
    for (i in 1:(M - 2)) {
      condition <- condition - psi[i, i + 2] - psi[i + 2, i]
    }
  }
  condition - 1.0
}


#' Logical test of whether a specific psi will be in the constraint set.
#'
#' @param i first index of psi
#' @param j second index of psi
#' @returns TRUE if it falls within the set, FALSE otherwise.
McCullagh_is_in_constraint_set <- function(i, j) {
  abs(i - j) <= 2
}


#' Derivative of the condition wrt psi[i, j].
#'
#' @param i first index of psi
#' @param j second index of psi
#' @returns derivative
McCullagh_derivative_condition_wrt_psi <- function(i, j) {
  result <- 0.0
  if (i == j) {
    result <- 1.0
  } else if (abs(i - j) == 1) {
    result <- 2.0
  } else if (abs(i - j) == 2) {
    result <- -2.0
  } else {
    result <- 0.0
  }
  result
}


#' Compute the value of the Lagrange multiplier for the constraint on psi.
#'
#' @param n matrix of observed counts
#' @param pi matrix of model-based probabilities pi.
#' @returns the value of the Lagrange multiplier.
McCullagh_compute_omega <- function(n, pi) {
  M <- nrow(pi)
  N <- sum(n)

  omega = 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3): M) {
      omega <- omega + N * pi[a, b] + N * pi[b, a] - n[a, b] - n[b, a]
    }
  }

  omega
}


#' Derivative of Lagrange multiplier omega wrt psi[i, j].
#'
#' @param n matrix of observed counts
#' @param i first index of psi
#' @param j second index of psi
#' @param psi symmetry matrix
#' @param delta scalar or vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Defaults to 1.0
McCullagh_derivative_omega_wrt_psi <- function(n, i, j, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  if (McCullagh_is_in_constraint_set(i, j)) {
    return(0.0)
  } else {
    change = 0.0
    for (a in 1:(M - 3)) {
      for (b in (a + 3): M) {
        change <- change + N * McCullagh_derivative_pi_wrt_psi(a, b, i, j, psi, delta, alpha, c)
        change <- change + N * McCullagh_derivative_pi_wrt_psi(b, a, i, j, psi, delta, alpha, c)
      }
    }
  }
  change
}


#' Derivative of Lagrange multiplier omega wrt scalar delta.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0. Default is 1.0.
#' @returns derivative
McCullagh_derivative_omega_wrt_delta <- function(n, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_delta(a, b, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_delta(b, a, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Derivative of Lagrange multiplier omega wrt alpha[index].
#'
#' @param n matrix of observed counts
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_omega_wrt_alpha <- function(n, index, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_alpha(a, b, index, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_alpha(b, a, index, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Derivative of Lagrange multiplier omega wrt c.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_omega_wrt_c <- function(n, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_c(a, b, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_c(b, a, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Derivative of Lagrange multiplier omega wrt vector delta[k].
#'
#' @param n matrix of observed counts
#' @param k index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_omega_wrt_delta_vec <- function(n, k, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_derivative_omega_wrt_delta(n, psi, delta_vec, alpha, c))
  }

  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      ab <- min(a, b)
      if (ab != k) {
        next
      }
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_delta_vec(a, b, k, psi, delta_vec, alpha, c)
      derivative <- derivative + N * McCullagh_derivative_pi_wrt_delta_vec(b, a, k, psi, delta_vec, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt psi^2.
#'
#' @param n matrix of observed counts
#' @param i1 first row index of psi
#' @param j1 first column index of psi
#' @param i2 second row index of psi
#' @param j2 second column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_psi_2 <- function(n, i1, j1, i2, j2, psi, delta, alpha, c=1.0) {
  #   This is correct. The second-order partial of pi[i][j] wrt psi is 0.0.
  #   :return:          d^2 omega
  #           ----------------------------
  #            d psi[i1[j1] d psi[i2][j2]]
  #   """
  0.0
}


#' Second derivative of Lagrange multiplier omega wrt psi and scalar delta.
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_psi_delta <- function(n, i1, j1, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative = 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_delta(a, b, i1, j1, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_delta(b, a, i1, j1, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt psi[i1, j1] and alpha[index].
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_psi_alpha <- function(n, i1, j1, index, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative = 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_alpha(a, b, i1, j1, index, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_alpha(b, a, i1, j1, index, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt psi[i1, j1] and c.
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_psi_c <- function(n, i1, j1, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)

  derivative = 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_c(a, b, i1, j1, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_c(b, a, i1, j1, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt scalae delta^2.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_delta_2 <- function(n, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_2(a, b, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_2(b, a, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt delta and alpha[index].
#'
#' @param n matrix of observed counts
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_delta_alpha <- function(n, index, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative = 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3): M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_alpha(a, b, index, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_alpha(b, a, index, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt scalar delta and c.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_delta_c <- function(n, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (i in 1:(M - 3)) {
    for (j in (i + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_c(i, j, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_c(j, i, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt alpha^2.
#'
#' @param n matrix of observed counts
#' @param k1 first index of alpha
#' @param k2 second index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_alpha_2 <- function(n, k1, k2, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_alpha_2(a, b, k1, k2, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_alpha_2(b, a, k1, k2, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt alpha[index] and c.
#'
#' @param n matrix of observed counts
#' @param index row index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_alpha_c <- function(n, index, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)

  derivative = 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_alpha_c(a, b, index, psi, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_alpha_c(b, a, index, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt c^2.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_c_2 <- function(n, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)

  derivative = 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_c_2(a, b, delta, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_c_2(b, a, psi, delta, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt psi[i1, j1] and delta_vec[k].
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param k index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_psi_delta_vec <- function(n, i1, j1, k, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_second_order_omega_wrt_psi_delta(n, i1, j1, psi, delta_vec, alpha, c))
  }
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      ab <- min(a, b)
      if (ab != k) {
        next
      }
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_delta(a, b, i1, j1, psi, delta_vec[k], alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_psi_delta(b, a, i1, j1, psi, delta_vec[k], alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt delta_vec^2.
#'
#' @param n matrix of observed counts
#' @param k1 first index of delta_vec
#' @param k2 second index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_delta_vec_2 <- function(n, k1, k2, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_second_order_omega_wrt_delta_2(n, psi, delta_vec, alpha, c))
  }

  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3):M) {
      ab <- min(a, b)
      if (ab != k1 || ab != k2) {
        next
      }
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_vec_2(a, b, k1, k2, psi, delta_vec, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_vec_2(b, a, k1, k2, psi, delta_vec, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt delta_vec[k] and alpha[index].
#'
#' @param n matrix of observed counts
#' @param k index of delta_vec
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_delta_vec_alpha <- function(n, k, index, psi, delta_vec, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (a in 1:(M - 3)) {
    for (b in (a + 3): M) {
      ab <- min(a, b)
      if (ab != k) {
        next
      }
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_vec_alpha(a, b, k, index, psi, delta_vec, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_vec_alpha(b, a, k, index, psi, delta_vec, alpha, c)
    }
  }
  derivative
}


#' Second derivative of Lagrange multiplier omega wrt delta_vec[k] and c.
#'
#' @param n matrix of observed counts
#' @param k index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector of asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.
#' @returns derivative
McCullagh_second_order_omega_wrt_delta_vec_c <- function(n, k, psi, delta_vec, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)

  derivative <- 0.0
  for (i in 1:(M - 3)) {
    for (j in (i + 3):M) {
      # ij <- min(i, j)
      # if (ij != k) {
      #   next
      # }
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_vec_c(i, j, k, psi, delta_vec, alpha, c)
      derivative <- derivative + N * McCullagh_second_order_pi_wrt_delta_vec_c(j, i, k, psi, delta_vec, alpha, c)
    }
  }
  derivative
}


#' Derivative of Lagrangian wrt psi[i1, j1].
#'
#' @param n matrix of observed counts
#' @param i1 first index of psi
#' @param j1 first index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_lagrangian_wrt_psi <- function(n, i1, j1, psi, delta, alpha, c=1.0) {
  condition <- McCullagh_compute_condition(psi)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  omega <- McCullagh_compute_omega(n, pi)
  deriv_omega <- McCullagh_derivative_omega_wrt_psi(n, i1, j1, psi, delta, alpha, c)
  deriv_condition <- McCullagh_derivative_condition_wrt_psi(i1, j1)
  result <- condition * deriv_omega + omega * deriv_condition
  result
}


#' Derivative of Lagrange multiplier wrt scalar delta.
#'
#' @param n matrix of observed counts
#' @param psi symmetry matrix
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing coefficient so that sum o pi = 1. Default value is 1.0
#' @returns value of the derivative
McCullagh_derivative_lagrangian_wrt_delta <- function(n, psi, delta, alpha, c=1) {
  condition <- McCullagh_compute_condition(psi)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  omega <- McCullagh_compute_omega(n, pi)
  deriv_omega <- McCullagh_derivative_omega_wrt_delta(n, psi, delta, alpha, c)
  result <- condition * deriv_omega
  result
}


#' Derivative of Lagrangian wrt delta_vec.
#'
#' @param n matrix of observed counts
#' @param k index of delta_vec to compute derivative wrt
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_lagrangian_wrt_delta_vec <- function(n, k, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_derivative_lagrangian_wrt_delta(n, psi, delta_vec, alpha, c))
  }

  condition <- McCullagh_compute_condition(psi)
  pi <- McCullagh_compute_generalized_pi(psi, delta_vec, alpha, c)
  omega <- McCullagh_compute_omega(n, pi)
  deriv_omega <- McCullagh_derivative_omega_wrt_delta_vec(n, k, psi, delta_vec, alpha, c)
  result <- condition * deriv_omega
  result
}


#' Second derivative of Lagrangian wrt psi^2.
#'
#' @param n matrix of observed counts
#' @param i1 first row index of psi
#' @param j1 first column index of psi
#' @param i2 second row index of psi
#' @param j2 second column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_lagrangian_wrt_psi_2 <- function(n, i1, j1, i2, j2, psi, delta, alpha, c=1.0) {
  # second order condition wrt psi^2 = 0.0
  # second order omega wrt psi^2 = 0.0
  der_condition1 <- McCullagh_derivative_condition_wrt_psi(i1, j1)
  der_condition2 <- McCullagh_derivative_condition_wrt_psi(i2, j2)
  der_omega1 <- McCullagh_derivative_omega_wrt_psi(n, i1, j1, psi, delta, alpha, c)
  der_omega2 <- McCullagh_derivative_omega_wrt_psi(n, i2, j2, psi, delta, alpha, c)
  result <- der_condition1 * der_omega2 + der_condition2 * der_omega1
  result
}


#' Second derivative of Lagrangian wrt psi[i1, j1] and delta.
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_lagrangian_wrt_psi_delta <- function(n, i1, j1, psi, delta, alpha, c=1.0) {
  # derivative of condition wrt delta = 0.0
  # second order condition wrt psi and delta = 0.0
  condition <- McCullagh_compute_condition(psi)
  der_condition <- McCullagh_derivative_condition_wrt_psi(i1, j1)
  der_omega <- McCullagh_derivative_omega_wrt_delta(n, psi, delta, alpha, c)
  result <- der_condition * der_omega
  result <- result + condition * McCullagh_second_order_omega_wrt_psi_delta(n, i1, j1, psi, delta, alpha, c)
  result
}


#' Second derivative of Lagrangian wrt psi[i1, j1] and alpha[index].
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param index second row index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_lagrangian_wrt_psi_alpha <- function(n, i1, j1, index, psi, delta, alpha, c=1.0) {
  # """
  #   :param n: matrix of observed counts
  #   :param i1: first index of psi
  #   :param j1: second index of psi
  #   :param index: index of alpha
  #   :param psi: the psi matrix
  #   :param delta: the value of the delta parameter
  #   :param alpha: the alpha vector
  #   :return:
  #   """
  condition <- McCullagh_compute_condition(psi)
  der_condition1 <- McCullagh_derivative_condition_wrt_psi(i1, j1)
  der_omega1 <- McCullagh_derivative_omega_wrt_alpha(n, index, psi, delta, alpha, c)
  result <- der_condition1 * der_omega1
  result <- result + condition * McCullagh_second_order_omega_wrt_psi_alpha(n, i1, j1, index, psi, delta, alpha, c)
  result
}


#' Second derivative of Lagrangian wrt psi[i1, j1] and delta_vec[k[.
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param k index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_lagrangian_wrt_psi_delta_vec <- function(n, i1, j1, k, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_second_order_lagrangian_wrt_psi_delta(n, i1, j1, psi, delta_vec, alpha, c))
  }

  # derivative of condition wrt delta = 0.0
  # second order condition wrt psi and delta = 0.0
  condition <- McCullagh_compute_condition(psi)
  der_condition <- McCullagh_derivative_condition_wrt_psi(i1, j1)
  der_omega <- McCullagh_derivative_omega_wrt_delta_vec(n, k, psi, delta_vec, alpha, c)
  result <- der_condition * der_omega
  result <- result + condition * McCullagh_second_order_omega_wrt_psi_delta_vec(n, i1, j1, k, psi, delta_vec, alpha, c)
  result
}


#' Computes the log(likelihood).
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar or vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_log_L <- function(n, psi, delta, alpha, c=1.0) {
  if (length(delta) == 1) {
    pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  } else {
    pi <- McCullagh_compute_generalized_pi(psi, delta, alpha, c)
  }

  logL <- log_likelihood(n, pi)
  logL <- logL - McCullagh_compute_omega(n, pi) * McCullagh_compute_condition(psi)
  logL
}


#' Computes the penalized value of a derivative by adding the derivative
#'  of the penalty to it.
#'
#' @param derivative the base derivative
#' @param i1 first index of psi
#' @param j1 second index of psi
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_penalized <- function(derivative, i1, j1, n, psi, delta, alpha, c=1.0) {
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  omega <- McCullagh_compute_omega(n, pi)
  if (abs(i1 - j1) == 0) {
    derivative <- derivative - omega
  } else if (abs(i1 - j1) == 1) {
    derivative <- derivative - 2.0 * omega
  } else if (abs(i1 - j1) == 2) {
    derivative <- derivative + 2.0 * omega
  }
  derivative
}


#' Derivative of log(likelihood) wrt delta (scalar or vector0.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @param k index into delta_vac. Defaults to 1.
#' @returns derivative
McCullagh_derivative_log_l_wrt_delta <- function(n, psi, delta, alpha, c=1.0, k=1) {
  #   :return: 1  d log(L)
  #            - ---------
  #            c  d delta
  #   """
  M <- nrow(psi)
  N <- sum(n)
  if (length(delta) == 1) {
    pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  } else {
    pi <- McCullagh_compute_generalized_pi(psi, delta, alpha, c)
  }

  derivative <- 0.0
  # i and j index pi
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      if (length(delta == 1)) {
        deriv <- McCullagh_derivative_pi_wrt_delta(i, j, psi, delta, alpha, c)
      } else {
        deriv <- McCullagh_derivative_pi_wrt_delta_vec(i, j, k, psi, alpha, c)
      }
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  if (length(delta == 1)) {
    derivative <- derivative - condition * McCullagh_derivative_omega_wrt_delta(n, psi, delta, alpha, c)
  } else {
    derivative <- derivative - condition * McCullagh_derivative_omega_wrt_delta_vec(n, k, psi, delta, alpha, c)
  }
  derivative
}


#' Derivative of log(likelihood) wrt alpha[index].
#'
#' @param n matrix of observed counts
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_log_l_wrt_alpha <- function(n, index, psi, delta, alpha, c=1.0) {
  #   :return: 1       d log(L)
  #            -   ----------------
  #            c    d alpha[index]
  #   """
  M <- nrow(psi)
  N <- sum(n)
  if (length(delta) == 1) {
    pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  } else {
    pi <- McCullagh_compute_generalized_pi(psi, delta, alpha, c)
  }

  derivative <- 0.0
  # i and j index pi
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv <- McCullagh_derivative_pi_wrt_alpha(i, j, index, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }

  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - condition * McCullagh_derivative_omega_wrt_alpha(n, index, psi, delta, alpha, c)
  derivative
}


#' Derivative of log(likelihood) wrt c.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_log_l_wrt_c <- function(n, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)
  if (length(delta) == 1) {
    pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  } else {
    pi <- McCullagh_compute_generalized_pi(psi, delta, alpha, c)
  }

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv <- McCullagh_derivative_pi_wrt_c(i, j, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - McCullagh_derivative_omega_wrt_c(n, psi, delta, alpha, c) * condition
  derivative
}


#' Derivative of log(likelihood) wrt delta_vec[k].
#'
#' @param n matrix of observed counts
#' @param k index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_log_l_wrt_delta_vec <- function(n, k, psi, delta_vec, alpha, c=1.0) {
  #   :return: 1  d log(L)
  #            - ---------
  #            c  d delta
  #   """
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_generalized_pi(psi, delta_vec, alpha, c)

  derivative <- 0.0
  # i and j index pi
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv <- McCullagh_derivative_pi_wrt_delta_vec(i, j, k, psi, delta_vec, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - condition * McCullagh_derivative_omega_wrt_delta_vec(n, k, psi, delta_vec, alpha, c)
  derivative
}


#' Derivative of log(likelihood) wrt psi.
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_derivative_log_l_wrt_psi <- function(n, i1, j1, psi, delta, alpha, c=1.0) {
  #   :return: 1     d log(L)
  #            -  ---------------
  #            c   d psi[i1][j1]
  #   """
  M <- nrow(psi)
  N <- sum(n)
  if (length(delta) == 1) {
    pi <- McCullagh_compute_pi(psi, delta, alpha, c)
  } else {
    pi <- McCullagh_compute_generalized_pi(psi, delta, alpha, c)
  }

  derivative <- 0.0
  # i and j index pi
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv <- McCullagh_derivative_pi_wrt_psi(i, j, i1, j1, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  if (McCullagh_is_in_constraint_set(i1, j1)) {
    derivative <- McCullagh_penalized(derivative, i1, j1, n, psi, delta, alpha, c)
  } else {
    condition <- McCullagh_compute_condition(psi)
    derivative <- derivative - condition * McCullagh_derivative_omega_wrt_psi(n, i1, j1, psi, delta, alpha, c)
  }
  derivative
}


#' Second derivative of log(likelihoood) wrt psi^2.
#'
#' @param n matrix of observed counts
#' @param i1 first row index of psi
#' @param j1 first column index of psi
#' @param i2 second row index of psi
#' @param j2 second column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_psi_2 <- function(n, i1, j1, i2, j2, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative <- 0.0
  # i and j index pi
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_psi(i, j, i1, j1, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_psi(i, j, i2, j2, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      derivative <- derivative + div * McCullagh_second_order_pi_wrt_psi_2(i, j, i1, j1, i2, j2, psi, delta, alpha, c)
      derivative <- derivative - N * McCullagh_second_order_pi_wrt_psi_2(i, j, i1, j1, i2, j2, psi, delta, alpha, c)
    }
  }
  derivative <- derivative - McCullagh_second_order_lagrangian_wrt_psi_2(n, i1, j1, i2, j2, psi, delta, alpha, c)
  derivative
}


#' Second derivative of log(likelihood) wrt psi[i1, j1] and scalar delta..
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_psi_delta <- function(n, i1, j1, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div = n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_psi(i, j, i1, j1, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_delta(i, j, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_psi_delta(i, j, i1, j1, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  derivative <- derivative - McCullagh_second_order_lagrangian_wrt_psi_delta(n, i1, j1, psi, delta, alpha, c)
  derivative
}


#' Second derivative of log(likelihoood) wrt ps[i1, j1] and alpha[index].
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_psi_alpha <- function(n, i1, j1, index, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative <- 0.0
  # i and j index pi
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_psi(i, j, i1, j1, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_alpha(i, j, index, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_psi_alpha(i, j, i1, j1, index, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }

  derivative <- derivative - McCullagh_second_order_lagrangian_wrt_psi_alpha(n, i1, j1, index, psi, delta, alpha, c)
  derivative
}


#' Second derivative of log(likelihood) wrt psi[i1, j1] and c.
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_psi_c <- function(n, i1, j1, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_psi(i, j, i1, j1, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_c(i, j, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_psi_c(i, j, i1, j1, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - McCullagh_second_order_omega_wrt_psi_c(n, i1, j1, psi, delta, alpha, c) * condition
  der_omega <- McCullagh_derivative_omega_wrt_c(n, psi, delta, alpha, c)
  der_condition <- McCullagh_derivative_condition_wrt_psi(i1, j1)
  derivative <- derivative - der_omega * der_condition
  derivative
}


#' Second derivative of log(likelihood) wrt delta^2.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_delta_2 <- function(n, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 = McCullagh_derivative_pi_wrt_delta(i, j, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1**2 / pi[i, j]

      # der <-
      derivative <- derivative + div * McCullagh_second_order_pi_wrt_delta_2(i, j, psi, delta, alpha, c)
      derivative <- derivative - N * McCullagh_second_order_pi_wrt_delta_2(i, j, psi, delta, alpha, c)
    }
  }
  condition = McCullagh_compute_condition(psi)
  derivative <- derivative - condition * McCullagh_second_order_omega_wrt_delta_2(n, psi, delta, alpha, c)
  derivative
}


#' Second derivative of log(likelihood) wrt delta and alpha[index].
#'
#' @param n matrix of observed counts
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_delta_alpha <- function(n, index, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative <- 0.0
  for (i in 1 :M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_delta(i, j, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_alpha(i, j, index, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_delta_alpha(i, j, index, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition = McCullagh_compute_condition(psi)
  derivative <- derivative - condition * McCullagh_second_order_omega_wrt_delta_alpha(n, index, psi, delta, alpha, c)
  derivative
}


#' Second derivative of log(likelihood) wrt scalar delta and c.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0..
#' @returns derivative
McCullagh_second_order_log_l_wrt_delta_c <- function(n, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative = 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div = n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_delta(i, j, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_c(i, j, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_delta_c(i, j, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }

  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - McCullagh_second_order_omega_wrt_delta_c(n, psi, delta, alpha, c) * condition
  derivative
}


#' Second derivative of log(likelihood) wrt alpha^2.
#'
#' @param n matrix of observed counts
#' @param index_a first index of alpha
#' @param index_b second column index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_alpha_2 <- function(n, index_a, index_b, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative = 0.0
# i and j index n and pi
  for (i in 1:M) {
    for (j in 1:M) {
      div = n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_alpha(i, j, index_a, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_alpha(i, j, index_b, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      derivative <- derivative + div * McCullagh_second_order_pi_wrt_alpha_2(i, j, index_a, index_b, psi, delta, alpha, c)
      derivative <- derivative - N * McCullagh_second_order_pi_wrt_alpha_2(i, j, index_a, index_b, psi, delta, alpha, c)
    }
  }
  condition <- McCullagh_compute_condition(psi)
  deriv_omega <- McCullagh_second_order_omega_wrt_alpha_2(n, index_a, index_b, psi, delta, alpha, c)
  derivative <- derivative - condition * deriv_omega
  derivative
}


#' Second derivative of log(likelihood) wrt alpha[index] and c.
#'
#' @param n matrix of observed counts
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_alpha_c <- function(n, index, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_alpha(i, j, index, psi, delta, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_c(i, j, psi, delta, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_alpha_c(i, j, index, psi, delta, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - McCullagh_second_order_omega_wrt_alpha_c(n, index, psi, delta, alpha, c) * condition
  derivative
}


#' Second derivative of log(likelihood) wrt c^2.
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_c_2 <- function(n, psi, delta, alpha, c) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  derivative = 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv = McCullagh_derivative_pi_wrt_c(i, j, psi, delta, alpha, c)
      derivative <- derivative - div * deriv**2 / pi[i, j]

      deriv = McCullagh_second_order_pi_wrt_c_2(i, j, psi, delta, alpha, c)
      derivative = derivative + (div - N) * deriv
    }
  }
  condition = McCullagh_compute_condition(psi);
  derivative <- derivative - McCullagh_second_order_omega_wrt_c_2(n, psi, delta, alpha, c) * condition
  derivative
}


#' Second derivative of log(likelihood) wrt psi[i1, j1] and delta_vec[k].
#'
#' @param n matrix of observed counts
#' @param i1 row index of psi
#' @param j1 column index of psi
#' @param k second row index of delta
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_psi_delta_vec <- function(n, i1, j1, k, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_second_order_log_l_wrt_psi_delta(n, i1, j1, psi, delta_vec, alpha, c))
  }

  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_generalized_pi(psi, delta_vec, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div = n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_psi(i, j, i1, j1, psi, delta_vec, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_delta_vec(i, j, k, psi, delta_vec, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_psi_delta_vec(i, j, i1, j1, k, psi, delta_vec, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  derivative <- derivative - McCullagh_second_order_lagrangian_wrt_psi_delta_vec(n, i1, j1, k, psi, delta_vec, alpha, c)
  derivative
}


#' Second derivative of log(likelihood) wrt delta_vec^2.
#'
#' @param n matrix of observed counts
#' @param k1 first index of delta_vec
#' @param k2 second index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_delta_vec_2 <- function(n, k1, k2, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_second_order_log_l_wrt_delta_2(n, psi, delta_vec, alpha, c))
  }

  M <- nrow(psi)
  N <- sum(n)

  pi <- McCullagh_compute_generalized_pi(psi, delta_vec, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_delta_vec(i, j, k1, psi, delta_vec, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_delta_vec(i, j, k2, psi, delta_vec, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_delta_vec_2(i, j, k1, k2, psi, delta_vec, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - condition * McCullagh_second_order_omega_wrt_delta_vec_2(n, k1, k2, psi, delta_vec, alpha, c)
  derivative
}


#' Second derivative of log(likelihood) wrt delta[k] and alpha[index].
#'
#' @param n matrix of observed counts
#' @param k index of delta_vec
#' @param index index of alpha
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns derivative
McCullagh_second_order_log_l_wrt_delta_vec_alpha <- function(n, k, index, psi, delta_vec, alpha, c=1.0) {
  if (length(delta_vec) == 1) {
    return(McCullagh_second_order_log_l_wrt_delta_alpha(n, index, psi, delta_vec, alpha, c))
  }

  M <- nrow(psi)
  N <- sum(n)

  pi <- McCullagh_compute_generalized_pi(psi, delta_vec, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_delta_vec(i, j, k, psi, delta_vec, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_alpha(i, j, index, psi, delta_vec, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_delta_vec_alpha(i, j, k, index, psi, delta_vec, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - condition * McCullagh_second_order_omega_wrt_delta_vec_alpha(n, k, index, psi, delta_vec, alpha, c)
  derivative
}


#' Second derivative of log(likeloihood) wrt delta_vec[k] and c.
#'
#' @param n matrix of observed counts
#' @param k index of delta_vec
#' @param psi matrix of symmetry parameters
#' @param delta_vec vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0
#' @returns derivative
McCullagh_second_order_log_l_wrt_delta_vec_c <- function(n, k, psi, delta_vec, alpha, c) {
  if (length(delta_vec) == 1) {
    return(McCullagh_second_order_log_l_wrt_delta_c(n, psi, delta_vec, alpha, c))
  }

  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_generalized_pi(psi, delta_vec, alpha, c)

  derivative <- 0.0
  for (i in 1:M) {
    for (j in 1:M) {
      div <- n[i, j] / pi[i, j]
      deriv1 <- McCullagh_derivative_pi_wrt_delta_vec(i, j, k, psi, delta_vec, alpha, c)
      deriv2 <- McCullagh_derivative_pi_wrt_c(i, j, psi, delta_vec, alpha, c)
      derivative <- derivative - div * deriv1 * deriv2 / pi[i, j]

      deriv <- McCullagh_second_order_pi_wrt_delta_vec_c(i, j, k, psi, delta_vec, alpha, c)
      derivative <- derivative + (div - N) * deriv
    }
  }
  condition <- McCullagh_compute_condition(psi)
  derivative <- derivative - condition * McCullagh_second_order_omega_wrt_delta_vec_c(n, k, psi, delta_vec, alpha, c)
}


#' Gradient vector of log(likelihood)
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar or vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns gradient vector of first-order partials wrt log(likelihood0)
McCullagh_gradient_log_l <- function(n, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  n_psi <- M * (M + 1) / 2
  n_alpha <- M - 2
  n_delta <- length(delta)
  n_c <- 1
  n_params <- n_psi + n_alpha + n_delta + n_c

  gradient <- vector("numeric", n_params)

  index <- 1
  for (i in 1:M) {
    for (j in i:M) {
      gradient[index] <- McCullagh_derivative_log_l_wrt_psi(n, i, j, psi, delta, alpha, c)
      index <- index + 1
    }
  }

  for (k in 1:length(delta)) {
    if (length(delta) == 1) {
      gradient[index] <- McCullagh_derivative_log_l_wrt_delta(n, psi, delta, alpha, c)
    } else {
      gradient[index] <- McCullagh_derivative_log_l_wrt_delta_vec(n, k, psi, delta, alpha, c)
    }
    index <- index + 1
  }

  for (i in 2:(M - 1)) {
    gradient[index] <- McCullagh_derivative_log_l_wrt_alpha(n, i, psi, delta, alpha, c)
    index <- index + 1
  }

  gradient[index] <- McCullagh_derivative_log_l_wrt_c(n, psi, delta, alpha, c)
  gradient
}


#' Hessian matrix of log(likelihood)
#'
#' @param n matrix of observed counts
#' @param psi matrix of symmetry parameters
#' @param delta scalar or vector asymmetry parameter
#' @param alpha vector of asymmetry parameters
#' @param c normalizing factor to make pi sum to 1.0.  Default is 1.0.
#' @returns hessian matrix of second-order partials wrt log(likelihood0)
McCullagh_hessian_log_l <- function(n, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  N <- sum(n)
  pi <- McCullagh_compute_pi(psi, delta, alpha, c)

  n_psi <- M * (M + 1) / 2
  delta_index <- n_psi + 1
  n_delta <- length(delta)
  alpha_index <- delta_index + n_delta
  n_alpha <- M - 2
  c_index <- alpha_index + n_alpha
  n_c <- 1
  n_params <- n_psi + n_alpha + n_delta + n_c

  hessian <- matrix(0.0, nrow=n_params, ncol=n_params)

  # d^2 / d psi^2
  index1 <- 1
  # i1 and j1 index the first psi
  for (i1 in 1:M) {
    for (j1 in i1: M) {
      index2 <- 1
      # i2 and j2 index the second psi
      for (i2 in 1:M) {
        for (j2 in i2: M) {
          hessian[index1, index2] <- McCullagh_second_order_log_l_wrt_psi_2(n, i1, j1, i2, j2, psi, delta, alpha, c)
          hessian[index2, index1] <- hessian[index1, index2]
          index2 <- index2 + 1
        }
      }

      # d^2 / d psi d delta
      # i and j index pi
      for (k in 1:n_delta) {
        hessian[index1, delta_index + k - 1] <- McCullagh_second_order_log_l_wrt_psi_delta_vec(n, i1, j1, k, psi, delta, alpha, c)
        hessian[delta_index + k - 1, index1] <- hessian[index1, delta_index + k - 1]
        index2 <- index2 + 1
      }

      # d^2 / d psi d alpha
      # index indexes alpha
      for (index in 2:(M - 1)) {
        hessian[index1, index2] <- McCullagh_second_order_log_l_wrt_psi_alpha(n, i1, j1, index, psi, delta, alpha, c)
        hessian[index2, index1] <- hessian[index1, index2]
        index2 <- index2 + 1
      }

      # d^2 / d psi d c
      hessian[index1, index2] <- McCullagh_second_order_log_l_wrt_psi_c(n, i1, j1, psi, delta, alpha, c)
      hessian[index2, index1] <- hessian[index1, index2]
      index1 <- index1 + 1
    }
  }

  # d^2 / d delta^2
  index2 <- index1
  for (k1 in 1:n_delta) {
    for (k2 in 1:n_delta) {
      hessian[index1 + k1 - 1, index2 + k2 -1] <- McCullagh_second_order_log_l_wrt_delta_vec_2(n, k1, k2, psi, delta, alpha, c)
      hessian[index2 + k2 - 1, index1 + k1 - 1] <- hessian[index1 + k1 - 1, index2 + k2 - 1]
    }
  }
  index2 <- index2 + n_delta

  # d^2 / d delta d alpha
  for (index in 2:(M - 1)) {
    for (k in 1:n_delta) {
      hessian[index1 + k - 1, index2] <- McCullagh_second_order_log_l_wrt_delta_vec_alpha(n, k, index, psi, delta, alpha, c)
      hessian[index2, index1 + k - 1] <- hessian[index1 + k - 1, index2]
    }
    index2 <- index2 + 1
  }

  # d^2 / d delta d c
  for (k in 1:n_delta) {
    hessian[index1 + k - 1, index2] <- McCullagh_second_order_log_l_wrt_delta_vec_c(n, k, psi, delta, alpha, c)
    hessian[index2, index1 + k - 1] <- hessian[index1 + k - 1, index2]
  }

  # d^2 / d alpha^2
  index1 <- alpha_index
  index2 <- index1
  # index_a and index_b index the alpha array
  # recall alpha[0] is fixed at 1.0
  for (index_a in 2:(M - 1)) {
    for (index_b in 2:(M - 1)) {
      index3 = alpha_index + index_b - 2

      hessian[index1, index3] <- McCullagh_second_order_log_l_wrt_alpha_2(n, index_a, index_b, psi, delta, alpha, c)
      hessian[index3, index1] <- hessian[index1, index3]
    }
    index1 <- index1 + 1
  }

  # d^2 / d alpha d c
  for (index_a in 2:(M - 1)) {
    index1 = alpha_index + index_a - 2
    hessian[index1, c_index] <- McCullagh_second_order_log_l_wrt_alpha_c(n, index_a, psi, delta, alpha, c)
    hessian[c_index, index1] <- hessian[index1, c_index]
  }

  # d^2 / d c^2
  hessian[c_index, c_index] <- McCullagh_second_order_log_l_wrt_c_2(n, psi, delta, alpha, c)

  hessian
}


#' Test whether pi matrix is valid, i.e., 0 < all values.
#'
#' @param pi matrix of pi values to be tested.
#' @returns TRUE if all pi > 0, FALSE otherwise.
McCullagh_is_pi_invalid <- function(pi) {
  result = FALSE
  for (pi_j in pi) {
    for (pij_qij in pi_j) {
      if (pij_qij <= 0) {
        result = TRUE
        break
      }
    }
  }
  result
}


#' Compute the Newton-Raphson update.
#'
#' @param gradient gradient vector of log(likelihood) wrt parameters
#' @param hessian hessian of log(likelihood) wrt parameters
#' @returns vector with update values for each of the parameters
McCullagh_compute_update <- function(gradient, hessian) {
  update <- solve(hessian, gradient)
  update
}


#' Update the parameters based on Newton-Raphson step.
#'
#' @param update vector of update values
#' @param step size of candidate step along direction of update
#' @param psi vector of symmetry parameters
#' @param delta scalar or vector of asymmetry parameters
#' @param alpha vector of asymmetry parameters
#' @param c normalization factor to make sum pf pi = 1.0. Default value is 1.0.
#' @returns list containing new parameters
#'    psi: matrix of symmetry parameters
#'    delta; scalar or vector of asymmetry parameters
#'    alpha: vector of asymmetry parameters
#'    c: scaling coefficient to ensure pi sums to 1.0
McCullagh_update_parameters <- function(update, step, psi, delta, alpha, c=1.0) {
  M <- nrow(psi)
  n_psi <- M * (M + 1) / 2
  n_delta <- length(delta)
  n_alpha <- M - 2
  n_c <- 1
  n_parms <- n_psi + n_delta + n_alpha + n_c

  if (length(update) != n_parms) {
     stop("Number of parameters must agree with variable sizes")
  }
  psi1 <- psi

  index <- 1
  for (i1 in 1:M) {
    for (j1 in i1:M) {
      psi1[i1, j1] <- psi1[i1, j1] - step * update[index]
      psi1[j1, i1] <- psi1[i1, j1]
      index <- index + 1
    }
  }

  delta1 <- rep(0.0, length(delta))
  for (k in 1:n_delta) {
    delta1[k] <- delta[k] - step * update[index]
    index <- index + 1
  }

  alpha1 <- alpha
  alpha1[1] <- 1.0
  for (alpha_index in 2:(M - 1)) {
    alpha1[alpha_index] <- alpha[alpha_index] - step * update[index]
    index <- index + 1
  }

  c1 <- c - step * update[index]

  list(psi=psi1, delta=delta1, alpha=alpha1, c=c1)
}


#' Newton-Raphson update.
#'
#' Using gradient and hessian, it finds the update direction. Then it tries increassingly
#' smaller step sizes until the step*update yields a valid pi matrix.
#' @param n matrix of observed counts
#' @param gradient gradient vector
#' @param hessian hessian matrix
#' @param psi matrix of symmetry parameters
#' @param delta scalar or vector of asymmetry parameters
#' @param alpha vector of asymmetry parameters
#' @param c scaling factor to ensure pi sums to 1.0. Default is 1.0
#' @param max_iter maximum number of iterations.  Default is 50.
#' @param verbose should cycle-by-cycle into be printed out. Default is FALSE,
#' do not print.
#' @returns list containing new parameters
#'    psi: matrix of symmetry parameters
#'    delta; scalar or vector of asymmetry parameters
#'    alpha: vector of asymmetry parameters
#'    c: scaling coefficient to ensure pi sums to 1.0
McCullagh_newton_raphson_update <- function(n, gradient, hessian, psi, delta, alpha,
                                            c=1.0, max_iter=50, verbose=FALSE) {
  update <- McCullagh_compute_update(gradient, hessian)
  step <- 1.0
  logL <- McCullagh_log_L(n, psi, delta, alpha, c)
  for (iter in 1:max_iter) {
    new_values <- McCullagh_update_parameters(update, step, psi, delta, alpha, c)
    psi1 <- new_values$psi
    delta1 <- new_values$delta
    alpha1 <- new_values$alpha
    c1 <- new_values$c

    if (length(delta) == 1) {
      pi <- McCullagh_compute_pi(psi1, delta1, alpha1, c1)
    } else {
      pi <- McCullagh_compute_generalized_pi(psi1, delta1, alpha1, c1)
    }

    invalid <- McCullagh_is_pi_invalid(pi)
    if (invalid) {
      step <- step / 2.0
      next
    }
    logL1 <- McCullagh_log_L(n, psi1, delta1, alpha1, c1)
    if (logL1 < logL) {
      step <- step / 2.0
      next
    }
  }

  if (verbose) {
    message(paste("old logL =", logL, ", new logL =", logL1))
  }

  list(psi=psi1, delta=delta1, alpha=alpha1, c=c1)
}


#' Generates names to label the parameters.
#'
#' @param psi matrix of symmetry parameters
#' @param delta scalar of matrix of asymmetry parameters
#' @param alpha vector of asymmetry parameters
#' @param c scling factor to ensure sup of pi is 1.0
#' @returns character vector of labels for the SE values
McCullagh_generate_names <- function(psi, delta, alpha, c) {
  M <- nrow(psi)
  n_psi <- M * (M + 1) / 2
  n_delta <- length(delta)
  n_alpha <- M - 2
  n_c <- 1
  n_parms <- n_psi + n_delta + n_alpha + n_c

  se_names <- vector("character", n_parms)
  index <- 1
  for (i1 in 1:M) {
    for (j1 in i1:M) {
      se_names[index] <- paste0("psi[", i1, " ,", j1, "]")
      index <- index + 1
    }
  }

  if (n_delta == 1) {
    se_names[index] <- "delta"
    index <- index + 1
  } else {
    for (k in 1:n_delta) {
      se_names[index] <- paste0("delta[", k, ']')
      index <- index + 1
    }
  }

  for (index1 in 2:(M - 1)) {
    se_names[index] <- paste0("alpha[", index1, "]")
    index <- index + 1
  }

  se_names[index] <- "c"

  se_names
}
