###########################################
#THIS FILE CONTAINS
#1) function for partitioning the observation window in subareas
#2) function for plotting the area partition, with or without data
#3) function for computing Batty's entropy
#4) function for computing Karlstrom and Ceccato's entropy
###########################################

###########################################
#1) build area partition

#'Area partition.
#'
#'This function partitions the observation area in a number of sub-areas,
#'and assigns the data points/pixels to the areas.
#'
#'An event of interest (in the form of a point or binary areal dataset) occurs
#'over an observation area divided into sub-areas. If the partition is random,
#'this function generates the sub-areas by randomly drawing the areas' centroids
#'over the observation window. Then, data points/pixels are assigned to the area with
#'the closest centroid.
#'
#' @param win The observation area, an object of class \code{owin}, see package `spatstat`.
#' @param G An integer if sub-areas are randomly generated, determining the number \eqn{G} of sub-areas.
#'          Alternatively, a \eqn{G}x\eqn{2} matrix with the sub-areas centroids' coordinates.
#' @param data.coords A two column matrix. If the dataset is a point pattern,
#'                    the point coordinates. If the dataset is a raster/pixel matrix,
#'                    the centroids' coordinates, provided by user or returned by [coords_pix()].
#'
#' @return A list with elements:
#'\itemize{
#'   \item `G.coords` a point pattern containing the \eqn{G} areas' centroids
#'   \item `data.assign` a three column matrix, where each pair of data coordinates is
#'   matched to one of the \eqn{G} areas (numbered 1 to \eqn{G}).
#'   }
#'
#' @examples
#' #LATTICE DATA
#' #random generation of areas
#' ccc=coords_pix(area=square(10), nrow=10, ncol=10)
#' partition=areapart(square(10), G=5, data.coords=ccc)
#'
#' #providing a pre-fixed area partition
#' win=square(10)
#' G=5
#' GG=cbind(runif(G, win$xrange[1], win$xrange[2]),
#'          runif(G, win$yrange[1], win$yrange[2]))
#' ccc=coords_pix(area=win, pixel.xsize = 2, pixel.ysize = 2)
#' partition=areapart(win, G=GG, data.coords=ccc)
#'
#' #POINT DATA
#' #random generation of areas
#' data.pp=runifpoint(100, win=square(10))
#' marks(data.pp)=sample(c("a","b","c"), 100, replace=TRUE)
#' ccc=coords(data.pp)
#' partition=areapart(square(10), G=4, data.coords=ccc)
#'
#' #providing a pre-fixed area partition
#' win=square(10)
#' G=4
#' GG=cbind(runif(G, win$xrange[1], win$xrange[2]),
#'          runif(G, win$yrange[1], win$yrange[2]))
#' data.pp=runifpoint(100, win=win)
#' marks(data.pp)=sample(c("a","b","c"), 100, replace=TRUE)
#' ccc=coords(data.pp)
#' partition=areapart(win, G=GG, data.coords=ccc)
#'
#' #for plotting the area partiton
#' ?plot_areapart
#'
#' @export

areapart=function(win, G, data.coords){
  if(length(G)==1){
    #areas' centroids
    dummy=spatstat::runifpoint(G, win)
    dummy.coord=cbind(x=dummy$x, y=dummy$y, id=1:G)} else {

      dummy=spatstat::ppp(G[,1], G[,2], win)
      dummy.coord=cbind(x=dummy$x, y=dummy$y, id=1:nrow(G))
    }

  #create data point pattern
  data.pp=spatstat::ppp(data.coords[,1], data.coords[,2], window=win)

  #match data to the nearest area centroid
  near.neigh=spatstat::nncross(data.pp, dummy)
  data.coord.area=cbind(data.coords, near.neigh$which)
  colnames(data.coord.area)=c("x", "y", "area")

  return(list(G.coords=dummy, data.assign=data.coord.area))
}
###########################################


###########################################
#2) plot the area partition

#'Plot area partition.
#'
#'This function plots an area partition into sub-areas,
#'generated by [areapart()].
#'
#'This function allows to plot a fixed or randomly generated area partition,
#'such as the one produced by [areapart()]. The plot changes according
#'to a few options: the partition may be plotted with or without data,
#'with or without colour filling. When data present multiple categories,
#'one can choose to plot the category of interest together with the
#'partition. If the data are points, the Dirichlet tessellation
#'is plotted (see \code{dirichlet} in the package `spatstat`).
#'If the data are pixels, the partition follows the pixel borders.
#'
#' @param data.assign A three column matrix, containing the data coordinates (centroids, when pixels)
#'                    and the id of the corresponding sub-area.
#'                    Provided by user or returned by [areapart()].
#' @param win The observation area, an object of class \code{owin} (see package \code{spatstat}).
#' @param is.pointdata Logical: \code{T} if data are a point pattern, \code{F} if they are pixels.
#' @param add.data Logical: \code{F} (default) if only the area partition is plotted, \code{T} if
#'   data are added to the area partition plot.
#' @param data.bin Logical, only used when `add.data=TRUE`: \code{T} (default) if
#'   the plot displays the dichotomized version of the dataset, according to the category of interest.
#' @param category A character string. The exact name of the category of interest for Batty's
#'   or Karlstrom and Ceccato's spatial entropy, as in `data`.
#'   Only used when `add.data=TRUE` and `data.bin=TRUE`.
#' @param data A data matrix for lattice data, or a \code{ppp} object for point data (see package \code{spatstat}).
#' @param G.coords A two column matrix with the coordinates of the sub-areas centroids.
#'                 Only needed if `data` is a point pattern.
#' @param main Optional, a character string with the plot main title.
#' @param ribbon Logical, whether to display a ribbon showing the colour map.
#'
#' @return A plot of the partition in sub-areas, according to the chosen options.
#'
#' @examples
#' #LATTICE DATA
#' data.lat=matrix(sample(c("a","b","c"), 100, replace=TRUE), nrow=10)
#' ccc=coords_pix(area=square(10), nrow=10, ncol=10)
#' partition=areapart(square(10), G=5, data.coords=ccc)
#' #plot without data
#' plot_areapart(partition$data.assign, square(10), is.pointdata=FALSE,
#' add.data=FALSE, data=data.lat, G.coords=partition$G.coords, main="")
#' #plot with data
#' plot_areapart(partition$data.assign, square(10), is.pointdata=FALSE,
#' add.data=TRUE, data=data.lat, G.coords=partition$G.coords, main="")
#' #plot with data - dichotomize data according to a category of interest
#' plot_areapart(partition$data.assign, square(10), is.pointdata=FALSE,
#' add.data=TRUE, data.bin=TRUE, category="a",
#' data=data.lat, G.coords=partition$G.coords, main="")
#'
#' #POINT DATA
#' data.pp=runifpoint(100, win=square(10))
#' marks(data.pp)=sample(c("a","b","c"), 100, replace=TRUE)
#' ccc=coords(data.pp)
#' partition=areapart(square(10), G=4, data.coords=ccc)
#' #plot without data
#' plot_areapart(partition$data.assign, square(10), is.pointdata=TRUE,
#' add.data=FALSE, data=data.pp, G.coords=partition$G.coords, main="")
#' #plot with data
#' plot_areapart(partition$data.assign, square(10), is.pointdata=TRUE,
#' add.data=TRUE, data=data.pp, G.coords=partition$G.coords, main="")
#' #plot with data - dichotomize data according to a category of interest
#' plot_areapart(partition$data.assign, square(10), is.pointdata=TRUE,
#' add.data=TRUE, data.bin=TRUE, category="a",
#' data=data.pp, G.coords=partition$G.coords, main="")
#'
#' @export

plot_areapart=function(data.assign, win, is.pointdata=FALSE, add.data=FALSE, data.bin=FALSE,
                       category=NULL, data,
                       G.coords=NULL, main="", ribbon=TRUE)
{
  if(is.pointdata==F) #draw areas over pixel matrix
  {
    #create segments for area plot
    nrw=length(unique(data.assign[,2]))
    ncl=length(unique(data.assign[,1]))
    halfpix.x=min(abs(diff(data.assign[,2]))[abs(diff(data.assign[,2]))>0])/2
    halfpix.y=min(abs(diff(data.assign[,1]))[abs(diff(data.assign[,1]))>0])/2
    below.bord=right.bord=numeric(nrow(data.assign))
    for (i in 1:(nrow(data.assign)-1))
      if (data.assign[i,3]!=data.assign[(i+1),3]) below.bord[i]=1
    #below.bord is 1 if a pixel is assigned to an area different than the area of the pixel below
    for (i in 1:(nrow(data.assign)-nrw))
      if (data.assign[i,3]!=data.assign[(i+nrw),3]) right.bord[i]=1
    #right.bord is 1 if a pixel is assigned to an area different than the area of the pixel at the right
    below.bord.x.start=data.assign[below.bord==1,1]-halfpix.y
    below.bord.x.end=data.assign[below.bord==1,1]+halfpix.y
    below.bord.y.start=below.bord.y.end=data.assign[below.bord==1,2]-halfpix.x
    right.bord.x.start=right.bord.x.end=data.assign[right.bord==1,1]+halfpix.y
    right.bord.y.start=data.assign[right.bord==1,2]+halfpix.x
    right.bord.y.end=data.assign[right.bord==1,2]-halfpix.x

    datamat.areas=matrix(data.assign[,3], nrow(data), ncol(data))

    #plot
    if(add.data==F)
    {
      plot_lattice(datamat.areas, win, gray.ext=c(1, 0), main=main, ribbon=ribbon)
      graphics::segments(x0=below.bord.x.start, y0=below.bord.y.start, x1=below.bord.x.end, y1=below.bord.y.end)
      graphics::segments(x0=right.bord.x.start, y0=right.bord.y.start, x1=right.bord.x.end, y1=right.bord.y.end)
    } else {
      if (data.bin==T){
        datavec=c(data)
        datavec[c(data)==category]=1
        datavec[c(data)!=category]=0
        data2=matrix(datavec, nrow(data), ncol(data))
      } else data2=data
      plot_lattice(data2, win, gray.ext=c(1, .3), main=main, ribbon=ribbon)
      graphics::segments(x0=below.bord.x.start, y0=below.bord.y.start, x1=below.bord.x.end, y1=below.bord.y.end, lwd=3)
      graphics::segments(x0=right.bord.x.start, y0=right.bord.y.start, x1=right.bord.x.end, y1=right.bord.y.end, lwd=3)
    }

  } else { #for a point pattern

    if (is.null(G.coords)) cat("ERROR: centroid coordinates of the G sub-areas must be provided")
    if (spatstat::is.ppp(G.coords)) {
      dirich=spatstat::dirichlet(G.coords)} else{
        G.pp=spatstat::ppp(G.coords[,1], G.coords[,2], window=win)
        dirich=spatstat::dirichlet(G.pp)}

    if (add.data==F){
      spatstat::plot.tess(dirich, main=main)
    } else{
      spatstat::plot.tess(dirich, main=main)
      if (data.bin==T){
        ind=which(spatstat::marks(data)==category)
        bindata.pp=spatstat::ppp(data$x[ind], data$y[ind], data$win)
        spatstat::plot.ppp(bindata.pp, add=TRUE, pch=19)
      } else spatstat::plot.ppp(data, add=TRUE)
    }
  }
}
###########################################


###########################################
#3) batty

#'Batty's entropy.
#'
#'This function computes Batty's spatial entropy, following Batty (1976), see also Altieri et al (2017)
#'(references are under the topic \code{\link{SpatEntropy}}).
#'
#'Batty's spatial entropy measures the heterogeneity in the spatial distribution
#'of a phenomenon of interest, with regard to an area partition. It is high
#'when the phenomenon is equally intense over the sub-areas, and low when
#'it concentrates in one or few sub-areas. This function starts from the output
#'of [areapart()] and allows to compute Batty's entropy as
#'\deqn{H=\sum p_g \log(T_g/p_g)}
#'where \eqn{p_g} is the probability of occurrence of the phenomenon over sub-area \eqn{g},
#'and \eqn{T_g} is the sub-area size.
#'When data are categorical, the phenomenon of interest corresponds to
#'one category, which must be specified. If data are an unmarked
#'point pattern, a fake mark vector must be created with the same category for all points.
#'
#' @param data A data matrix or vector, can be numeric, factor, character, ...
#'   If the dataset is a point pattern, `data` is the mark vector.
#' @param data.assign A three column matrix, containing the data coordinates (centroids when pixels)
#'   and the id of the corresponding sub-area. Provided by user or returned by [areapart()].
#' @param is.pointdata Logical: \code{T} if data are a point pattern, \code{F} if they are pixels.
#' @param category A character string, the exact name of the category for which Batty's spatial
#'                 entropy is computed, as in `data`.
#' @param win An \code{owin} object (see package \code{spatstat}), the observation area.
#'            Only needed for lattice data.
#' @param G.coords A point pattern (an object of class \code{ppp} see package \code{spatstat}),
#'                 or a two column matrix with the area centroids' coordinates.  Provided by user or returned by [areapart()].
#'
#' @return Batty's spatial entropy value, as well as a table with information
#'   about each sub-area:
#'\itemize{
#'   \item `area.id` the sub-area id
#'   \item `abs.freq` the number of points/pixels presenting the category of interest
#'     for each sub-area
#'   \item `rel.freq` the proportion of points/pixels presenting the category of interest
#'     in each sub-area, with regard to the total number of points/pixels with the
#'     category of interest
#'   \item `Tg` the sub-area size.
#'}
#'
#' @examples
#' #LATTICE DATA
#' data.lat=matrix(sample(c("a","b","c"), 100, replace=TRUE), nrow=10)
#' ccc=coords_pix(area=square(10), nrow=10, ncol=10)
#' partition=areapart(square(10), G=5, data.coords=ccc)
#' batty(data.lat, partition$data.assign, category="a",
#' win=square(10), G.coords=partition$G.coords)
#' plot_areapart(partition$data.assign, square(10), is.pointdata=FALSE,
#' add.data=TRUE, data.bin=TRUE, category="a",
#' data=data.lat, G.coords=partition$G.coords, main="")
#'
#' #POINT DATA
#' data.pp=runifpoint(100, win=square(10))
#' marks(data.pp)=sample(c("a","b","c"), 100, replace=TRUE)
#' ccc=coords(data.pp)
#' partition=areapart(square(10), G=4, data.coords=ccc)
#' batty(marks(data.pp), partition$data.assign, is.pointdata=TRUE,
#' category="b", G.coords=partition$G.coords)
#' plot_areapart(partition$data.assign, square(10), is.pointdata=TRUE,
#' add.data=TRUE, data.bin=TRUE, category="b",
#' data=data.pp, G.coords=partition$G.coords, main="")
#'
#' @export

batty=function(data, data.assign, is.pointdata=FALSE, category, win=NULL, G.coords){

  #dichotomize dataset according to the category of interest
  if(is.factor(data)) data=as.character(data)
  datavec=c(data)
  datavec[c(data)==category]=1
  datavec[c(data)!=category]=0
  datavec=as.numeric(datavec)

  #match data values and coordinates
  data.cat=cbind(data.assign, datavec)
  ch=sum(datavec) #total number of 1s

  #Gx2 table matching each area id to the number of 1 values in it
  if(spatstat::is.ppp(G.coords))
    G.count=1:(spatstat::npoints(G.coords)) else G.count=1:nrow(G.coords)
  sums=by(data.cat[,4],data.cat[,3], sum); sums=cbind(as.numeric(names(sums)), as.numeric(sums))
  col2=numeric(length(G.count))
  for(i in 1:length(G.count))
  { ind=which(sums[,1]==i)
  if(length(ind)>0) col2[i]=sums[which(sums[,1]==i),2]}
  G.count=cbind(G.count, col2)

  #add relative frequencies
  G.count=cbind(G.count, G.count[,2]/ch)

  #add Tg=sub-areas size
  if(is.pointdata==F){
    pix.size=spatstat::area.owin(win)/length(c(data))
    Tg=as.numeric(table(data.cat[,3]))*pix.size
    G.count=cbind(G.count, Tg)
  } else{
    if (spatstat::is.ppp(G.coords)) {
      dirich=spatstat::dirichlet(G.coords)} else{
        G.pp=spatstat::ppp(G.coords[,1], G.coords[,2], window=win)
        dirich=spatstat::dirichlet(G.pp)}
    Tg=as.numeric(lapply(dirich$tiles, spatstat::area.owin))
    G.count=cbind(G.count, Tg)
  }

  colnames(G.count)=c("area.id", "abs.freq", "rel.freq", "Tg")

  #batty
  batty.terms=ifelse(G.count[,3]>0,G.count[,3]*log(G.count[,4]/G.count[,3]),0)
  batty.ent=sum(batty.terms)

  return(list(batty.table=G.count, batty.ent=batty.ent))
}
###########################################


###########################################
#4) karlstrom

#'Karlstrom and Ceccato's entropy.
#'
#'This function computes Karlstrom and Ceccato's spatial entropy for a
#'chosen neighbourhood distance,
#'following Karlstrom and Ceccato (2002), see also Altieri et al (2017)
#'(references are under the topic \code{\link{SpatEntropy}}).
#'
#'Karlstrom and Ceccato's spatial entropy measures the heterogeneity in the spatial distribution
#'of a phenomenon of interest, with regard to an area partition and accounting for the neighbourhood.
#'It is similar to Batty's entropy (see [batty()]) discarding the sub-area size,
#'with the difference that the probability of occurrence of the phenomenon over area \eqn{g}
#'is actually a weighted sum of the neighbouring probabilities.
#'When data are categorical, the phenomenon of interest corresponds to
#'one category, which must be specified. If data are an unmarked
#'point pattern, a fake mark vector must be created with the same category for all points.
#'
#' @param data A data matrix or vector, can be numeric, factor, character, ...
#'   If the dataset is a point pattern, `data` is the mark vector.
#' @param data.assign A three column matrix, containing the data coordinates (centroids when pixels)
#'   and the id of the corresponding sub-area. Provided by user or returned by [areapart()].
#' @param category A character string, the exact name of the category for which Karlstrom and Ceccato's spatial
#'   entropy is computed, as in `data`.
#' @param G.coords A point pattern (an object of class \code{ppp} see package \code{spatstat}),
#'                 or a two column matrix with the area centroids' coordinates.  Provided by user or returned by [areapart()].
#' @param neigh.dist A scalar, the chosen neighbourhood Euclidean distance.
#'


#' @return Karlstrom and Ceccato's spatial entropy value as well as a table with information about each sub-area:
#'\itemize{
#'   \item `area.id` the sub-area id
#'   \item `abs.freq` the number of points/pixels presenting the category of interest
#'     for each sub-area
#'   \item `rel.freq` the proportion of points/pixels presenting the category of interest
#'     in each sub-area, with regard to the total number of points/pixels with the
#'     category of interest
#'   \item `p.tilde` the probability of occurrence over area \eqn{g} weighted with its neighbours.
#'}
#'
#' @examples
#' #LATTICE DATA
#' data.lat=matrix(sample(c("a","b","c"), 100, replace=TRUE), nrow=10)
#' ccc=coords_pix(area=square(10), nrow=10, ncol=10)
#' partition=areapart(square(10), G=5, data.coords=ccc)
#' karlstrom(data.lat, partition$data.assign, category="a",
#' G.coords=partition$G.coords, neigh.dist=2)
#' plot_areapart(partition$data.assign, square(10), is.pointdata=FALSE,
#' add.data=TRUE, data.bin=TRUE, category="a",
#' data=data.lat, G.coords=partition$G.coords, main="")
#'
#' #POINT DATA
#' data.pp=runifpoint(100, win=square(10))
#' marks(data.pp)=sample(c("a","b","c"), 100, replace=TRUE)
#' ccc=coords(data.pp)
#' partition=areapart(square(10), G=4, data.coords=ccc)
#' karlstrom(marks(data.pp), partition$data.assign,
#' category="b", G.coords=partition$G.coords, neigh.dist=4)
#' plot_areapart(partition$data.assign, square(10), is.pointdata=TRUE,
#' add.data=TRUE, data.bin=TRUE, category="b",
#' data=data.pp, G.coords=partition$G.coords, main="")
#'
#' @export

karlstrom=function(data, data.assign, category, G.coords, neigh.dist){

  #dichotomize dataset according to the category of interest
  if(is.factor(data)) data=as.character(data)
  datavec=c(data)
  datavec[c(data)==category]=1
  datavec[c(data)!=category]=0
  datavec=as.numeric(datavec)

  #match data values and coordinates
  data.cat=cbind(data.assign, datavec)
  ch=sum(datavec) #total number of 1s

  #Gx2 table matching each area id to the number of 1 values in it
  if(spatstat::is.ppp(G.coords))
    G.count=1:(spatstat::npoints(G.coords)) else G.count=1:nrow(G.coords)
  sums=by(data.cat[,4],data.cat[,3], sum); sums=cbind(as.numeric(names(sums)), as.numeric(sums))
  col2=numeric(length(G.count))
  for(i in 1:length(G.count))
  { ind=which(sums[,1]==i)
  if(length(ind)>0) col2[i]=sums[which(sums[,1]==i),2]}
  G.count=cbind(G.count, col2)

  #add relative frequencies
  G.count=cbind(G.count, G.count[,2]/ch)

  #compute p tilde according to neighbourhood
  distances=spatstat::pairdist(G.coords)
  G=spatstat::npoints(G.coords)
  p.tilde=rep(1,G)
  for (g in 1:G)
  {
    which.ind=which(distances[g,]<=neigh.dist)
    neigh.prob=numeric(length(which.ind))
    for(j in 1:length(which.ind)) neigh.prob[j]=G.count[which.ind[j], 3]
    p.tilde[g]=mean(neigh.prob)
  }
  G.count=cbind(G.count, p.tilde)

  colnames(G.count)=c("area.id", "abs.freq", "rel.freq", "p.tilde")

  #karlstrom
  karlstrom.terms=ifelse(G.count[,3]>0,G.count[,3]*log(1/G.count[,4]),0)
  karlstrom.ent=sum(karlstrom.terms)

  return(list(karlstrom.table=G.count, karlstrom.ent=karlstrom.ent))
}
###########################################
