% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predextract.R
\name{pred_extract}
\alias{pred_extract}
\title{Preliminary data cleaning including removing duplicates, records
  outside a particular basin, and NAs.}
\usage{
pred_extract(
  data,
  raster,
  lat = NULL,
  lon = NULL,
  bbox = NULL,
  colsp,
  minpts = 10,
  mp = TRUE,
  rm_duplicates = TRUE,
  na.rm = TRUE,
  na.inform = FALSE,
  list = TRUE,
  merge = FALSE,
  verbose = FALSE,
  warn = FALSE,
  coords = FALSE
)
}
\arguments{
\item{data}{\code{dataframe}. Data frame with records for one or multiple
species, to be checked for records with no coordinates, duplicates, records
that fall on land, sea, country or city centroids, and geographical
outliers (Zizka et al., 2022).}

\item{raster}{\code{raster}. Environmental layers from different providers
such as WorldClim, Hydrography90m, CHELSA, and Copernicus.}

\item{lat, lon}{\code{coordinates}. Names of the columns in \code{data} that
hold the latitude and longitude values.}

\item{bbox}{\code{sf} or \code{vector}. Object of class 'shapefile' if only a particular basin is
considered. Bounding box vector points can also be provided in the form
\code{"c(xmin, ymin, xmax, ymax)"}. \code{xmin} is the minimum longitude,
\code{ymin} is the minimum latitude, \code{xmax} is the maximum longitude
and \code{ymax} is the maximum latitude.}

\item{colsp}{\code{string}. Variable already in the data that determines the groups to
be considered when extracting data.}

\item{minpts}{\code{numeric}. Minimum number of records required for a species after
removing duplicates and records outside a particular basin. Default \code{10}.}

\item{mp}{\code{logical}. If \code{TRUE}, the minimum number of records \code{minpts} should be provided to allow dropping groups
with fewer records. This is important if species distribution models are going to be fitted.}

\item{rm_duplicates}{\code{logical}. If \code{TRUE}, duplicates are removed based on species coordinates and names. Default \code{TRUE}.}

\item{na.rm}{\code{logical}. If \code{TRUE}, missing values are discarded after the data are extracted.
Default \code{TRUE}.}

\item{na.inform}{\code{logical}. If \code{TRUE}, missing values are discarded after the data are extracted and a message is
returned. Default \code{FALSE}.}

\item{list}{\code{logical}. If \code{TRUE}, a list of multiple species data frames is
generated; if \code{FALSE}, a single dataframe of species data sets is returned. Default \code{TRUE}.}

\item{merge}{\code{logical}. Whether to add the other columns in the species data after data
extraction. Default \code{FALSE}.}

\item{verbose}{\code{logical}. If \code{TRUE}, messages and warnings will be produced. Default \code{FALSE}.}

\item{warn}{\code{logical}. Indicates whether to show implementation warnings or
not. Default \code{FALSE}.}

\item{coords}{\code{logical}. If \code{TRUE}, the original coordinates are also returned, attached to the extracted dataset. Default \code{FALSE}.}
}
\value{
\code{dataframe} or \code{list} of precleaned data sets for single or multiple species.
}
\description{
Preliminary data cleaning including removing duplicates, records
  outside a particular basin, and NAs.
}
\examples{
\donttest{
data("efidata")

danube <- system.file('extdata/danube.shp.zip', package='specleanr')

danubebasin <- sf::st_read(danube, quiet=TRUE)

#Get environmental data

worldclim <- terra::rast(system.file('extdata/worldclim.tiff', package='specleanr'))

referencedata <- pred_extract(data = efidata,
                          raster= worldclim ,
                          lat ="decimalLatitude",
                          lon = 'decimalLongitude',
                          colsp = 'scientificName',
                          bbox = danubebasin,
                          list= TRUE, #list will be generated for all species
                          minpts = 7, merge=TRUE)
}
}
