% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/extract_text.R
\name{extract_text}
\alias{extract_text}
\title{extract_text}
\usage{
extract_text(file, pages = NULL, area = NULL, password = NULL,
  encoding = NULL, copy = FALSE)
}
\arguments{
\item{file}{A character string specifying the path or URL to a PDF file.}

\item{pages}{An optional integer vector specifying pages to extract from.}

\item{area}{An optional list of length equal to the number of pages specified, where each entry contains a four-element numeric vector of coordinates (top, left, bottom, right) delimiting the region to extract text from on the corresponding page. As a convenience, a list of length 1 can be used to extract the same area from all (specified) pages.}

\item{password}{Optionally, a character string containing a user password to access a secured PDF.}

\item{encoding}{Optionally, a character string specifying an encoding for the text, to be passed to the assignment method of \code{\link[base]{Encoding}}.}

\item{copy}{Specifies whether the original local file(s) should be copied to
\code{tempdir()} before processing. \code{FALSE} by default. The argument is
ignored if \code{file} is a URL.}
}
\value{
If \code{pages = NULL} (the default), a character vector of length 1; otherwise, a character vector of length \code{length(pages)}.
}
\description{
Extract text from a PDF file
}
\details{
This function converts the contents of a PDF file into a single unstructured character string.
}
\examples{
\donttest{
# simple demo file
f <- system.file("examples", "text.pdf", package = "tabulizer")

# extract all text
extract_text(f)

# extract all text from page 1 only
extract_text(f, pages = 1)

# extract text from selected area only
extract_text(f, area = list(c(209.4, 140.5, 304.2, 500.8)))

}
}
\seealso{
\code{\link{extract_tables}}, \code{\link{extract_areas}}, \code{\link{split_pdf}}
}
\author{
Thomas J. Leeper <thosjleeper@gmail.com>
}
