% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_method_query.R
\docType{methods}
\name{query}
\alias{query}
\alias{query,kRp.corp.freq-method}
\alias{query,kRp.text-method}
\alias{query,data.frame-method}
\title{A method to get information out of koRpus objects}
\usage{
query(obj, ...)

\S4method{query}{kRp.corp.freq}(
  obj,
  var = NULL,
  query,
  rel = c("eq", "gt", "ge", "lt", "le"),
  as.df = TRUE,
  ignore.case = TRUE,
  perl = FALSE,
  regexp_var = "word"
)

\S4method{query}{kRp.text}(
  obj,
  var,
  query,
  rel = c("eq", "gt", "ge", "lt", "le"),
  as.df = TRUE,
  ignore.case = TRUE,
  perl = FALSE,
  regexp_var = "token"
)

\S4method{query}{data.frame}(
  obj,
  var,
  query,
  rel = c("eq", "gt", "ge", "lt", "le"),
  as.df = TRUE,
  ignore.case = TRUE,
  perl = FALSE,
  regexp_var = "token"
)
}
\arguments{
\item{obj}{An object of class \code{\link[koRpus:kRp.corp.freq-class]{kRp.corp.freq}},
\code{\link[koRpus:kRp.text-class]{kRp.text}}, or \code{data.frame}.}

\item{...}{Optional arguments, see above.}

\item{var}{A character string naming a variable in the object (i.e., colname). If set to
\code{"regexp"}, \code{grepl} is called on the column specified by \code{regexp_var}.}

\item{query}{A character vector (for words), regular expression,
      or single number naming values to be matched in the variable.
Can also be a vector of two numbers to query a range of frequency data,
      or a list of named lists for multiple queries (see
"Query lists" section in details).}

\item{rel}{A character string defining the relation of the queried value and desired results.
Must either be \code{"eq"} (equal, the default), \code{"gt"} (greater than),
      \code{"ge"} (greater or equal),
\code{"lt"} (less than) or \code{"le"} (less or equal). If \code{var="word"},
      \code{rel} is always interpreted as \code{"eq"}.}

\item{as.df}{Logical, if \code{TRUE}, returns a data.frame, otherwise an object of
the input class. Ignored if \code{obj} is a data frame already.}

\item{ignore.case}{Logical, passed through to \code{grepl} if \code{var="regexp"}.}

\item{perl}{Logical, passed through to \code{grepl} if \code{var="regexp"}.}

\item{regexp_var}{A character string naming the column to query if \code{var="regexp"}.}
}
\value{
Depending on the arguments, might include whole objects, lists, single values etc.
}
\description{
The method \code{query} returns query information from objects of classes \code{\link[koRpus:kRp.corp.freq-class]{kRp.corp.freq}},
\code{\link[koRpus:kRp.text-class]{kRp.text}}, or \code{data.frame}.
}
\details{
\emph{kRp.corp.freq:} Depending on the setting of the \code{var} parameter,
      \code{query} will return entries with a matching character (\code{var="word"}),
or all entries of the desired frequency (see the examples). A special case is the need for a range of frequencies,
which can be achieved by providing a numerical vector of two values as the \code{query} value,
      for start and end of
the range, respectively. In these cases,
      if \code{rel} is set to \code{"gt"} or \code{"lt"},
the given range borders are excluded, otherwise they will be included as true matches.

\emph{kRp.text:} \code{var} can be any of the variables in slot \code{tokens}. If \code{rel="num"},
a vector with the row numbers in which the query was found is returned.
}
\section{Query lists}{
 You can combine an arbitrary number of queries in a simple way by providing a list of named lists to the
\code{query} parameter, where each list contains one query request. In each list,
      the first element name represents the
\code{var} value of the request,
      and its value is taken as the \code{query} argument. You can also assign \code{rel}, 
\code{ignore.case} and \code{perl} for each request individually, and if you don't,
      the settings of the main query call are 
taken as default (\code{as.df} only applies to the final query). The filters will be applied in the order given,
      i.e., the
second query will be made to the results of the first.

This method calls \code{\link[base]{subset}},
      which might actually be even more flexible if you need more control.
}

\examples{
# code is only run when the english language package can be loaded
if(require("koRpus.lang.en", quietly = TRUE)){
  sample_file <- file.path(
    path.package("koRpus"), "examples", "corpus", "Reality_Winner.txt"
  )
  tokenized.obj <- tokenize(
    txt=sample_file,
    lang="en"
  )
  en_corp <- read.corp.custom(
    tokenized.obj,
    caseSens=FALSE
  )

  # look up frequencies for the word "winner"
  query(en_corp, var="word", query="winner")

  # show all entries with a frequency of exactly 3 in the corpus
  query(en_corp, "freq", 3)

  # now, which tokens appear more than 40000 times in a million?
  query(en_corp, "pmio", 40000, "gt")

  # example for a range request: tokens with a log10 between 4.2 and 4.7
  # (including these two values)
  query(en_corp, "log10", c(4.2, 4.7))
  # (and without them)
  query(en_corp, "log10", c(4.2, 4.7), "gt")

  # example for a list of queries: get words with a frequency between
  # 10000 and 25000 per million and at least four letters
  query(en_corp, query=list(
    list(pmio=c(10000, 25000)),
    list(lttr=4, rel="ge"))
  )

  # get all instances of "the" in a tokenized text object
  query(tokenized.obj, "token", "the")
} else {}
}
\seealso{
\code{\link[koRpus:kRp.corp.freq-class]{kRp.corp.freq}}, \code{\link[base]{subset}}
}
\keyword{methods}
