% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as.speeches.R
\name{as.speeches}
\alias{as.speeches}
\alias{as.speeches,partition-method}
\alias{as.speeches,subcorpus-method}
\alias{as.speeches,corpus-method}
\alias{as.speeches,character-method}
\title{Split corpus or partition into speeches.}
\usage{
as.speeches(.Object, ...)

\S4method{as.speeches}{partition}(
  .Object,
  s_attribute_date = grep("date", s_attributes(.Object), value = TRUE),
  s_attribute_name = grep("name", s_attributes(.Object), value = TRUE),
  gap = 500,
  mc = FALSE,
  verbose = TRUE,
  progress = TRUE
)

\S4method{as.speeches}{subcorpus}(
  .Object,
  s_attribute_date = grep("date", s_attributes(.Object), value = TRUE),
  s_attribute_name = grep("name", s_attributes(.Object), value = TRUE),
  gap = 500,
  mc = FALSE,
  verbose = TRUE,
  progress = TRUE
)

\S4method{as.speeches}{corpus}(
  .Object,
  s_attribute_date = grep("date", s_attributes(.Object), value = TRUE),
  s_attribute_name = grep("name", s_attributes(.Object), value = TRUE),
  gap = 500,
  mc = FALSE,
  verbose = TRUE,
  progress = TRUE
)

\S4method{as.speeches}{character}(
  .Object,
  s_attribute_date = grep("date", s_attributes(.Object), value = TRUE),
  s_attribute_name = grep("name", s_attributes(.Object), value = TRUE),
  gap = 500,
  mc = FALSE,
  verbose = TRUE,
  progress = TRUE
)
}
\arguments{
\item{.Object}{A \code{partition}, \code{subcorpus} or \code{corpus} object, or a
length-one \code{character} vector indicating a CWB corpus.}

\item{...}{Further arguments.}

\item{s_attribute_date}{A length-one \code{character} vector, the s-attribute that
provides the dates of sessions.}

\item{s_attribute_name}{A length-one \code{character} vector, the s-attribute that
provides the names of speakers.}

\item{gap}{An \code{integer} value, the number of tokens between strucs that
is assumed to make the difference between a speech that has merely been
interrupted (by an interjection or question) and two separate speeches.}

\item{mc}{Whether to use multicore, defaults to \code{FALSE}. If \code{progress} is
\code{TRUE}, argument \code{mc} is passed into \code{pblapply()} as argument \code{cl}. If
\code{progress} is \code{FALSE}, \code{mc} is passed into \code{mclapply()} as argument
\code{mc.cores}.}

\item{verbose}{A \code{logical} value, defaults to \code{TRUE}.}

\item{progress}{A \code{logical} value, whether to show a progress bar.}
}
\value{
A \code{partition_bundle}. The names of the objects in the bundle are
the speaker name, the date of the speech and an index for the number of the
speech on a given day, concatenated by underscores.
}
\description{
Split an entire corpus or a partition into speeches. The heuristic is to split
the corpus/partition into partitions on a day-to-day basis first, using the
s-attribute provided by \code{s_attribute_date}. These subcorpora are then
split into speeches by speaker name, using s-attribute \code{s_attribute_name}.
If there is a gap larger than the number of tokens supplied by argument
\code{gap}, contributions of a speaker are assumed to be two separate speeches.
}
\examples{
use("polmineR")
speeches <- as.speeches(
  "GERMAPARLMINI",
  s_attribute_date = "date", s_attribute_name = "speaker"
)
speeches_count <- count(speeches, p_attribute = "word")
tdm <- as.TermDocumentMatrix(speeches_count, col = "count")

bt <- partition("GERMAPARLMINI", date = "2009-10-27")
speeches <- as.speeches(bt, s_attribute_name = "speaker")
summary(speeches)
sp <- as.speeches(.Object = corpus("GERMAPARLMINI"), s_attribute_name = "speaker")
}
