% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/alluvial-data.r
\name{alluvial-data}
\alias{alluvial-data}
\alias{is_lodes_form}
\alias{is_alluvia_form}
\alias{to_lodes_form}
\alias{to_alluvia_form}
\title{Check for alluvial structure and convert between alluvial formats}
\usage{
is_lodes_form(data, key, value, id, weight = NULL, logical = TRUE,
  silent = FALSE)

is_alluvia_form(data, ..., axes = NULL, weight = NULL,
  logical = TRUE, silent = FALSE)

to_lodes_form(data, ..., axes = NULL, key = "x", value = "stratum",
  id = "alluvium", diffuse = FALSE, discern = FALSE)

to_alluvia_form(data, key, value, id, distill = FALSE)
}
\arguments{
\item{data}{A data frame.}

\item{key, value, id}{In \code{to_lodes_form}, handled as in
\code{\link[tidyr:gather]{tidyr::gather()}} and used to name the new axis (key), stratum
(value), and alluvium (identifying) variables. In \code{to_alluvia_form},
handled as in \code{\link[tidyr:spread]{tidyr::spread()}} and used to identify the fields
of \code{data} to be used as the axis (key), stratum (value), and alluvium
(identifying) variables.}

\item{weight}{Optional field of \code{data}, handled using
\code{\link[rlang:enquo]{rlang::enquo()}}, to be used as heights or depths of the alluvia
or lodes.}

\item{logical}{Deprecated. Whether to return a logical value or a character
string indicating the type of alluvial structure ("none", "lodes", or
"alluvia").}

\item{silent}{Whether to suppress messages.}

\item{...}{Used in \code{is_alluvia_form} and \code{to_lodes_form} as in
\code{\link[dplyr:select]{dplyr::select()}} to determine axis variables, as an alternative
to \code{axes}. Ignored when \code{axes} is provided.}

\item{axes}{In \code{is_alluvia_form} and \code{to_lodes_form}, handled as in
\code{\link[dplyr:select]{dplyr::select()}} and used to identify the field(s) of
\code{data} to be used as axes.}

\item{diffuse}{Fields of \code{data}, handled using
\code{\link[tidyselect:vars_select]{tidyselect::vars_select()}}, to merge into the reshaped data by
\code{id}. They must be a subset of the axis variables. Alternatively, a
logical value indicating whether to merge all (\code{TRUE}) or none
(\code{FALSE}) of the axis variables.}

\item{discern}{Logical value indicating whether to suffix values of the
variables used as axes that appear in more than one such variable in order to
distinguish their factor levels. This forces the levels of the combined
factor variable \code{value} to be in the order of the axes.}

\item{distill}{A logical value indicating whether to include variables, other
than those passed to \code{key} and \code{value}, that vary within values
of \code{id}. Alternatively, a function (or its name) to be used to distill
each such variable to a single value. In addition to existing functions,
\code{distill} accepts the character values \code{"first"} (used if
\code{distill} is \code{TRUE}), \code{"last"}, and \code{"most"} (which
returns the modal value).}
}
\description{
Alluvial diagrams consist of multiple horizontally distributed columns (axes)
representing factor variables, vertical divisions (strata) of these axes
representing these variables' values, and splines (alluvial flows) connecting
vertical subdivisions (lodes) within strata of adjacent axes representing
subsets or amounts of observations that take the corresponding values of the
corresponding variables. The \code{is_*_form} functions check a data frame for
either of two types of alluvial structure:
}
\details{
\itemize{
\item One row per \strong{lode}, wherein each row encodes a subset or amount of
observations having a specific profile of axis values, a \code{key} field
encodes the axis, a \code{value} field encodes the value within each axis, and an
\code{id} column identifies multiple lodes corresponding to the same subset or
amount of observations. \code{is_lodes_form} tests for this structure.
\item One row per \strong{alluvium}, wherein each row encodes a subset or amount of
observations having a specific profile of axis values and a set \code{axes} of
fields encodes its values at each axis variable. \code{is_alluvia_form} tests
for this structure.
}

\code{to_lodes_form} takes a data frame with several designated variables to
be used as axes in an alluvial diagram, and reshapes the data frame so that
the axis variable names constitute a new factor variable and their values
comprise another. Other variables' values will be repeated, and a
row-grouping variable can be introduced. This function invokes
\code{\link[tidyr:gather]{tidyr::gather()}}.

\code{to_alluvia_form} takes a data frame with axis and axis value variables
to be used in an alluvial diagram, and reshapes the data frame so that the
axes constitute separate variables whose values are given by the value
variable. This function invokes \code{\link[tidyr:spread]{tidyr::spread()}}.
}
\examples{
# Titanic data in alluvia format (one row per alluvium)
titanic_alluvia <- as.data.frame(Titanic)
head(titanic_alluvia)
# check for alluvia structure; no axes are specified here, and the `Freq`
# field is passed as the weight
is_alluvia_form(titanic_alluvia,
                weight = "Freq")
# Titanic data in lodes format (one row per lode); the first four columns
# serve as axes, and the new key, value, and id variables are named by
# character strings
titanic_lodes <- to_lodes_form(titanic_alluvia,
                               key = "x", value = "stratum", id = "alluvium",
                               axes = 1:4)
head(titanic_lodes)
# check for lodes structure using the same key, value, and id names
is_lodes_form(titanic_lodes,
              key = "x", value = "stratum", id = "alluvium",
              weight = "Freq")
# again in lodes format, this time diffusing the `Class` variable
# (variable names are given unquoted, and the axes 1:3 are passed through
# `...` rather than through `axes`)
titanic_lodes2 <- to_lodes_form(titanic_alluvia,
                                key = variable, value = value,
                                id = passenger,
                                1:3, diffuse = Class)
head(titanic_lodes2)
# check for lodes structure, again with unquoted names
is_lodes_form(titanic_lodes2,
              key = variable, value = value, id = passenger,
              weight = Freq)

# curriculum data in lodes format
data(majors)
head(majors)
is_lodes_form(majors,
              key = "semester", value = "curriculum", id = "student")
# curriculum data in alluvia format
majors_alluvia <- to_alluvia_form(majors,
                                  key = "semester", value = "curriculum",
                                  id = "student")
head(majors_alluvia)
# axis variables are selected here with a tidyselect helper
is_alluvia_form(majors_alluvia, tidyselect::starts_with("CURR"))

# distill variables that vary within `id` values
# fix the seed so that the sampled letters are reproducible
set.seed(1)
# attach a randomly sampled letter (A-E) to every row of `majors`
majors$hypo_grade <- LETTERS[sample(5, size = nrow(majors), replace = TRUE)]
# distill each such variable to its modal value (`most`)
majors_alluvia2 <- to_alluvia_form(majors,
                                   key = "semester", value = "curriculum",
                                   id = "student",
                                   distill = "most")
head(majors_alluvia2)

# options to distinguish strata at different axes:
# the same plot is drawn first with `discern = TRUE`, then with
# `discern = FALSE`
gg <- ggplot(majors_alluvia,
             aes(axis1 = CURR1, axis2 = CURR7, axis3 = CURR13))
gg +
  geom_alluvium(aes(fill = as.factor(student)), width = 2/5, discern = TRUE) +
  geom_stratum(width = 2/5, discern = TRUE) +
  geom_text(stat = "stratum", discern = TRUE, label.strata = TRUE)
gg +
  geom_alluvium(aes(fill = as.factor(student)), width = 2/5, discern = FALSE) +
  geom_stratum(width = 2/5, discern = FALSE) +
  geom_text(stat = "stratum", discern = FALSE, label.strata = TRUE)
# warning when inappropriate
# NOTE(review): here the data are supplied in lodes form (x, stratum, and
# alluvium aesthetics); presumably `discern` only applies to alluvia-form
# input, which would explain the warning -- confirm
ggplot(majors[majors$semester \%in\% paste0("CURR", c(1, 7, 13)), ],
       aes(x = semester, stratum = curriculum, alluvium = student,
           label = curriculum)) +
  geom_alluvium(aes(fill = as.factor(student)), width = 2/5, discern = TRUE) +
  geom_stratum(width = 2/5, discern = TRUE) +
  geom_text(stat = "stratum", discern = TRUE)
}
\seealso{
Other alluvial data manipulation: \code{\link{self-adjoin}}
}
\concept{alluvial data manipulation}
