% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PlotFeatureRanking.R
\name{plot_variable_importance}
\alias{plot_variable_importance}
\alias{plot_variable_importance,ANY-method}
\alias{plot_variable_importance,familiarCollection-method}
\alias{plot_feature_selection_occurrence}
\alias{plot_feature_selection_variable_importance}
\alias{plot_model_signature_occurrence}
\alias{plot_model_signature_variable_importance}
\title{Plot variable importance scores of features during feature selection or
after training a model.}
\usage{
plot_variable_importance(
  object,
  type,
  feature_cluster_method = waiver(),
  feature_linkage_method = waiver(),
  feature_cluster_cut_method = waiver(),
  feature_similarity_threshold = waiver(),
  aggregation_method = waiver(),
  rank_threshold = waiver(),
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  show_cluster = TRUE,
  ggtheme = NULL,
  discrete_palette = NULL,
  gradient_palette = waiver(),
  x_label = "feature",
  rotate_x_tick_labels = waiver(),
  y_label = waiver(),
  legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  y_range = NULL,
  y_n_breaks = 5,
  y_breaks = NULL,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)

\S4method{plot_variable_importance}{ANY}(
  object,
  type,
  feature_cluster_method = waiver(),
  feature_linkage_method = waiver(),
  feature_cluster_cut_method = waiver(),
  feature_similarity_threshold = waiver(),
  aggregation_method = waiver(),
  rank_threshold = waiver(),
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  show_cluster = TRUE,
  ggtheme = NULL,
  discrete_palette = NULL,
  gradient_palette = waiver(),
  x_label = "feature",
  rotate_x_tick_labels = waiver(),
  y_label = waiver(),
  legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  y_range = NULL,
  y_n_breaks = 5,
  y_breaks = NULL,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)

\S4method{plot_variable_importance}{familiarCollection}(
  object,
  type,
  feature_cluster_method = waiver(),
  feature_linkage_method = waiver(),
  feature_cluster_cut_method = waiver(),
  feature_similarity_threshold = waiver(),
  aggregation_method = waiver(),
  rank_threshold = waiver(),
  draw = FALSE,
  dir_path = NULL,
  split_by = NULL,
  color_by = NULL,
  facet_by = NULL,
  facet_wrap_cols = NULL,
  show_cluster = TRUE,
  ggtheme = NULL,
  discrete_palette = NULL,
  gradient_palette = waiver(),
  x_label = "feature",
  rotate_x_tick_labels = waiver(),
  y_label = waiver(),
  legend_label = waiver(),
  plot_title = waiver(),
  plot_sub_title = waiver(),
  caption = NULL,
  y_range = NULL,
  y_n_breaks = 5,
  y_breaks = NULL,
  width = waiver(),
  height = waiver(),
  units = waiver(),
  export_collection = FALSE,
  ...
)

plot_feature_selection_occurrence(...)

plot_feature_selection_variable_importance(...)

plot_model_signature_occurrence(...)

plot_model_signature_variable_importance(...)
}
\arguments{
\item{object}{A \code{familiarCollection} object, or other objects from which
a \code{familiarCollection} can be extracted. See details for more information.}

\item{type}{Determine what variable importance should be shown. Can be
\code{feature_selection} or \code{model} for the variable importance after the
feature selection step and after the model training step, respectively.}

\item{feature_cluster_method}{The method used to perform clustering. These are
the same methods as for the \code{cluster_method} configuration parameter:
\code{none}, \code{hclust}, \code{agnes}, \code{diana} and \code{pam}.

\code{none} cannot be used when extracting data regarding mutual correlation or
feature expressions.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{feature_linkage_method}{The method used for agglomerative clustering in
\code{hclust} and \code{agnes}. These are the same methods as for the
\code{cluster_linkage_method} configuration parameter: \code{average}, \code{single},
\code{complete}, \code{weighted}, and \code{ward}.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{feature_cluster_cut_method}{The method used to divide features into
separate clusters. The available methods are the same as for the
\code{cluster_cut_method} configuration parameter: \code{silhouette}, \code{fixed_cut} and
\code{dynamic_cut}.

\code{silhouette} is available for all cluster methods, but \code{fixed_cut} only
applies to methods that create hierarchical trees (\code{hclust}, \code{agnes} and
\code{diana}). \code{dynamic_cut} requires the \code{dynamicTreeCut} package and can only
be used with \code{agnes} and \code{hclust}.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{feature_similarity_threshold}{The threshold level for pair-wise
similarity that is required to form feature clusters with the \code{fixed_cut}
method.

If not provided explicitly, this parameter is read from settings used at
creation of the underlying \code{familiarModel} objects.}

\item{aggregation_method}{(\emph{optional}) The method used to aggregate variable
importances over different data subsets, e.g. bootstraps. The following
methods can be selected:
\itemize{
\item \code{mean} (default): Use the mean rank of a feature over the subsets to
determine the aggregated feature rank.
\item \code{median}: Use the median rank of a feature over the subsets to determine
the aggregated feature rank.
\item \code{best}: Use the best rank the feature obtained in any subset to determine
the aggregated feature rank.
\item \code{worst}: Use the worst rank the feature obtained in any subset to
determine the aggregated feature rank.
\item \code{stability}: Use the frequency of the feature being in the subset of
highly ranked features as measure for the aggregated feature rank
(Meinshausen and Buehlmann, 2010).
\item \code{exponential}: Use a rank-weighted frequency of occurrence in the subset
of highly ranked features as measure for the aggregated feature rank (Haury
et al., 2011).
\item \code{borda}: Use the borda count as measure for the aggregated feature rank
(Wald et al., 2012).
\item \code{enhanced_borda}: Use an occurrence frequency-weighted borda count as
measure for the aggregated feature rank (Wald et al., 2012).
\item \code{truncated_borda}: Use borda count computed only on features within the
subset of highly ranked features.
\item \code{enhanced_truncated_borda}: Apply both the enhanced borda method and the
truncated borda method and use the resulting borda count as the aggregated
feature rank.
}}

\item{rank_threshold}{(\emph{optional}) The threshold used to define the subset of
highly important features. If not set, this threshold is determined by
maximising the variance in the occurrence value over all features over the
subset size.

This parameter is only relevant for \code{stability}, \code{exponential},
\code{enhanced_borda}, \code{truncated_borda} and \code{enhanced_truncated_borda} methods.}

\item{draw}{(\emph{optional}) Draws the plot if TRUE.}

\item{dir_path}{(\emph{optional}) Path to the directory where created figures are
saved to. Output is saved in the \code{variable_importance} subdirectory. If
\code{NULL}, figures are not saved but are returned instead.}

\item{split_by}{(\emph{optional}) Splitting variables. This refers to column names
on which datasets are split. A separate figure is created for each split.
See details for available variables.}

\item{color_by}{(\emph{optional}) Variables used to determine fill colour of plot
objects. The variables cannot overlap with those provided to the \code{split_by}
argument, but may overlap with other arguments. See details for available
variables.}

\item{facet_by}{(\emph{optional}) Variables used to determine how and if facets of
each figure appear. In case the \code{facet_wrap_cols} argument is \code{NULL}, the
first variable is used to define columns, and the remaining variables are
used to define rows of facets. The variables cannot overlap with those
provided to the \code{split_by} argument, but may overlap with other arguments.
See details for available variables.}

\item{facet_wrap_cols}{(\emph{optional}) Number of columns to generate when facet
wrapping. If NULL, a facet grid is produced instead.}

\item{show_cluster}{(\emph{optional}) Show which features were clustered together.
Currently not available in combination with variable importance obtained
during feature selection.}

\item{ggtheme}{(\emph{optional}) \code{ggplot} theme to use for plotting.}

\item{discrete_palette}{(\emph{optional}) Palette to use for coloring bar plots, in
case a non-singular variable was provided to the \code{color_by} argument.}

\item{gradient_palette}{(\emph{optional}) Palette to use for filling the bars in
case the \code{color_by} argument is not set. The bars are then coloured
according to the occurrence of features. By default, no gradient is used,
and the bars are not filled according to occurrence. Use \code{NULL} to fill the
bars using the default palette in \code{familiar}.}

\item{x_label}{(\emph{optional}) Label to provide to the x-axis. If NULL, no label
is shown.}

\item{rotate_x_tick_labels}{(\emph{optional}) Rotate tick labels on the x-axis by
90 degrees. Defaults to \code{TRUE}. Rotation of x-axis tick labels may also be
controlled through the \code{ggtheme}. In this case, \code{FALSE} should be provided
explicitly.}

\item{y_label}{(\emph{optional}) Label to provide to the y-axis. If NULL, no label
is shown.}

\item{legend_label}{(\emph{optional}) Label to provide to the legend. If NULL, the
legend will not have a name.}

\item{plot_title}{(\emph{optional}) Label to provide as figure title. If NULL, no
title is shown.}

\item{plot_sub_title}{(\emph{optional}) Label to provide as figure subtitle. If
NULL, no subtitle is shown.}

\item{caption}{(\emph{optional}) Label to provide as figure caption. If NULL, no
caption is shown.}

\item{y_range}{(\emph{optional}) Value range for the y-axis.}

\item{y_n_breaks}{(\emph{optional}) Number of breaks to show on the y-axis of the
plot. \code{y_n_breaks} is used to determine the \code{y_breaks} argument in case it
is unset.}

\item{y_breaks}{(\emph{optional}) Break points on the y-axis of the plot.}

\item{width}{(\emph{optional}) Width of the plot. A default value is derived from
the number of facets and the number of features.}

\item{height}{(\emph{optional}) Height of the plot. A default value is derived from
the number of facets, and the length of the longest feature name (if
\code{rotate_x_tick_labels} is \code{TRUE}).}

\item{units}{(\emph{optional}) Plot size unit. Either \code{cm} (default), \code{mm} or \verb{in}.}

\item{export_collection}{(\emph{optional}) Exports the collection if TRUE.}

\item{...}{
  Arguments passed on to \code{\link[=as_familiar_collection]{as_familiar_collection}}, \code{\link[ggplot2:ggsave]{ggplot2::ggsave}}, \code{\link[=extract_fs_vimp]{extract_fs_vimp}}
  \describe{
    \item{\code{familiar_data_names}}{Names of the dataset(s). Only used if the \code{object} parameter
is one or more \code{familiarData} objects.}
    \item{\code{collection_name}}{Name of the collection.}
    \item{\code{filename}}{File name to create on disk.}
    \item{\code{plot}}{Plot to save, defaults to last plot displayed.}
    \item{\code{device}}{Device to use. Can either be a device function
(e.g. \link{png}), or one of "eps", "ps", "tex" (pictex),
"pdf", "jpeg", "tiff", "png", "bmp", "svg" or "wmf" (windows only).}
    \item{\code{path}}{Path of the directory to save plot to: \code{path} and \code{filename}
are combined to create the fully qualified file name. Defaults to the
working directory.}
    \item{\code{scale}}{Multiplicative scaling factor.}
    \item{\code{dpi}}{Plot resolution. Also accepts a string input: "retina" (320),
"print" (300), or "screen" (72). Applies only to raster output types.}
    \item{\code{limitsize}}{When \code{TRUE} (the default), \code{ggsave()} will not
save images larger than 50x50 inches, to prevent the common error of
specifying dimensions in pixels.}
    \item{\code{bg}}{Background colour. If \code{NULL}, uses the \code{plot.background} fill value
from the plot theme.}
    \item{\code{verbose}}{Flag to indicate whether feedback should be provided on the
computation and extraction of various data elements.}
    \item{\code{message_indent}}{Number of indentation steps for messages shown during
computation and extraction of various data elements.}
  }}
}
\value{
\code{NULL}, or a list of plot objects if \code{dir_path} is \code{NULL}.
}
\description{
This function plots variable importance based data obtained
during feature selection or after training a model, which are stored in a
\code{familiarCollection} object.
}
\details{
This function generates a barplot based on variable importance of
features.

The only allowed values for \code{split_by}, \code{color_by} or \code{facet_by} are
\code{fs_method} and \code{learner}, but note that \code{learner} has no effect when
plotting variable importance of features acquired during feature selection.

Available palettes for \code{discrete_palette} and \code{gradient_palette} are those
listed by \code{grDevices::palette.pals()} (requires R >= 4.0.0),
\code{grDevices::hcl.pals()} (requires R >= 3.6.0) and \code{rainbow}, \code{heat.colors},
\code{terrain.colors}, \code{topo.colors} and \code{cm.colors}, which correspond to the
palettes of the same name in \code{grDevices}. If not specified, a default
palette based on palettes in Tableau is used. You may also specify your own
palette by using colour names listed by \code{grDevices::colors()} or through
hexadecimal RGB strings.

Labeling methods such as \code{set_feature_names} or \code{set_fs_method_names} can be
applied to the \code{familiarCollection} object to update labels, and order the
output in the figure.
}
