% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{nlswork_subset}
\alias{nlswork_subset}
\title{National Longitudinal Survey of Young Women (Subset)}
\format{
A data frame with approximately 2,400-2,700 observations (depending on
sampling) and the following variables:
\describe{
\item{idcode}{Individual identifier (numeric)}
\item{year}{Survey year (numeric)}
\item{birth_yr}{Year of birth (numeric)}
\item{age}{Age in current year (numeric)}
\item{race}{Race: 1=white, 2=black, 3=other (numeric or labeled)}
\item{msp}{1 if married, spouse present (numeric or labeled)}
\item{nev_mar}{1 if never married (numeric)}
\item{grade}{Current grade completed (numeric)}
\item{collgrad}{1 if college graduate (numeric)}
\item{not_smsa}{1 if not in SMSA (Standard Metropolitan Statistical Area) (numeric)}
\item{c_city}{1 if in central city (numeric)}
\item{south}{1 if in south (numeric)}
\item{ind_code}{Industry code (numeric)}
\item{occ_code}{Occupation code (numeric)}
\item{union}{1 if union member (numeric)}
\item{wks_ue}{Weeks unemployed last year (numeric)}
\item{ttl_exp}{Total work experience (years) (numeric)}
\item{tenure}{Job tenure in years (numeric)}
\item{hours}{Usual hours worked per week (numeric)}
\item{wks_work}{Weeks worked last year (numeric)}
\item{ln_wage}{Natural log of real hourly wage, i.e. ln(wage / GNP deflator) (numeric)}
}
}
\source{
Original data from Stata Press:
\url{https://www.stata-press.com/data/r19/nlswork.dta}

National Longitudinal Survey of Young Women, 1968-1988.
U.S. Bureau of Labor Statistics.
}
\usage{
nlswork_subset
}
\description{
A subset of 300 randomly sampled women from the National Longitudinal Survey
of Young Women, 1968-1988. This is a subsample of the full nlswork dataset
commonly used in Stata examples. The data contains labor market information
for young women tracked over multiple years.
}
\details{
This dataset is a subset of the nlswork data available from Stata Press.
It contains 300 randomly sampled individuals from the original 4,711 women,
preserving all time periods for the selected individuals. The data is an
unbalanced panel with varying numbers of observations per individual.

The subset was created using:
\preformatted{
# `nlswork` is the full Stata Press dataset; how it was read into R is not
# shown here (presumably imported from the .dta file listed under Source).

# Fix the RNG seed so the same individuals are drawn on every run
set.seed(123)
# Distinct person identifiers present in the full panel
unique_ids <- unique(nlswork$idcode)
# Draw 300 identifiers without replacement
sampled_ids <- sample(unique_ids, size = 300, replace = FALSE)
# Keep every row (all available years) belonging to a sampled person
nlswork_subset <- nlswork[nlswork$idcode \%in\% sampled_ids, ]
}
}
\examples{
# Make the bundled dataset available in the workspace
data(nlswork_subset)

# Overview of the columns and their types
str(nlswork_subset)

# Distribution of the log-wage variable
summary(nlswork_subset$ln_wage)

# Unbalanced panel: number of individuals by count of observed rows
table(table(nlswork_subset$idcode))

\dontrun{
# Worked example: varying-effects model with xtvfreg

# Two-level grouping factor built from race codes 1 and 2
# (rows with any other code get NA in race_group)
nlswork_subset[["race_group"]] <- factor(
  nlswork_subset[["race"]],
  levels = 1:2,
  labels = c("white", "black")
)

# Decompose tenure into a person-level mean (between component) ...
nlswork_subset[["m_tenure"]] <- with(
  nlswork_subset,
  ave(tenure, idcode, FUN = function(v) mean(v, na.rm = TRUE))
)
# ... and the deviation from that mean (within component)
nlswork_subset[["d_tenure"]] <- with(nlswork_subset, tenure - m_tenure)

# Fit the model on rows with an observed wage and race coded 1 or 2
result <- xtvfreg(
  formula = ln_wage ~ 1,
  data = subset(nlswork_subset, !is.na(ln_wage) & race \%in\% 1:2),
  group = "race_group",
  panel_id = "idcode",
  mean_vars = c("m_tenure", "d_tenure", "age"),
  var_vars = c("m_tenure", "age"),
  verbose = TRUE
)

# Inspect the fitted model
summary(result)
}

}
\references{
Center for Human Resource Research. (2002). NLS Handbook 2001.
Columbus, OH: The Ohio State University.
}
\keyword{datasets}
