## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 6,
  fig.height = 4
)
library(joinspy)

## -----------------------------------------------------------------------------
orders <- data.frame(
  customer_id = c(1, 2, 2, 3),
  amount = c(100, 50, 75, 200)
)

addresses <- data.frame(
  customer_id = c(1, 2, 2, 3),
  address = c("NYC", "LA", "SF", "Chicago")
)

join_spy(orders, addresses, by = "customer_id")

## -----------------------------------------------------------------------------
key_duplicates(orders, by = "customer_id")
key_duplicates(addresses, by = "customer_id")

## -----------------------------------------------------------------------------
sales <- data.frame(
  product = c("Widget", "Gadget ", " Gizmo"),
  units = c(10, 20, 30),
  stringsAsFactors = FALSE
)

inventory <- data.frame(
  product = c("Widget", "Gadget", "Gizmo"),
  stock = c(100, 200, 300),
  stringsAsFactors = FALSE
)

join_spy(sales, inventory, by = "product")

## -----------------------------------------------------------------------------
sales_fixed <- join_repair(sales, by = "product")
key_check(sales_fixed, inventory, by = "product")

## -----------------------------------------------------------------------------
left <- data.frame(
  code = c("ABC", "def", "GHI"),
  value = 1:3,
  stringsAsFactors = FALSE
)

right <- data.frame(
  code = c("abc", "DEF", "ghi"),
  label = c("A", "D", "G"),
  stringsAsFactors = FALSE
)

join_spy(left, right, by = "code")

## -----------------------------------------------------------------------------
repaired <- join_repair(left, right, by = "code", standardize_case = "upper")
key_check(repaired$x, repaired$y, by = "code")

## -----------------------------------------------------------------------------
orders <- data.frame(
  customer_id = c(1, NA, 3, NA),
  amount = c(100, 200, 300, 400)
)

customers <- data.frame(
  customer_id = c(1, 2, 3, NA),
  name = c("Alice", "Bob", "Carol", "Unknown")
)

join_spy(orders, customers, by = "customer_id")

## -----------------------------------------------------------------------------
# Option 1: Remove rows with NA keys
orders_clean <- orders[!is.na(orders$customer_id), ]

# Option 2: Replace NA with placeholder
orders$customer_id[is.na(orders$customer_id)] <- -999

## -----------------------------------------------------------------------------
system_a <- data.frame(
  user_id = c("USR001", "USR002", "USR003"),
  score = c(85, 90, 78),
  stringsAsFactors = FALSE
)

system_b <- data.frame(
  user_id = c("1", "2", "3"),
  department = c("Sales", "Marketing", "Engineering"),
  stringsAsFactors = FALSE
)

report <- join_spy(system_a, system_b, by = "user_id")

## -----------------------------------------------------------------------------
# Extract numeric part
system_a$user_num <- gsub("USR0*", "", system_a$user_id)
key_check(system_a, system_b, by = c("user_num" = "user_id"))

## -----------------------------------------------------------------------------
order_items <- data.frame(
  order_id = c(1, 1, 2, 2, 2),
  item = c("A", "B", "C", "D", "E")
)

order_payments <- data.frame(
  order_id = c(1, 1, 2, 2),
  payment = c("CC1", "CC2", "Cash", "Check")
)

report <- join_spy(order_items, order_payments, by = "order_id")

## -----------------------------------------------------------------------------
check_cartesian(order_items, order_payments, by = "order_id")

## ----error = TRUE-------------------------------------------------------------
try({
# Enforce cardinality to catch this
join_strict(order_items, order_payments, by = "order_id", expect = "1:m")
})

## -----------------------------------------------------------------------------
orders <- data.frame(
  product_id = c(1, 2, 3),
  quantity = c(10, 20, 30)
)

products <- data.frame(
  product_id = c("1", "2", "3"),
  name = c("Widget", "Gadget", "Gizmo"),
  stringsAsFactors = FALSE
)

join_spy(orders, products, by = "product_id")

## -----------------------------------------------------------------------------
orders$product_id <- as.character(orders$product_id)
key_check(orders, products, by = "product_id")

## -----------------------------------------------------------------------------
left <- data.frame(
  id = c("A", "", "C"),
  value = 1:3,
  stringsAsFactors = FALSE
)

right <- data.frame(
  id = c("A", "B", ""),
  label = c("Alpha", "Beta", "Empty"),
  stringsAsFactors = FALSE
)

join_spy(left, right, by = "id")

## -----------------------------------------------------------------------------
left_fixed <- join_repair(left, by = "id", empty_to_na = TRUE)
left_fixed$id

## ----error = TRUE-------------------------------------------------------------
try({
products <- data.frame(id = 1:3, name = c("A", "B", "C"))
prices <- data.frame(id = c(1, 2, 2, 3), price = c(10, 20, 25, 30))

join_strict(products, prices, by = "id", expect = "1:1")
})

## -----------------------------------------------------------------------------
orders <- data.frame(id = c(1, 1, 2, 3), item = c("A", "B", "C", "D"))
customers <- data.frame(id = 1:3, name = c("Alice", "Bob", "Carol"))

detect_cardinality(orders, customers, by = "id")

## -----------------------------------------------------------------------------
sessionInfo()

