test_that("flag_top_anomalies works with contamination rate", {
  # Fixture size and contamination rate are named once so that the upper
  # bound asserted below cannot drift away from the data it refers to.
  n_rows <- 100L
  rate <- 0.05

  # No seed is set in this test, so the generated values vary between runs.
  data <- data.frame(
    patient_id = seq_len(n_rows),
    age = rnorm(n_rows, 50, 15),
    cost = rnorm(n_rows, 10000, 5000)
  )

  scored_data <- score_anomaly(data, contamination = rate)
  flagged_data <- flag_top_anomalies(scored_data, contamination = rate)

  # The flag column must be present and hold logical values.
  expect_true("is_anomaly" %in% names(flagged_data))
  expect_type(flagged_data$is_anomaly, "logical")

  # At most ceiling(n * rate) rows flagged, plus one to allow small rounding.
  max_flagged <- ceiling(n_rows * rate) + 1
  expect_lte(sum(flagged_data$is_anomaly), max_flagged)
})

test_that("flag_top_anomalies works with fixed threshold", {
  # Score value at or above which a row is expected to be flagged.
  cutoff <- 0.9

  patients <- data.frame(
    patient_id = seq_len(100),
    age = rnorm(100, mean = 50, sd = 15),
    cost = rnorm(100, mean = 10000, sd = 5000)
  )

  flagged <- flag_top_anomalies(score_anomaly(patients), threshold = cutoff)

  expect_true("is_anomaly" %in% names(flagged))

  # Every row at or above the cutoff is flagged ...
  at_or_above <- flagged$anomaly_score >= cutoff
  expect_true(all(flagged$is_anomaly[at_or_above]))

  # ... and no row strictly below the cutoff is flagged.
  below <- flagged$anomaly_score < cutoff
  expect_false(any(flagged$is_anomaly[below]))
})

test_that("flag_top_anomalies uses contamination from attributes", {
  patients <- data.frame(
    patient_id = seq_len(100),
    age = rnorm(100, mean = 50, sd = 15),
    cost = rnorm(100, mean = 10000, sd = 5000)
  )

  # The contamination rate is given only at scoring time; the flagging call
  # below receives neither a threshold nor a contamination argument.
  scored <- score_anomaly(patients, contamination = 0.10)
  flagged <- flag_top_anomalies(scored)

  expect_true("is_anomaly" %in% names(flagged))

  # A threshold attribute must have been attached to the result.
  stored_threshold <- attr(flagged, "anomaly_threshold")
  expect_false(is.null(stored_threshold))
})

test_that("flag_top_anomalies errors on invalid inputs", {
  unscored <- data.frame(
    patient_id = seq_len(100),
    age = rnorm(100, mean = 50, sd = 15)
  )

  # Structurally invalid input: a data frame that has not been scored
  # (no anomaly_score column), and a value that is not a data frame at all.
  expect_error(flag_top_anomalies(unscored))
  expect_error(flag_top_anomalies("not a data frame"))

  scored <- score_anomaly(unscored)

  # Thresholds below 0 or above 1 must be rejected.
  expect_error(flag_top_anomalies(scored, threshold = -1))
  expect_error(flag_top_anomalies(scored, threshold = 2))

  # Contamination rates below 0 or above 1 must be rejected.
  expect_error(flag_top_anomalies(scored, contamination = -1))
  expect_error(flag_top_anomalies(scored, contamination = 1.5))
})

test_that("flag_top_anomalies stores threshold as attribute", {
  patients <- data.frame(
    patient_id = seq_len(100),
    age = rnorm(100, mean = 50, sd = 15),
    cost = rnorm(100, mean = 10000, sd = 5000)
  )

  flagged <- flag_top_anomalies(
    score_anomaly(patients, contamination = 0.05),
    contamination = 0.05
  )

  # The stored threshold must exist, be numeric, and lie within [0, 1].
  stored <- attr(flagged, "anomaly_threshold")
  expect_false(is.null(stored))
  expect_true(is.numeric(stored))
  expect_false(stored < 0 || stored > 1)
})

test_that("flag_top_anomalies handles edge cases", {
  # Degenerate input: age and cost are constant across all rows, so only
  # patient_id differs between records.
  constant_rows <- data.frame(
    patient_id = seq_len(10),
    age = rep(50, times = 10),
    cost = rep(10000, times = 10)
  )

  flagged <- flag_top_anomalies(
    score_anomaly(constant_rows),
    contamination = 0.2
  )

  # Only the presence of the flag column is checked for this case.
  expect_true("is_anomaly" %in% names(flagged))
})

