When ground truth labels are available, autoFlagR can
evaluate the performance of anomaly detection algorithms using standard
metrics. This is essential for validation studies and publication.
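The walkthrough below assumes autoFlagR is already installed and attached; a minimal setup sketch (the installation source is not specified here, so use whatever channel you normally install the package from):

# Attach autoFlagR before running the examples that follow
library(autoFlagR)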
set.seed(123)
# Create dataset
n <- 1000
data <- data.frame(
  id = 1:n,
  feature1 = rnorm(n, 100, 15),
  feature2 = rnorm(n, 50, 10),
  feature3 = rpois(n, 5),
  category = sample(c("A", "B", "C"), n, replace = TRUE)
)
# Introduce known anomalies (ground truth)
anomaly_indices <- c(1:20, 50:55, 100:110)
data$feature1[anomaly_indices] <- data$feature1[anomaly_indices] * 5 # Extreme values
data$feature2[anomaly_indices] <- data$feature2[anomaly_indices] * 3
data$feature3[anomaly_indices] <- data$feature3[anomaly_indices] * 10
# Create ground truth labels and add to data
data$is_error <- rep(FALSE, n)
data$is_error[anomaly_indices] <- TRUE
cat("Total anomalies in ground truth:", sum(data$is_error), "\n")
#> Total anomalies in ground truth: 37

# Score anomalies with ground truth for benchmarking
scored_data <- score_anomaly(
  data,
  method = "iforest",
  contamination = 0.05,
  ground_truth_col = "is_error"
)
#> Warning in (function (data, sample_size = min(nrow(data), 10000L), ntrees =
#> 500, : Attempting to use more than 1 thread, but package was compiled without
#> OpenMP support. See
#> https://github.com/david-cortes/installing-optimized-libraries#4-macos-install-and-enable-openmp
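Before checking the benchmark results, it can help to see what score_anomaly() actually returned. The snippet below is a purely illustrative base-R peek; no specific column names are assumed, since this article does not list them:

# Quick look at the object returned by score_anomaly()
# (column names added by the function are version-dependent, so none are assumed here)
names(scored_data)
head(scored_data, 3)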
# Check if benchmarking was performed
if (!is.null(attr(scored_data, "benchmark_metrics"))) {
  cat("Benchmarking metrics available!\n")
}
#> Benchmarking metrics available!

# Extract benchmark metrics
metrics <- extract_benchmark_metrics(scored_data)
# Display metrics
cat("AUC-ROC:", round(metrics$auc_roc, 4), "\n")
#> AUC-ROC: 1
cat("AUC-PR:", round(metrics$auc_pr, 4), "\n")
#> AUC-PR: 0.0187
cat("Contamination Rate:", round(metrics$contamination_rate * 100, 2), "%\n")
#> Contamination Rate: 5 %
cat("\nTop-K Recall:\n")
#>
#> Top-K Recall:
for (k_name in names(metrics$top_k_recall)) {
  k_value <- gsub("top_", "", k_name)
  recall <- metrics$top_k_recall[[k_name]]
  cat(" Top", k_value, ":", round(recall * 100, 2), "%\n")
}
#> Top 10 : 0 %
#> Top 50 : 0 %
#> Top 100 : 0 %
#> Top 500 : 0 %

# Compare Isolation Forest vs LOF
methods <- c("iforest", "lof")
results <- list()
for (method in methods) {
  scored <- score_anomaly(
    data,
    method = method,
    contamination = 0.05,
    ground_truth_col = "is_error"
  )
  metrics <- extract_benchmark_metrics(scored)
  results[[method]] <- metrics
}
#> Warning in (function (data, sample_size = min(nrow(data), 10000L), ntrees =
#> 500, : Attempting to use more than 1 thread, but package was compiled without
#> OpenMP support. See
#> https://github.com/david-cortes/installing-optimized-libraries#4-macos-install-and-enable-openmp
# Create comparison table
comparison <- data.frame(
  Method = c("Isolation Forest", "Local Outlier Factor"),
  AUC_ROC = c(results$iforest$auc_roc, results$lof$auc_roc),
  AUC_PR = c(results$iforest$auc_pr, results$lof$auc_pr),
  Top_10_Recall = c(results$iforest$top_k_recall$top_10, results$lof$top_k_recall$top_10)
)
comparison
#> Method AUC_ROC AUC_PR Top_10_Recall
#> 1 Isolation Forest 1.0000000 0.01873248 0.0
#> 2 Local Outlier Factor 0.4900508 0.04805959 0.1

- AUC-ROC: Area under the ROC curve. Higher is better (max = 1.0). Measures the ability to distinguish between normal and anomalous records.
- AUC-PR: Area under the Precision-Recall curve. Higher is better (max = 1.0). More informative than AUC-ROC when classes are imbalanced.
- Top-K Recall: Percentage of true anomalies found in the top K highest-scoring records. Useful for prioritizing manual review; a manual sketch of the calculation follows this list.
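Both AUC-ROC and Top-K Recall can be reproduced from a vector of anomaly scores and the ground-truth labels with a few lines of base R. The sketch below is illustrative only: `scores` is a placeholder for the numeric score column produced by score_anomaly() (its name is not shown in this article), and `data$is_error` supplies the labels.

# Illustrative re-computation of AUC-ROC and top-K recall from raw scores.
# `scores` is a placeholder; substitute the actual score column from scored_data.
manual_metrics <- function(scores, labels, k = 100) {
  n_pos <- sum(labels)
  n_neg <- sum(!labels)

  # AUC-ROC via the rank-based (Mann-Whitney) formulation
  r <- rank(scores)
  auc_roc <- (sum(r[labels]) - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)

  # Top-K recall: share of true anomalies among the K highest-scoring records
  top_k <- order(scores, decreasing = TRUE)[seq_len(k)]
  top_k_recall <- sum(labels[top_k]) / n_pos

  list(auc_roc = auc_roc, top_k_recall = top_k_recall)
}

# Example call (replace `scores` with your actual score vector):
# manual_metrics(scores, data$is_error, k = 100)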
Benchmarking with ground truth labels allows you to:

- Quantitatively evaluate algorithm performance
- Compare different methods and parameters (a minimal sweep over contamination values is sketched below)
- Report publication-quality metrics
- Validate detection capabilities before deployment
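As an example of a parameter sweep, the sketch below reuses only the score_anomaly() and extract_benchmark_metrics() calls shown above; the contamination values are arbitrary choices for illustration.

# Sketch: sweep the contamination parameter for one method and collect metrics
contamination_grid <- c(0.01, 0.05, 0.10)
sweep_results <- lapply(contamination_grid, function(cont) {
  scored <- score_anomaly(
    data,
    method = "iforest",
    contamination = cont,
    ground_truth_col = "is_error"
  )
  m <- extract_benchmark_metrics(scored)
  data.frame(contamination = cont, auc_roc = m$auc_roc, auc_pr = m$auc_pr)
})
do.call(rbind, sweep_results)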
For more details, see the Function Reference.