#!/usr/bin/env Rscript

# category_frequency.R
#
# Reads a CSV file, counts how often each distinct value occurs in one column,
# and prints the most frequent values as pretty-printed JSON on stdout.
#
# Usage:
#   Rscript category_frequency.R <data_file_path> <params_json>
#
# <params_json> is decoded with jsonlite::fromJSON() and must yield an object
# with these fields:
#   column          (required) name of the column to analyse
#   top_n           (optional) maximum number of rows to report; default 10
#   include_missing (optional) when exactly `true`, missing values are counted
#                   as their own category; anything else means "exclude"
#   encoding        (optional) file encoding passed to readr::locale();
#                   default "UTF-8"
#
# Any validation or read failure aborts the script through stop().

# Null/NA-coalescing helper: returns `rhs` when `lhs` is NULL, zero-length, or
# a single NA; otherwise returns `lhs` unchanged. The NA test is restricted to
# length-1 values so that `||` never receives a condition of length != 1.
`%||%` <- function(lhs, rhs) {
  is_absent <- is.null(lhs) ||
    length(lhs) == 0L ||
    (length(lhs) == 1L && isTRUE(is.na(lhs)))
  if (is_absent) rhs else lhs
}

suppressPackageStartupMessages({
  library(jsonlite)
  library(readr)
})

# ---- Command-line arguments ----

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("Expected arguments: <data_file_path> <params_json>", call. = FALSE)
}

data_file_path <- args[1]
params_json <- args[2]

if (!file.exists(data_file_path)) {
  stop(sprintf("Data file not found at %s", data_file_path), call. = FALSE)
}

# ---- Parameters ----

# Report JSON problems directly. Falling back to an empty list would make the
# script fail a few lines later with a misleading "column is required" error.
params <- tryCatch(
  fromJSON(params_json),
  error = function(e) {
    stop(
      sprintf("Failed to parse parameters JSON: %s", conditionMessage(e)),
      call. = FALSE
    )
  }
)
if (!is.list(params)) {
  stop("Parameters JSON must be an object with named fields", call. = FALSE)
}

# Fetch fields with exact name matching (`$` on a list matches partially).
target_column <- params[["column"]]
column_ok <- is.atomic(target_column) &&
  length(target_column) == 1L &&
  !is.na(target_column) &&
  nzchar(as.character(target_column))
if (!column_ok) {
  stop(
    "Parameter 'column' is required for category_frequency.R",
    call. = FALSE
  )
}
target_column <- as.character(target_column)

# A non-numeric top_n coerces to NA; that case is reported by the check below,
# so the coercion warning itself is not useful.
top_n <- suppressWarnings(as.integer(params[["top_n"]] %||% 10))
if (length(top_n) != 1L || is.na(top_n) || top_n < 0L) {
  stop("Parameter 'top_n' must be a single non-negative integer", call. = FALSE)
}

include_missing <- isTRUE(params[["include_missing"]])

# ---- Load data ----

data <- tryCatch(
  read_csv(
    file = data_file_path,
    show_col_types = FALSE,
    progress = FALSE,
    locale = locale(encoding = params[["encoding"]] %||% "UTF-8")
  ),
  error = function(e) {
    stop(sprintf("Failed to read CSV: %s", conditionMessage(e)), call. = FALSE)
  }
)

if (!target_column %in% names(data)) {
  stop(sprintf("Column '%s' not found in dataset", target_column), call. = FALSE)
}

# ---- Frequency table ----

column_vector <- data[[target_column]]
if (!include_missing) {
  column_vector <- column_vector[!is.na(column_vector)]
}

# With include_missing, useNA = "always" adds an NA category even when its
# count is zero.
counts <- table(
  column_vector,
  useNA = if (include_missing) "always" else "no"
)

# Build the result from plain vectors rather than as.data.frame(<table>), so
# that an empty table still produces a two-column, zero-row data frame.
values <- as.character(names(counts))
tallies <- as.integer(counts)
by_count <- order(tallies, decreasing = TRUE)

freq_df <- head(
  data.frame(
    value = values[by_count],
    count = tallies[by_count],
    stringsAsFactors = FALSE
  ),
  top_n
)

# ---- Output ----

output <- list(
  message = sprintf(
    "Top %s frequency distribution for '%s'.",
    top_n,
    target_column
  ),
  analyzed_column = target_column,
  top_n = top_n,
  include_missing = include_missing,
  frequencies = freq_df
)

cat(
  toJSON(
    output,
    pretty = TRUE,
    auto_unbox = TRUE,
    na = "null",
    dataframe = "rows"
  )
)