#!/usr/bin/env Rscript

# Descriptive-statistics script.
#
# Usage: Rscript <this script> <data_file_path> <params_json>
#
# Reads the CSV at `data_file_path`, computes summary statistics for every
# numeric column and prints one JSON document to stdout containing a status
# message, the per-column statistics, a preview of up to five rows and the
# parsed parameters.
#
# `params_json` is a JSON string; the keys read by this script are:
#   encoding   file encoding handed to readr::locale() (default "UTF-8")
#   guess_max  rows used for column type guessing (default 1000)
#
# Diagnostics (warnings, errors) go to stderr so stdout stays valid JSON.

# Helpers ----

# Default operator: returns `rhs` when `lhs` is absent, otherwise `lhs`.
# "Absent" means NULL / zero-length, or a single NA. Values longer than one
# are returned untouched, so the scalar `||` is never fed a vector.
`%||%` <- function(lhs, rhs) {
  is_absent <- length(lhs) == 0L ||
    (length(lhs) == 1L && isTRUE(is.na(lhs)))
  if (is_absent) rhs else lhs
}

# Summarise one numeric vector into a named list of statistics.
# Statistics that cannot be computed (no observed values, or fewer than two
# for `sd`) are NA, which is serialised as JSON null below. Guarding the
# empty case avoids the "no non-missing arguments to min/max" warnings and
# the Inf/-Inf results base R would otherwise produce.
summarise_numeric_column <- function(column) {
  observed <- column[!is.na(column)]
  n_observed <- length(observed)
  has_data <- n_observed > 0L

  list(
    count = n_observed,
    mean = if (has_data) mean(observed) else NA_real_,
    median = if (has_data) median(observed) else NA_real_,
    sd = if (n_observed > 1L) sd(observed) else NA_real_,
    min = if (has_data) min(observed) else NA_real_,
    max = if (has_data) max(observed) else NA_real_,
    missing = length(column) - n_observed
  )
}

# Dependencies ----

suppressPackageStartupMessages({
  library(jsonlite)
  library(readr)
  library(dplyr)
})

# Command-line arguments ----

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  # NOTE(review): the original message ended after "Expected arguments: ";
  # the placeholders are reconstructed from the variable names used below.
  stop("Expected arguments: <data_file_path> <params_json>", call. = FALSE)
}

data_file_path <- args[1]
params_json <- args[2]

if (!file.exists(data_file_path)) {
  stop(sprintf("Data file not found at %s", data_file_path), call. = FALSE)
}

# Parameters ----

# Unparseable JSON is tolerated (defaults are used), but reported on stderr
# instead of being swallowed silently.
params <- tryCatch(
  fromJSON(params_json),
  error = function(e) {
    warning(
      "Could not parse params JSON; using defaults: ", conditionMessage(e),
      call. = FALSE
    )
    list()
  }
)

# Valid JSON that is not an object (e.g. a bare number or string) cannot
# carry named options; fall back to defaults rather than failing on lookup.
if (!is.null(params) && !is.list(params)) {
  warning("Params JSON is not an object; using defaults.", call. = FALSE)
  params <- list()
}

# `[[` matches names exactly; `$` on a list would partially match.
encoding <- params[["encoding"]] %||% "UTF-8"
if (!is.character(encoding) || length(encoding) != 1L) {
  stop("Parameter 'encoding' must be a single string.", call. = FALSE)
}

guess_max <- params[["guess_max"]] %||% 1000
if (!is.numeric(guess_max) || length(guess_max) != 1L || guess_max < 0) {
  stop(
    "Parameter 'guess_max' must be a single non-negative number.",
    call. = FALSE
  )
}

# Read data ----

data <- tryCatch(
  read_csv(
    file = data_file_path,
    show_col_types = FALSE,
    progress = FALSE,
    guess_max = guess_max,
    locale = locale(encoding = encoding)
  ),
  error = function(e) {
    stop(
      sprintf("Failed to read CSV: %s", conditionMessage(e)),
      call. = FALSE
    )
  }
)

# Statistics ----

numeric_columns <- select(data, where(is.numeric))
column_stats <- lapply(numeric_columns, summarise_numeric_column)

message_text <- if (length(column_stats) > 0) {
  "Descriptive statistics generated successfully."
} else {
  "No numeric columns detected; returning dataset preview only."
}

# head() already caps at the number of available rows.
preview_rows <- head(data, n = 5)

# Output ----

output <- list(
  message = message_text,
  numeric_columns = column_stats,
  sample_rows = preview_rows,
  params = params
)

cat(
  toJSON(
    output,
    pretty = TRUE,
    auto_unbox = TRUE,
    na = "null",
    dataframe = "rows"
  )
)