From 603986d8bf790b425bd76415422692d784293c56 Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Tue, 24 Feb 2026 10:11:22 -0800 Subject: [PATCH 1/4] improve performance of parse_connectapi_typed --- R/parse.R | 78 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/R/parse.R b/R/parse.R index c917f2da..2e141d37 100644 --- a/R/parse.R +++ b/R/parse.R @@ -47,13 +47,12 @@ ensure_columns <- function(.data, ptype, strict = FALSE) { ensure_column <- function(data, default, name) { stopifnot(length(default) == 1) col <- data[[name]] - scoped_experimental_silence() if (rlang::is_null(col)) { col <- vctrs::vec_rep(default, nrow(data)) col <- vctrs::vec_cast(col, default) } else { if ( - vctrs::vec_is(default, NA_datetime_) && !vctrs::vec_is(col, NA_datetime_) + inherits(default, "POSIXct") && !inherits(col, "POSIXct") ) { # manual fix because vctrs::vec_cast cannot cast double -> datetime or char -> datetime col <- coerce_datetime(col, default, name = name) @@ -89,19 +88,31 @@ parse_connectapi_typed <- function(data, ptype, strict = FALSE) { parse_connectapi <- function(data) { if (length(data) == 0) return(tibble::tibble()) - all_names <- unique(unlist(lapply(data, names))) - cols <- stats::setNames(lapply(all_names, function(nm) { - # NULL / missing fields become NA; unlist() will coerce to the right type - values <- lapply(data, function(row) row[[nm]] %||% NA) - if (any(vapply(values, function(v) is.list(v) || length(v) > 1, logical(1)))) { - # List column: wrap scalars so every element is a list + all_names <- unique(unlist(lapply(data, names), use.names = FALSE)) + n <- length(data) + + cols <- lapply(all_names, function(nm) { + # .subset2 is the internal no-dispatch version of `[[` + values <- lapply(data, .subset2, nm) + nulls <- vapply(values, is.null, logical(1)) + + # Determine column type from first non-NULL value + is_list_col <- FALSE + if (!all(nulls)) { + first_val <- values[[which.min(nulls)]] + 
is_list_col <- is.list(first_val) || length(first_val) > 1L + } + + values[nulls] <- list(NA) + + if (is_list_col) { lapply(values, function(v) if (is.list(v)) v else list(v)) } else { - # Scalar column: simplify to a vector - unlist(values) + unlist(values, use.names = FALSE) } - }), all_names) - tibble::as_tibble(cols) + }) + names(cols) <- all_names + tibble::new_tibble(cols, nrow = n) } coerce_fsbytes <- function(x, to, ...) { @@ -157,21 +168,34 @@ coerce_datetime <- function(x, to, ...) { # - "2020-01-01T00:02:03-01:00" # nolint end parse_connect_rfc3339 <- function(x) { - # Convert timestamps with offsets to a format recognized by `strptime`. - x <- gsub("([+-]\\d\\d):(\\d\\d)$", "\\1\\2", x) - x <- gsub("Z$", "+0000", x) - - # Parse with an inner call to `strptime()`, which returns a POSIXlt object, - # and convert that to `POSIXct`. - # - # We must specify `tz` in the inner call to correctly compute date math. - # Specifying `tz` when in the outer call just changes the time zone without - # doing any date math! - # - # > xlt [1] "2024-08-29 16:36:33 EDT" tzone(xlt) [1] "America/New_York" - # as.POSIXct(xlt, tz = "UTC") [1] "2024-08-29 16:36:33 UTC" - format_string <- "%Y-%m-%dT%H:%M:%OS%z" - as.POSIXct(x, format = format_string, tz = Sys.timezone()) + if (length(x) == 0) return(.POSIXct(double(), tz = Sys.timezone())) + + # The date portion is always at fixed positions: YYYY-MM-DDTHH:MM:SS + dates <- as.Date(substr(x, 1, 10)) + hour <- as.integer(substr(x, 12, 13)) + min <- as.integer(substr(x, 15, 16)) + + # Seconds (with optional fractional part) run from position 18 to just before + # the timezone suffix. The suffix is either "Z" (1 char) or "+HH:MM" (6 chars). 
+ nc <- nchar(x) + is_utc <- endsWith(x, "Z") + tz_len <- ifelse(is_utc, 1L, 6L) + sec <- as.double(substr(x, 18, nc - tz_len)) + + # Compute timezone offset in seconds for non-UTC timestamps + tz_offset <- rep(0, length(x)) + non_utc <- which(!is_utc) + if (length(non_utc) > 0) { + tz_str <- substr(x[non_utc], nc[non_utc] - 5, nc[non_utc]) + tz_sign <- ifelse(substr(tz_str, 1, 1) == "+", 1, -1) + tz_h <- as.integer(substr(tz_str, 2, 3)) + tz_m <- as.integer(substr(tz_str, 5, 6)) + tz_offset[non_utc] <- tz_sign * (tz_h * 3600L + tz_m * 60L) + } + + # Build epoch seconds directly: date days * 86400 + time of day - tz offset + epoch <- as.double(dates) * 86400 + hour * 3600 + min * 60 + sec - tz_offset + .POSIXct(epoch, tz = Sys.timezone()) } vec_cast.POSIXct.double <- # nolint: object_name_linter From d6cf43b156fe4b2aeed33405fef6b3b007838679 Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Tue, 24 Feb 2026 12:04:47 -0800 Subject: [PATCH 2/4] remove ptype and simplify parsing --- DESCRIPTION | 1 - R/connect.R | 28 +++- R/connectapi.R | 2 - R/content.R | 47 ++++-- R/get.R | 34 +++-- R/groups.R | 9 +- R/integrations.R | 5 +- R/lazy.R | 70 ++++++--- R/parse.R | 192 ++++++++++-------------- R/ptype.R | 269 ---------------------------------- R/remote.R | 2 +- R/user.R | 1 + R/variant.R | 9 +- man/PositConnect.Rd | 18 ++- tests/integrated/helper.R | 25 +++- tests/integrated/test-get.R | 48 +++--- tests/integrated/test-lazy.R | 35 +++-- tests/testthat/setup.R | 12 +- tests/testthat/test-content.R | 55 +------ tests/testthat/test-get.R | 28 +--- tests/testthat/test-parse.R | 237 ++++++++++++++++-------------- 21 files changed, 445 insertions(+), 682 deletions(-) delete mode 100644 R/ptype.R diff --git a/DESCRIPTION b/DESCRIPTION index 9191dc5a..19c4ee61 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -87,7 +87,6 @@ Collate: 'page.R' 'parse.R' 'promote.R' - 'ptype.R' 'remote.R' 'runtime-caches.R' 'schedule.R' diff --git a/R/connect.R b/R/connect.R index 0701d670..4c52b807 100644 
--- a/R/connect.R +++ b/R/connect.R @@ -138,8 +138,11 @@ Connect <- R6::R6Class( #' @param parser How the response is parsed. If `NULL`, the `httr_response` #' will be returned. Otherwise, the argument is forwarded to #' `httr::content(res, as = parser)`. + #' @param simplify Logical; if `TRUE`, JSON arrays of objects are + #' simplified to data frames by jsonlite. Default `FALSE` preserves + #' list-of-lists for compatibility with pagination helpers. #' @param ... Additional arguments passed to the request function - request = function(method, url, ..., parser = "parsed") { + request = function(method, url, ..., parser = "parsed", simplify = FALSE) { old_opt <- options(scipen = 999) on.exit(options(old_opt), add = TRUE) @@ -161,7 +164,21 @@ Connect <- R6::R6Class( res } else { self$raise_error(res) - httr::content(res, as = parser) + if (parser != "parsed") { + return(httr::content(res, as = parser)) + } + if (is.null(res$content) || length(res$content) == 0) { + return(NULL) + } + content_text <- httr::content(res, as = "text", encoding = "UTF-8") + if (is.null(content_text) || nchar(content_text) == 0) { + return(NULL) + } + jsonlite::fromJSON( + content_text, + simplifyVector = simplify, + simplifyDataFrame = simplify + ) } }, @@ -173,8 +190,11 @@ Connect <- R6::R6Class( #' @param parser How the response is parsed. If `NULL`, the `httr_response` #' will be returned. Otherwise, the argument is forwarded to #' `httr::content(res, as = parser)`. - GET = function(path, ..., url = self$api_url(path), parser = "parsed") { - self$request("GET", url, parser = parser, ...) + #' @param simplify Logical; if `TRUE`, JSON arrays of objects are + #' simplified to data frames by jsonlite. Default `FALSE` preserves + #' list-of-lists for compatibility with pagination helpers. + GET = function(path, ..., url = self$api_url(path), parser = "parsed", simplify = FALSE) { + self$request("GET", url, parser = parser, simplify = simplify, ...) 
}, #' @description Perform an HTTP PUT request of the named API path. diff --git a/R/connectapi.R b/R/connectapi.R index 5b625696..bc430527 100644 --- a/R/connectapi.R +++ b/R/connectapi.R @@ -10,7 +10,6 @@ utils::globalVariables( c( ".", "access_type", - "connectapi_ptypes", "guid", "last_deployed_time", "owner_guid", @@ -27,6 +26,5 @@ current_connect_version <- "2024.03.0" .onLoad <- function(...) { vctrs::s3_register("dplyr::collect", "tbl_connect") - vctrs::s3_register("vctrs::vec_cast", "character.integer") invisible() } diff --git a/R/content.R b/R/content.R index 486c7e39..df7d23c8 100644 --- a/R/content.R +++ b/R/content.R @@ -788,7 +788,23 @@ get_jobs <- function(content) { validate_R6_class(content, "Content") jobs <- content$jobs() - parse_connectapi_typed(jobs, connectapi_ptypes$jobs, strict = TRUE) + out <- parse_connectapi_typed( + jobs, + datetime_cols = c("start_time", "end_time", "last_heartbeat_time", "queued_time") + ) + + # The older /applications/ endpoint returns timestamps as Unix epoch integers + # and ID fields as integers. Normalize to match the v1 endpoint's types. + # For the v1 endpoint these are already character/POSIXct, so the coercions + # are no-ops. 
+ out <- coerce_epoch_to_posixct( + out, + c("start_time", "end_time", "last_heartbeat_time", "queued_time") + ) + coerce_to_character( + out, + c("id", "ppid", "pid", "app_id", "content_id", "variant_id", "bundle_id") + ) } #' Terminate Jobs @@ -832,22 +848,19 @@ terminate_jobs <- function(content, keys = NULL) { keys <- all_jobs[all_jobs$status == 0, ]$key if (length(keys) == 0) { message("No active jobs found.") - return(vctrs::vec_ptype(connectapi_ptypes$job_termination)) + return(tibble::tibble()) } } res <- purrr::map(keys, content$register_job_kill_order) res_content <- purrr::map(res, httr::content) - res_df <- tibble::tibble( - parse_connectapi_typed( - res_content, - connectapi_ptypes$job_termination, - strict = TRUE - ) - ) + res_df <- parse_connectapi_typed(res_content) # Errors will not have the job_key. res_df$job_key <- keys - res_df + # Keep only the columns relevant to job termination; the API response + # includes extra fields (e.g. payload, guid) on error that vary by outcome. 
+ keep <- c("app_id", "app_guid", "job_key", "job_id", "result", "code", "error") + res_df[, intersect(keep, names(res_df)), drop = FALSE] } #' @rdname get_jobs @@ -896,7 +909,7 @@ get_log <- function(job, max_log_lines = NULL) { v1_url("content", job$app_guid, "jobs", job$key, "log"), query = query ) - parse_connectapi_typed(res$entries, connectapi_ptypes$job_log) + parse_connectapi_typed(res$entries, datetime_cols = "timestamp") } #' Set RunAs User @@ -1141,7 +1154,8 @@ get_bundles <- function(content) { validate_R6_class(content, "Content") bundles <- content$get_bundles() - parse_connectapi_typed(bundles, connectapi_ptypes$bundles) + out <- parse_connectapi_typed(bundles, datetime_cols = "created_time") + coerce_fs_bytes(out, "size") } #' @rdname get_bundles @@ -1347,7 +1361,7 @@ get_group_permission <- function(content, guid) { get_content_permissions <- function(content, add_owner = TRUE) { validate_R6_class(content, "Content") res <- content$permissions(add_owner = add_owner) - parse_connectapi_typed(res, connectapi_ptypes$permissions) + parse_connectapi_typed(res) } #' Render a content item. @@ -1495,7 +1509,7 @@ content_restart <- function(content) { get_content_packages <- function(content) { error_if_less_than(content$connect$version, "2025.01.0") res <- content$packages() - parse_connectapi_typed(res, connectapi_ptypes$content_packages) + parse_connectapi_typed(res) } #' Search for content on the Connect server @@ -1627,5 +1641,8 @@ as.data.frame.connect_content_list <- function( #' @export as_tibble.connect_content_list <- function(x, ...) 
{ content_data <- purrr::map(x, "content") - parse_connectapi_typed(content_data, connectapi_ptypes$content) + parse_connectapi_typed( + content_data, + datetime_cols = c("created_time", "last_deployed_time") + ) } diff --git a/R/get.R b/R/get.R index 14ee3d31..e36a4bd4 100644 --- a/R/get.R +++ b/R/get.R @@ -73,7 +73,10 @@ get_users <- function( limit = limit ) - out <- parse_connectapi_typed(res, connectapi_ptypes$users) + out <- parse_connectapi_typed( + res, + datetime_cols = c("created_time", "updated_time", "active_time") + ) return(out) } @@ -229,12 +232,8 @@ get_content <- function( # v2024.06.0. if (compare_connect_version(src$version, "2024.06.0") < 0) { include <- "tags,owner" - content_ptype <- connectapi_ptypes$content[, - names(connectapi_ptypes$content) != "vanity_url" - ] } else { include <- "tags,owner,vanity_url" - content_ptype <- connectapi_ptypes$content } res <- src$content( @@ -253,7 +252,10 @@ get_content <- function( res <- res %>% purrr::keep(.p = .p) } - out <- parse_connectapi_typed(res, content_ptype) + out <- parse_connectapi_typed( + res, + datetime_cols = c("created_time", "last_deployed_time") + ) return(out) } @@ -327,7 +329,10 @@ content_list_by_tag <- function(src, tag) { res <- src$GET(v1_url("tags", tag_id, "content")) - out <- parse_connectapi_typed(res, connectapi_ptypes$content) + out <- parse_connectapi_typed( + res, + datetime_cols = c("created_time", "last_deployed_time") + ) return(out) } @@ -425,7 +430,7 @@ get_usage_shiny <- function( res <- page_cursor(src, res, limit = limit) - out <- parse_connectapi_typed(res, connectapi_ptypes$usage_shiny) + out <- parse_connectapi_typed(res, datetime_cols = c("started", "ended")) return(out) } @@ -521,7 +526,7 @@ get_usage_static <- function( res <- page_cursor(src, res, limit = limit) - out <- parse_connectapi_typed(res, connectapi_ptypes$usage_static) + out <- parse_connectapi_typed(res, datetime_cols = "time") return(out) } @@ -658,7 +663,7 @@ as.data.frame.connect_list_hits <- 
function( ..., unnest = TRUE ) { - usage_df <- parse_connectapi_typed(x, connectapi_ptypes$usage) + usage_df <- parse_connectapi_typed(x, datetime_cols = "timestamp") if (unnest) { if (!requireNamespace("tidyr", quietly = TRUE)) { stop( @@ -750,7 +755,7 @@ get_audit_logs <- function( res <- page_cursor(src, res, limit = limit) - out <- parse_connectapi_typed(res, connectapi_ptypes$audit_logs) + out <- parse_connectapi_typed(res, datetime_cols = "time") return(out) } @@ -792,7 +797,8 @@ get_procs <- function(src) { c(list(pid = y), x) } ) - tbl_data <- parse_connectapi_typed(proc_prep, connectapi_ptypes$procs) + tbl_data <- parse_connectapi_typed(proc_prep) + tbl_data <- coerce_fs_bytes(tbl_data, "ram") return(tbl_data) } @@ -1203,7 +1209,7 @@ get_packages <- function(src, name = NULL, page_size = 100000, limit = Inf) { page_size = page_size ) ) - out <- parse_connectapi_typed(res, connectapi_ptypes$packages) + out <- parse_connectapi_typed(res) # Connect is standardizing on using `content_id` and `content_guid`. # Handle that name change now in a forward-compatible way. 
@@ -1239,5 +1245,5 @@ get_packages <- function(src, name = NULL, page_size = 100000, limit = Inf) { #' @export get_vanity_urls <- function(client) { res <- client$vanities() - parse_connectapi_typed(res, connectapi_ptypes$vanities) + parse_connectapi_typed(res, datetime_cols = "created_time") } diff --git a/R/groups.R b/R/groups.R index 79d98edb..1652edb3 100644 --- a/R/groups.R +++ b/R/groups.R @@ -53,7 +53,7 @@ get_groups <- function(src, page_size = 500, prefix = NULL, limit = Inf) { limit = limit ) - parse_connectapi_typed(res, connectapi_ptypes$groups) + parse_connectapi_typed(res) } #' Get users within a specific group @@ -106,7 +106,10 @@ get_group_members <- function(src, guid) { res <- src$group_members(guid) - parse_connectapi(res$results) + parse_connectapi_typed( + res$results, + datetime_cols = c("created_time", "updated_time", "active_time") + ) } #' Get content access permissions for a group or groups @@ -172,7 +175,7 @@ get_one_groups_content <- function(src, guid) { role = NA_character_ )) } - parsed <- parse_connectapi_typed(res, connectapi_ptypes$group_content) + parsed <- parse_connectapi_typed(res) permissions_df <- purrr::list_rbind( purrr::map( diff --git a/R/integrations.R b/R/integrations.R index ad833bbc..d421871d 100644 --- a/R/integrations.R +++ b/R/integrations.R @@ -136,7 +136,10 @@ as.data.frame.connect_integration_list <- function( #' @return A tibble with one row per integration. #' @export as_tibble.connect_integration_list <- function(x, ...) { - parse_connectapi_typed(x, connectapi_ptypes$integrations) + parse_connectapi_typed( + x, + datetime_cols = c("created_time", "updated_time") + ) } # Integration class ---- diff --git a/R/lazy.R b/R/lazy.R index cbb03887..1ca2136d 100644 --- a/R/lazy.R +++ b/R/lazy.R @@ -37,9 +37,12 @@ tbl_connect <- function( from <- arg_match(from) - # TODO: go get the vars we should expect... 
- vars <- connectapi_ptypes[[from]] - if (is.null(vars)) vars <- character() + # Discover column names from a small API request rather than maintaining a + # hardcoded dictionary that must be updated every time the server changes. + vars <- tryCatch( + names(tbl_lazy_fetch(src, from, limit = 1)), + error = function(e) character() + ) # TODO: figure out number of rows... ops <- op_base_connect(from, vars) @@ -47,6 +50,44 @@ tbl_connect <- function( dplyr::make_tbl(c("connect", "lazy"), src = src, ops = ops) } +# Datetime columns for each lazy table type. These must match the +# datetime_cols passed in the corresponding getter functions: +# get_users(), get_groups(), get_content(), get_usage_shiny(), +# get_usage_static(), get_audit_logs(). +lazy_datetime_cols <- list( + users = c("created_time", "updated_time", "active_time"), + groups = character(), + content = c("created_time", "last_deployed_time"), + usage_shiny = c("started", "ended"), + usage_static = "time", + audit_logs = "time" +) + +# Fetch data for a lazy table endpoint. Shared by tbl_connect (for column +# discovery) and api_build.op_base_connect (for full collection). +tbl_lazy_fetch <- function(src, from, limit = Inf) { + if (from == "users") { + res <- page_offset(src, src$users(), limit = limit) + } else if (from == "groups") { + res <- page_offset(src, src$groups(), limit = limit) + } else if (from == "content") { + # TODO: no limit notion here... we just pull all of them... 
+ res <- src$content() + } else if (from == "usage_shiny") { + res <- src$inst_shiny_usage(limit = limit) %>% + page_cursor(src, ., limit = limit) + } else if (from == "usage_static") { + res <- src$inst_content_visits(limit = limit) %>% + page_cursor(src, ., limit = limit) + } else if (from == "audit_logs") { + res <- src$audit_logs(limit = limit) %>% + page_cursor(src, ., limit = limit) + } else { + stop(glue::glue("'{from}' is not recognized")) + } + parse_connectapi_typed(res, datetime_cols = lazy_datetime_cols[[from]]) +} + # This will be registered in .onLoad if dplyr is available collect.tbl_connect <- # nolint function(x, ..., n = Inf) { @@ -65,23 +106,7 @@ api_build.op_head <- function(op, con, ..., n) { #' @export api_build.op_base_connect <- function(op, con, ..., n) { - if (op$x == "users") { - res <- page_offset(con, con$users(), limit = n) - } else if (op$x == "groups") { - res <- page_offset(con, con$groups(), limit = n) - } else if (op$x == "content") { - # TODO: no limit notion here... we just pull all of them... - res <- con$content() - } else if (op$x == "usage_shiny") { - res <- con$inst_shiny_usage(limit = n) %>% page_cursor(con, ., limit = n) - } else if (op$x == "usage_static") { - res <- con$inst_content_visits(limit = n) %>% page_cursor(con, ., limit = n) - } else if (op$x == "audit_logs") { - res <- con$audit_logs(limit = n) %>% page_cursor(con, ., limit = n) - } else { - stop(glue::glue("'{op$x}' is not recognized")) - } - parse_connectapi_typed(res, op$ptype) + tbl_lazy_fetch(con, op$x, limit = n) } cat_line <- function(...) 
{ @@ -124,13 +149,12 @@ op_base_connect <- function(x, vars) { } op_base <- function(x, vars, class = character()) { - stopifnot(is.character(vars) || is.character(names(vars))) + stopifnot(is.character(vars)) structure( list( x = x, - vars = names(vars), - ptype = vars + vars = vars ), class = c(paste0("op_base_", class), "op_base", "op") ) diff --git a/R/parse.R b/R/parse.R index 2e141d37..ae3cc59a 100644 --- a/R/parse.R +++ b/R/parse.R @@ -27,64 +27,80 @@ make_timestamp <- function(input) { safe_format(input, "%Y-%m-%dT%H:%M:%SZ", tz = "UTC", usetz = FALSE) } -ensure_columns <- function(.data, ptype, strict = FALSE) { - # Given a prototype, ensure that all columns are present and cast to the correct type. - # If a column is missing in .data, it will be created with all missing values of the correct type. - # If a column is present in both, it will be cast to the correct type. - # If a column is present in .data but not in ptype, it will be left as is. - # If `strict == TRUE`, include only columns present in the ptype, in the order they occur. - for (i in names(ptype)) { - .data <- ensure_column(.data, ptype[[i]], i) - } +# Post-parse helpers for special column types. These are used by individual +# getter functions to coerce columns that jsonlite cannot infer automatically +# (e.g. byte sizes, 64-bit integers, epoch timestamps). 
- if (strict) { - .data <- .data[, names(ptype), drop = FALSE] +coerce_fs_bytes <- function(df, col) { + if (col %in% names(df)) { + df[[col]] <- fs::as_fs_bytes(df[[col]]) } - - .data + df } -ensure_column <- function(data, default, name) { - stopifnot(length(default) == 1) - col <- data[[name]] - if (rlang::is_null(col)) { - col <- vctrs::vec_rep(default, nrow(data)) - col <- vctrs::vec_cast(col, default) - } else { - if ( - inherits(default, "POSIXct") && !inherits(col, "POSIXct") - ) { - # manual fix because vctrs::vec_cast cannot cast double -> datetime or char -> datetime - col <- coerce_datetime(col, default, name = name) - } - - if (inherits(default, "fs_bytes") && !inherits(col, "fs_bytes")) { - col <- coerce_fsbytes(col, default) - } +coerce_integer64 <- function(df, col) { + if (col %in% names(df)) { + df[[col]] <- bit64::as.integer64(df[[col]]) + } + df +} - if (inherits(default, "integer64") && !inherits(col, "integer64")) { - col <- bit64::as.integer64(col) +coerce_epoch_to_posixct <- function(df, cols) { + for (col in intersect(cols, names(df))) { + if (is.numeric(df[[col]])) { + df[[col]] <- .POSIXct(as.double(df[[col]]), tz = Sys.timezone()) } + } + df +} - if (inherits(default, "list") && !inherits(col, "list")) { - col <- list(col) +coerce_to_character <- function(df, cols) { + for (col in intersect(cols, names(df))) { + if (is.numeric(df[[col]])) { + df[[col]] <- as.character(df[[col]]) } + } + df +} - col <- vctrs::vec_cast(col, default, x_arg = name) +parse_connectapi_typed <- function(data, datetime_cols = character()) { + if (inherits(data, "data.frame")) { + # Strip custom S3 classes to avoid dispatch loops (e.g., connect_list_hits + # defines as_tibble which calls parse_connectapi_typed, causing recursion) + class(data) <- "data.frame" + df <- tibble::as_tibble(data) + } else { + # Fallback for list-of-lists (backward compat, non-simplified responses) + df <- parse_connectapi(data) } - data[[name]] <- col - data + for (col in 
intersect(datetime_cols, names(df))) { + df[[col]] <- coerce_datetime(df[[col]]) + } + df } -parse_connectapi_typed <- function(data, ptype, strict = FALSE) { - ensure_columns(parse_connectapi(data), ptype, strict) +# Coerce a column to POSIXct. Handles character (RFC 3339), numeric (epoch +# seconds), POSIXct (pass-through), and all-NA logical vectors. +coerce_datetime <- function(x) { + if (is.null(x)) { + .POSIXct(double(), tz = Sys.timezone()) + } else if (is.character(x)) { + parse_connect_rfc3339(x) + } else if (is.numeric(x)) { + .POSIXct(as.double(x), tz = Sys.timezone()) + } else if (inherits(x, "POSIXct")) { + x + } else if (is.logical(x) && all(is.na(x))) { + .POSIXct(rep(NA_real_, length(x)), tz = Sys.timezone()) + } else { + stop("Cannot coerce ", class(x)[[1]], " to POSIXct", call. = FALSE) + } } # Build a tibble column-by-column instead of row-by-row (via list_rbind). # This avoids type conflicts when the same field is NULL in some rows and # non-NULL in others: NULL -> NA, and unlist() coerces that NA to match the -# type of the non-null values in the same column. ensure_columns() handles -# any further type coercion (e.g. character -> POSIXct) after this step. +# type of the non-null values in the same column. parse_connectapi <- function(data) { if (length(data) == 0) return(tibble::tibble()) @@ -115,43 +131,6 @@ parse_connectapi <- function(data) { tibble::new_tibble(cols, nrow = n) } -coerce_fsbytes <- function(x, to, ...) { - if (is.numeric(x)) { - fs::as_fs_bytes(x) - } else { - vctrs::stop_incompatible_cast(x = x, to = to, x_arg = "x", to_arg = "to") - } -} - -# name - optional. Must be named, the name of the variable / column being converted -coerce_datetime <- function(x, to, ...) 
{ - tmp_name <- rlang::dots_list(...)[["name"]] - if (is.null(tmp_name) || is.na(tmp_name) || !is.character(tmp_name)) { - tmp_name <- "x" - } - - if (is.null(x)) { - as.POSIXct(character(), tz = tzone(to)) - } else if (is.numeric(x)) { - vctrs::new_datetime(as.double(x), tzone = tzone(to)) - } else if (is.character(x)) { - parse_connect_rfc3339(x) - } else if (inherits(x, "POSIXct")) { - x - } else if ( - all(is.logical(x) & is.na(x)) && length(is.logical(x) & is.na(x)) > 0 - ) { - NA_datetime_ - } else { - vctrs::stop_incompatible_cast( - x = x, - to = to, - x_arg = tmp_name, - to_arg = "to" - ) - } -} - # nolint start: commented_code_linter # Parses a character vector of dates received from Connect, using use RFC 3339, # returning a vector of POSIXct datetimes. @@ -170,23 +149,31 @@ coerce_datetime <- function(x, to, ...) { parse_connect_rfc3339 <- function(x) { if (length(x) == 0) return(.POSIXct(double(), tz = Sys.timezone())) + na_mask <- is.na(x) + if (all(na_mask)) { + return(.POSIXct(rep(NA_real_, length(x)), tz = Sys.timezone())) + } + + result <- rep(NA_real_, length(x)) + xn <- x[!na_mask] + # The date portion is always at fixed positions: YYYY-MM-DDTHH:MM:SS - dates <- as.Date(substr(x, 1, 10)) - hour <- as.integer(substr(x, 12, 13)) - min <- as.integer(substr(x, 15, 16)) + dates <- as.Date(substr(xn, 1, 10)) + hour <- as.integer(substr(xn, 12, 13)) + min <- as.integer(substr(xn, 15, 16)) # Seconds (with optional fractional part) run from position 18 to just before # the timezone suffix. The suffix is either "Z" (1 char) or "+HH:MM" (6 chars). 
- nc <- nchar(x) - is_utc <- endsWith(x, "Z") + nc <- nchar(xn) + is_utc <- endsWith(xn, "Z") tz_len <- ifelse(is_utc, 1L, 6L) - sec <- as.double(substr(x, 18, nc - tz_len)) + sec <- as.double(substr(xn, 18, nc - tz_len)) # Compute timezone offset in seconds for non-UTC timestamps - tz_offset <- rep(0, length(x)) + tz_offset <- rep(0, length(xn)) non_utc <- which(!is_utc) if (length(non_utc) > 0) { - tz_str <- substr(x[non_utc], nc[non_utc] - 5, nc[non_utc]) + tz_str <- substr(xn[non_utc], nc[non_utc] - 5, nc[non_utc]) tz_sign <- ifelse(substr(tz_str, 1, 1) == "+", 1, -1) tz_h <- as.integer(substr(tz_str, 2, 3)) tz_m <- as.integer(substr(tz_str, 5, 6)) @@ -195,35 +182,6 @@ parse_connect_rfc3339 <- function(x) { # Build epoch seconds directly: date days * 86400 + time of day - tz offset epoch <- as.double(dates) * 86400 + hour * 3600 + min * 60 + sec - tz_offset - .POSIXct(epoch, tz = Sys.timezone()) -} - -vec_cast.POSIXct.double <- # nolint: object_name_linter - function(x, to, ...) { - warn_experimental("vec_cast.POSIXct.double") - vctrs::new_datetime(x, tzone = tzone(to)) - } - -vec_cast.POSIXct.character <- # nolint: object_name_linter - function(x, to, ...) { - as.POSIXct(x, tz = tzone(to)) - } - -tzone <- function(x) { - attr(x, "tzone")[[1]] %||% "" -} - -vec_cast.character.integer <- # nolint: object_name_linter - function(x, to, ...) 
{ - as.character(x) - } - -new_datetime <- function(x = double(), tzone = "") { - tzone <- tzone %||% "" - if (is.integer(x)) { - x <- as.double(x) - } - stopifnot(is.double(x)) - stopifnot(is.character(tzone)) - structure(x, tzone = tzone, class = c("POSIXct", "POSIXt")) + result[!na_mask] <- epoch + .POSIXct(result, tz = Sys.timezone()) } diff --git a/R/ptype.R b/R/ptype.R deleted file mode 100644 index 6e274f1e..00000000 --- a/R/ptype.R +++ /dev/null @@ -1,269 +0,0 @@ -NA_datetime_ <- # nolint: object_name_linter - vctrs::new_datetime(NA_real_, tzone = Sys.timezone()) -NA_list_ <- # nolint: object_name_linter - list(list()) - -connectapi_ptypes <- list( - users = tibble::tibble( - "email" = NA_character_, - "username" = NA_character_, - "first_name" = NA_character_, - "last_name" = NA_character_, - "user_role" = NA_character_, - "created_time" = NA_datetime_, - "updated_time" = NA_datetime_, - "active_time" = NA_datetime_, - "confirmed" = FALSE, - "locked" = FALSE, - "external_id" = NA_character_, - "guid" = NA_character_ - ), - groups = tibble::tibble( - "guid" = NA_character_, - "name" = NA_character_, - "owner_guid" = NA_character_, - "gid" = NA_character_ - ), - usage_shiny = tibble::tibble( - "content_guid" = NA_character_, - "user_guid" = NA_character_, - "started" = NA_datetime_, - "ended" = NA_datetime_, - "data_version" = NA_integer_ - ), - usage_static = tibble::tibble( - "content_guid" = NA_character_, - "user_guid" = NA_character_, - "variant_key" = NA_character_, - "time" = NA_datetime_, - "rendering_id" = NA_character_, - "bundle_id" = NA_character_, - "data_version" = NA_integer_ - ), - usage = tibble::tibble( - "id" = NA_integer_, - "user_guid" = NA_character_, - "content_guid" = NA_character_, - "timestamp" = NA_datetime_, - "data" = NA_list_ - ), - content = tibble::tibble( - "guid" = NA_character_, - "name" = NA_character_, - "title" = NA_character_, - "description" = NA_character_, - "access_type" = NA_character_, - "connection_timeout" = 
NA_integer_, - "read_timeout" = NA_integer_, - "init_timeout" = NA_integer_, - "idle_timeout" = NA_integer_, - "max_processes" = NA_integer_, - "min_processes" = NA_integer_, - "max_conns_per_process" = NA_integer_, - "load_factor" = NA_real_, - "created_time" = NA_datetime_, - "last_deployed_time" = NA_datetime_, - "bundle_id" = NA_character_, - "app_mode" = NA_character_, - "content_category" = NA_character_, - "parameterized" = FALSE, - "cluster_name" = NA_character_, - "image_name" = NA_character_, - "r_version" = NA_character_, - "py_version" = NA_character_, - "quarto_version" = NA_character_, - "run_as" = NA_character_, - "run_as_current_user" = FALSE, - "owner_guid" = NA_character_, - "content_url" = NA_character_, - "dashboard_url" = NA_character_, - "app_role" = NA_character_, - "vanity_url" = NA_character_, - "id" = NA_character_, - "owner" = NA_list_, - "tags" = NA_list_, - ), - content_old = tibble::tibble( - "id" = NA_integer_, - "guid" = NA_character_, - "access_type" = NA_character_, - "connection_timeout" = NA_real_, - "read_timeout" = NA_real_, - "init_timeout" = NA_real_, - "idle_timeout" = NA_real_, - "max_processes" = NA_integer_, - "min_processes" = NA_integer_, - "max_conns_per_process" = NA_integer_, - "load_factor" = NA_real_, - "url" = NA_character_, - "vanity_url" = NA, - "name" = NA_character_, - "title" = NA_character_, - "bundle_id" = NA_integer_, - # (1=shiny, 2=shiny Rmd, 3=source Rmd, 4=static, 5=api, 6=tensorflow, 7=python, 8=flask, 9=dash, 10=streamlit) - "app_mode" = NA_integer_, - "content_category" = NA_character_, - "has_parameters" = NA, - "created_time" = NA_datetime_, - "last_deployed_time" = NA_datetime_, - "r_version" = NA_character_, - "py_version" = NA_character_, - "build_status" = NA_integer_, - "run_as" = NA_character_, - "run_as_current_user" = NA, - "description" = NA_character_, - "app_role" = NA_character_, - "owner_first_name" = NA_character_, - "owner_last_name" = NA_character_, - "owner_username" = 
NA_character_, - "owner_guid" = NA_character_, - "owner_email" = NA_character_, - "owner_locked" = NA, - "is_scheduled" = NA, - "git" = NA_list_ - ), - audit_logs = tibble::tibble( - "id" = NA_character_, - "time" = NA_datetime_, - "user_id" = NA_character_, - "user_guid" = NA_character_, - "user_description" = NA_character_, - "action" = NA_character_, - "event_description" = NA_character_ - ), - procs = tibble::tibble( - pid = NA_character_, - appId = NA_integer_, - appGuid = NA_character_, - appName = NA_character_, - appUrl = NA_character_, - appRunAs = NA_character_, - type = NA_character_, - cpuCurrent = NA_real_, - cpuTotal = NA_integer_, - ram = fs::as_fs_bytes(NA_integer_) - ), - variant = tibble::tibble( - id = NA_integer_, - app_id = NA_integer_, - key = NA_character_, - bundle_id = NA_integer_, - is_default = NA, - name = NA_character_, - email_collaborators = NA, - email_viewers = NA, - created_time = NA_datetime_, - rendering_id = NA_integer_, - render_time = NA_datetime_, - render_duration = bit64::NA_integer64_, - visibility = NA_character_, - owner_id = NA_integer_ - ), - rendering = tibble::tibble( - id = NA_integer_, - app_id = NA_integer_, - variant_id = NA_integer_, - bundle_id = NA_integer_, - job_key = NA_character_, - render_time = NA_datetime_, - render_duration = bit64::as.integer64(NA_integer_), - active = NA, - app_guid = NA_character_, - variant_key = NA_character_, - ), - jobs = tibble::tibble( - id = NA_character_, - ppid = NA_character_, - pid = NA_character_, - key = NA_character_, - remote_id = NA_character_, - content_id = NA_character_, - content_guid = NA_character_, - app_id = NA_character_, - app_guid = NA_character_, - variant_id = NA_character_, - bundle_id = NA_character_, - start_time = NA_datetime_, - end_time = NA_datetime_, - last_heartbeat_time = NA_datetime_, - queued_time = NA_datetime_, - queue_name = NA_character_, - tag = NA_character_, - exit_code = NA_integer_, - status = NA_integer_, - hostname = NA_character_, 
- cluster = NA_character_, - image = NA_character_, - run_as = NA_character_, - ), - bundles = tibble::tibble( - id = NA_character_, - content_guid = NA_character_, - created_time = NA_datetime_, - r_version = NA_character_, - py_version = NA_character_, - active = NA, - size = fs::as_fs_bytes(NA_integer_), - metadata = NA_list_, - ), - permissions = tibble::tibble( - id = NA_character_, - content_guid = NA_character_, - principal_guid = NA_character_, - principal_type = NA_character_, - role = NA_character_ - ), - group_content = tibble::tibble( - content_guid = NA_character_, - content_name = NA_character_, - content_title = NA_character_, - access_type = NA_character_, - permissions = NA_list_ - ), - job_termination = tibble::tibble( - app_id = NA_integer_, - app_guid = NA_character_, - job_key = NA_character_, - job_id = NA_character_, - result = NA_character_, - code = NA_integer_, - error = NA_character_ - ), - vanities = tibble::tibble( - content_guid = NA_character_, - path = NA_character_, - created_time = NA_datetime_ - ), - job_log = tibble::tibble( - source = NA_character_, - timestamp = NA_datetime_, - data = NA_character_ - ), - packages = tibble::tibble( - language = NA_character_, - language_version = NA_character_, - name = NA_character_, - version = NA_character_, - hash = NA_character_, - bundle_id = NA_character_, - app_id = NA_character_, - app_guid = NA_character_, - ), - content_packages = tibble::tibble( - language = NA_character_, - name = NA_character_, - version = NA_character_, - hash = NA_character_ - ), - integrations = tibble::tibble( - id = NA_character_, - guid = NA_character_, - created_time = NA_datetime_, - updated_time = NA_datetime_, - name = NA_character_, - description = NA_character_, - template = NA_character_, - auth_type = NA_character_, - config = NA_list_ - ) -) diff --git a/R/remote.R b/R/remote.R index c57b71d8..f1141bce 100644 --- a/R/remote.R +++ b/R/remote.R @@ -98,7 +98,7 @@ groups_create_remote <- function( 
expect <- as.integer(expect) if (check) { local_groups <- get_groups(connect, prefix = prefix) - if (exact) { + if (exact && nrow(local_groups) > 0) { local_groups <- local_groups[local_groups["name"] == prefix, ] } if (nrow(local_groups) > 0) { diff --git a/R/user.R b/R/user.R index 90b2122b..938dc5b7 100644 --- a/R/user.R +++ b/R/user.R @@ -16,6 +16,7 @@ user_guid_from_username <- function(client, username) { user <- client$users(prefix = username) res <- user$results + if (length(res) == 0) { stop("ERROR: user not found") } else if (length(res) > 1) { diff --git a/R/variant.R b/R/variant.R index 4aa55be0..f7287890 100644 --- a/R/variant.R +++ b/R/variant.R @@ -257,7 +257,11 @@ get_variants <- function(content) { variants <- content$variants() - parse_connectapi_typed(variants, connectapi_ptypes$variant) + out <- parse_connectapi_typed( + variants, + datetime_cols = c("created_time", "render_time") + ) + coerce_integer64(out, "render_duration") } #' @rdname variant @@ -300,7 +304,8 @@ get_variant_renderings <- function(variant) { validate_R6_class(variant, "Variant") renders <- variant$renderings() - parse_connectapi_typed(renders, connectapi_ptypes$rendering) + out <- parse_connectapi_typed(renders, datetime_cols = "render_time") + coerce_integer64(out, "render_duration") } #' @rdname variant_render diff --git a/man/PositConnect.Rd b/man/PositConnect.Rd index f6730e4b..27deb076 100644 --- a/man/PositConnect.Rd +++ b/man/PositConnect.Rd @@ -251,7 +251,7 @@ Build a URL relative to the server root \subsection{Method \code{request()}}{ General wrapper around \code{httr} verbs \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Connect$request(method, url, ..., parser = "parsed")}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Connect$request(method, url, ..., parser = "parsed", simplify = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -266,6 +266,10 @@ General wrapper around \code{httr} verbs \item{\code{parser}}{How the response is parsed. If \code{NULL}, the \code{httr_response} will be returned. Otherwise, the argument is forwarded to \code{httr::content(res, as = parser)}.} + +\item{\code{simplify}}{Logical; if \code{TRUE}, JSON arrays of objects are +simplified to data frames by jsonlite. Default \code{FALSE} preserves +list-of-lists for compatibility with pagination helpers.} } \if{html}{\out{}} } @@ -276,7 +280,13 @@ will be returned. Otherwise, the argument is forwarded to \subsection{Method \code{GET()}}{ Perform an HTTP GET request of the named API path. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Connect$GET(path, ..., url = self$api_url(path), parser = "parsed")}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{Connect$GET( + path, + ..., + url = self$api_url(path), + parser = "parsed", + simplify = FALSE +)}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -292,6 +302,10 @@ a server resource that is not under \verb{/__api__}} \item{\code{parser}}{How the response is parsed. If \code{NULL}, the \code{httr_response} will be returned. Otherwise, the argument is forwarded to \code{httr::content(res, as = parser)}.} + +\item{\code{simplify}}{Logical; if \code{TRUE}, JSON arrays of objects are +simplified to data frames by jsonlite. Default \code{FALSE} preserves +list-of-lists for compatibility with pagination helpers.} } \if{html}{\out{}} } diff --git a/tests/integrated/helper.R b/tests/integrated/helper.R index 78dae128..a1b9c62c 100644 --- a/tests/integrated/helper.R +++ b/tests/integrated/helper.R @@ -1,11 +1,24 @@ -expect_ptype_equal <- function(actual, expected, exact = TRUE) { +expect_column_types <- function(actual, expected_types, exact = TRUE) { + # expected_types is a named list: list(col_name = "type_string", ...) if (!exact) { - # Keep only the columns from each that are in the other - shared_names <- intersect(names(actual), names(expected)) - actual <- actual[, shared_names] - expected <- expected[, shared_names] + shared_names <- intersect(names(actual), names(expected_types)) + expected_types <- expected_types[shared_names] + } + for (nm in names(expected_types)) { + expect_true( + nm %in% names(actual), + info = paste("Expected column", nm, "not found") + ) + if (nm %in% names(actual)) { + expect_true( + inherits(actual[[nm]], expected_types[[nm]]), + info = paste0( + "Column '", nm, "': expected ", expected_types[[nm]], + ", got ", paste(class(actual[[nm]]), collapse = "/") + ) + ) + } } - expect_equal(vctrs::vec_ptype(actual), vctrs::vec_ptype(expected)) } skip_if_connect_older_than <- function(client, version) { diff --git a/tests/integrated/test-get.R b/tests/integrated/test-get.R index 30e347f5..27b4d847 100644 --- a/tests/integrated/test-get.R +++ b/tests/integrated/test-get.R @@ -10,7 +10,11 @@ test_that("get_users works", { users <- get_users(client) 
expect_s3_class(users, c("tbl_df", "tbl", "data.frame")) - expect_ptype_equal(users, connectapi_ptypes$users, exact = FALSE) + + expect_true("guid" %in% names(users)) + expect_true("email" %in% names(users)) + expect_true("username" %in% names(users)) + expect_s3_class(users$created_time, "POSIXct") # Other tests create users, so specifying the exact number here is conditional # on the contents of other tests and the order that tests run in. @@ -22,39 +26,41 @@ test_that("get_users works", { }) test_that("get_groups works", { + # Create a group so we have data to assert against (test-get.R runs before + # test-groups.R alphabetically). + client$groups_create(name = paste0("test-get-groups-", uuid::UUIDgenerate())) + groups_list <- get_groups(client) expect_s3_class(groups_list, c("tbl_df", "tbl", "data.frame")) - - expect_ptype_equal(groups_list, connectapi_ptypes$groups) + expect_true(nrow(groups_list) > 0) + expect_true("guid" %in% names(groups_list)) + expect_true("name" %in% names(groups_list)) }) test_that("get_content works", { scoped_experimental_silence() content_list <- get_content(client) expect_s3_class(content_list, c("tbl_df", "tbl", "data.frame")) - - # various attributes have been added over the years, so exact match - # doesn't work against all versions of Connect - expect_ptype_equal(content_list, connectapi_ptypes$content, exact = FALSE) + expect_true("guid" %in% names(content_list)) + expect_true("name" %in% names(content_list)) + expect_s3_class(content_list$created_time, "POSIXct") }) test_that("get_usage_shiny works", { shiny_usage <- get_usage_shiny(client) expect_s3_class(shiny_usage, c("tbl_df", "tbl", "data.frame")) - - expect_ptype_equal(shiny_usage, connectapi_ptypes$usage_shiny) + # No shiny apps are deployed in integration tests, so this may be empty. 
+ if (nrow(shiny_usage) > 0) { + expect_true("content_guid" %in% names(shiny_usage)) + expect_s3_class(shiny_usage$started, "POSIXct") + } }) test_that("get_usage_static works", { content_visits <- get_usage_static(client) expect_s3_class(content_visits, c("tbl_df", "tbl", "data.frame")) - - # path was added to usage_static in 2024 - expect_ptype_equal( - content_visits, - connectapi_ptypes$usage_static, - exact = FALSE - ) + expect_true("content_guid" %in% names(content_visits)) + expect_s3_class(content_visits$time, "POSIXct") }) test_that("get_audit_logs works", { @@ -63,17 +69,19 @@ test_that("get_audit_logs works", { # This is different on older versions, not sure it's worth worrying about how skip_if_connect_older_than(client, "2022.09.0") - expect_ptype_equal(audit_list, connectapi_ptypes$audit_logs) + expect_true("id" %in% names(audit_list)) + expect_s3_class(audit_list$time, "POSIXct") }) test_that("get_procs works", { scoped_experimental_silence() proc_data <- get_procs(client) - # TODO: This is not a great test, since no processes are running - # we could always start a content restoration... + # No long-running processes on a fresh test server, so this is usually empty. 
expect_s3_class(proc_data, "tbl_df") - expect_ptype_equal(proc_data, connectapi_ptypes$procs) + if (nrow(proc_data) > 0) { + expect_true(all(c("pid", "appId", "appGuid") %in% names(proc_data))) + } }) # experimental -------------------------------------------- diff --git a/tests/integrated/test-lazy.R b/tests/integrated/test-lazy.R index 36ee6627..26eb2947 100644 --- a/tests/integrated/test-lazy.R +++ b/tests/integrated/test-lazy.R @@ -35,7 +35,9 @@ test_that("users works", { expect_type(colnames(users), "character") expect_gt(length(colnames(users)), 1) - expect_ptype_equal(users_local, connectapi_ptypes$users, exact = FALSE) + expect_true("guid" %in% names(users_local)) + expect_true("email" %in% names(users_local)) + expect_s3_class(users_local$created_time, "POSIXct") }) test_that("usage_static works", { @@ -49,12 +51,8 @@ test_that("usage_static works", { expect_type(colnames(content_visits), "character") expect_gt(length(colnames(content_visits)), 1) - # path was added in 2024 - expect_ptype_equal( - content_visits_local, - connectapi_ptypes$usage_static, - exact = FALSE - ) + expect_true("content_guid" %in% names(content_visits_local)) + expect_s3_class(content_visits_local$time, "POSIXct") }) test_that("usage_shiny works", { @@ -66,9 +64,13 @@ test_that("usage_shiny works", { expect_true(is.na(nrow(shiny_usage))) expect_type(colnames(shiny_usage), "character") - expect_gt(length(colnames(shiny_usage)), 1) - expect_ptype_equal(shiny_usage_local, connectapi_ptypes$usage_shiny) + # No shiny apps are deployed in integration tests, so this may be empty. 
+ if (nrow(shiny_usage_local) > 0) { + expect_gt(length(colnames(shiny_usage)), 1) + expect_true("content_guid" %in% names(shiny_usage_local)) + expect_s3_class(shiny_usage_local$started, "POSIXct") + } }) test_that("content works", { @@ -83,13 +85,8 @@ test_that("content works", { expect_type(colnames(content_list), "character") expect_gt(length(colnames(content_list)), 1) - # various attributes have been added over the years, so exact match - # doesn't work against all versions of Connect - expect_ptype_equal( - content_list_local, - connectapi_ptypes$content, - exact = FALSE - ) + expect_true("guid" %in% names(content_list_local)) + expect_s3_class(content_list_local$created_time, "POSIXct") }) test_that("groups works", { @@ -104,7 +101,8 @@ test_that("groups works", { expect_type(colnames(groups_list), "character") expect_gt(length(colnames(groups_list)), 1) - expect_ptype_equal(groups_list_local, connectapi_ptypes$groups, exact = FALSE) + expect_true("guid" %in% names(groups_list_local)) + expect_true("name" %in% names(groups_list_local)) }) test_that("audit_logs works", { @@ -121,5 +119,6 @@ test_that("audit_logs works", { # This is different on older versions, not sure it's worth worrying about how skip_if_connect_older_than(client, "2022.09.0") - expect_ptype_equal(audit_list_local, connectapi_ptypes$audit_logs) + expect_true("id" %in% names(audit_list_local)) + expect_s3_class(audit_list_local$time, "POSIXct") }) diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index dd8191e2..e3f21d78 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -49,7 +49,7 @@ MockConnect <- R6Class( # The request function matches the route against the routes in the names of # the response list. When a response is selected, it is removed from the # list. 
- request = function(method, url, ..., parser = "parsed") { + request = function(method, url, ..., parser = "parsed", simplify = FALSE) { route <- paste(method, url) # Record call @@ -67,7 +67,15 @@ MockConnect <- R6Class( res } else { self$raise_error(res) - httr::content(res, as = parser) + content_text <- httr::content(res, as = "text", encoding = "UTF-8") + if (is.null(content_text) || nchar(content_text) == 0) { + return(NULL) + } + jsonlite::fromJSON( + content_text, + simplifyVector = simplify, + simplifyDataFrame = simplify + ) } }, responses = list(), diff --git a/tests/testthat/test-content.R b/tests/testthat/test-content.R index 164e4706..e15d9d5c 100644 --- a/tests/testthat/test-content.R +++ b/tests/testthat/test-content.R @@ -352,7 +352,7 @@ with_mock_api({ app_id = c(NA, 52389L), app_guid = c(NA, "8f37d6e0"), job_key = c("waaTO7v75I84S1hQ", "k3sHkEoWJNwQim7g"), - job_id = c(NA, "40669829"), + job_id = c(NA_integer_, 40669829L), result = c(NA, "Order to kill job registered"), code = c(163L, NA), error = c( @@ -366,18 +366,7 @@ with_mock_api({ test_that("terminate_jobs() functions as expected with no active jobs", { item <- content_item(client, "01234567") expect_message( - expect_equal( - terminate_jobs(item), - tibble::tibble( - app_id = integer(), - app_guid = character(), - job_key = character(), - job_id = character(), - result = character(), - code = integer(), - error = character() - ) - ), + expect_equal(terminate_jobs(item), tibble::tibble()), "No active jobs found." 
) }) @@ -508,45 +497,7 @@ with_mock_dir("2025.09.0", { test_that("search_content() can be converted to a data frame correctly", { content_df <- search_content(client, q = "sea bream") |> as_tibble() - expect_named( - content_df, - c( - "guid", - "name", - "title", - "description", - "access_type", - "connection_timeout", - "read_timeout", - "init_timeout", - "idle_timeout", - "max_processes", - "min_processes", - "max_conns_per_process", - "load_factor", - "created_time", - "last_deployed_time", - "bundle_id", - "app_mode", - "content_category", - "parameterized", - "cluster_name", - "image_name", - "r_version", - "py_version", - "quarto_version", - "run_as", - "run_as_current_user", - "owner_guid", - "content_url", - "dashboard_url", - "app_role", - "vanity_url", - "id", - "owner", - "tags" - ) - ) + expect_true(all(c("guid", "name", "title") %in% names(content_df))) expect_equal( content_df$title, c("sea bream report", "sea bream dashboard") diff --git a/tests/testthat/test-get.R b/tests/testthat/test-get.R index b73e82a4..f9f0103d 100644 --- a/tests/testthat/test-get.R +++ b/tests/testthat/test-get.R @@ -249,7 +249,7 @@ test_that("get_packages() works as expected with current return value", { language_version = c("3.7.6", "3.7.7"), name = c("absl-py", "absl-py"), version = c("0.12.0", "0.8.1"), - hash = c(NA_character_, NA_character_), + hash = c(NA, NA), bundle_id = c("9375", "6623"), content_id = c("4906", "3652"), content_guid = c("9bf33774", "1935b6cb") @@ -320,7 +320,7 @@ test_that("get_packages() works as expected with `content_guid` names in API res language_version = c("3.7.6", "3.7.7"), name = c("absl-py", "absl-py"), version = c("0.12.0", "0.8.1"), - hash = c(NA_character_, NA_character_), + hash = c(NA, NA), bundle_id = c("9375", "6623"), content_id = c("4906", "3652"), content_guid = c("9bf33774", "1935b6cb") @@ -384,26 +384,12 @@ with_mock_dir("2025.04.0", { ) expect_s3_class(usage, "connect_list_hits") - expect_s3_class(usage, "list") + 
expect_true(is.list(usage)) + expect_equal(length(usage), 5) + expect_equal(usage[[1]]$id, 8966707L) + expect_equal(usage[[1]]$content_guid, "475618c9") - expect_length(usage, 5) - - # Check first element - expect_equal( - usage[[1]], - list( - id = 8966707L, - user_guid = NULL, - content_guid = "475618c9", - timestamp = "2025-04-30T12:49:16.269904Z", - data = list( - path = "/hello", - user_agent = "Datadog/Synthetics" - ) - ) - ) - - # Check conversion to data.frame + # Check conversion to data.frame (with unnesting) usage_df <- as.data.frame(usage) expect_equal( usage_df, diff --git a/tests/testthat/test-parse.R b/tests/testthat/test-parse.R index 8b3cc347..5298da49 100644 --- a/tests/testthat/test-parse.R +++ b/tests/testthat/test-parse.R @@ -1,51 +1,3 @@ -test_that("coerce_fsbytes fills the void", { - expect_s3_class(coerce_fsbytes(1L, fs::as_fs_bytes(NA_integer_)), "fs_bytes") - expect_s3_class(coerce_fsbytes(1, fs::as_fs_bytes(NA_integer_)), "fs_bytes") - expect_error( - coerce_fsbytes(data.frame(), fs::as_fs_bytes(NA_integer_)), - class = "vctrs_error_incompatible_type" - ) -}) - -test_that("coerce_datetime fills the void", { - chardate <- "2023-10-25T17:04:08Z" - numdate <- as.double(Sys.time()) - expect_s3_class(coerce_datetime(chardate, NA_datetime_), "POSIXct") - expect_s3_class( - coerce_datetime(c(chardate, chardate), NA_datetime_), - "POSIXct" - ) - expect_s3_class(coerce_datetime(numdate, NA_datetime_), "POSIXct") - expect_s3_class(coerce_datetime(c(numdate, numdate), NA_datetime_), "POSIXct") - expect_s3_class(coerce_datetime(NA_datetime_, NA_datetime_), "POSIXct") - expect_s3_class( - coerce_datetime(c(NA_datetime_, NA_datetime_), NA_datetime_), - "POSIXct" - ) - expect_s3_class(coerce_datetime(NA_integer_, NA_datetime_), "POSIXct") - expect_s3_class( - coerce_datetime(c(NA_integer_, NA_integer_), NA_datetime_), - "POSIXct" - ) - expect_s3_class(coerce_datetime(NA, NA_datetime_), "POSIXct") - expect_s3_class(coerce_datetime(c(NA, NA), NA), 
"POSIXct") - expect_s3_class(coerce_datetime(NULL, NA), "POSIXct") - - expect_error( - coerce_datetime(data.frame(), NA_datetime_), - class = "vctrs_error_incompatible_type" - ) - expect_error( - coerce_datetime(list(), NA_datetime_, name = "list"), - class = "vctrs_error_incompatible_type" - ) - - expect_error( - coerce_datetime(NA_complex_, NA_datetime_, name = "complexity"), - class = "vctrs_error_incompatible_type" - ) -}) - test_that("parse_connect_rfc3339() parses timestamps with offsets as expected", { x_mixed <- c( "2023-08-22T14:13:14Z", @@ -139,6 +91,20 @@ test_that("parse_connect_rfc3339() handles fractional seconds", { expect_identical(parse_connect_rfc3339(x), expected) }) +test_that("parse_connect_rfc3339() handles NA values", { + withr::local_envvar(TZ = "UTC") + result <- parse_connect_rfc3339(c("2023-08-22T14:13:14Z", NA)) + expect_s3_class(result, "POSIXct") + expect_equal(length(result), 2) + expect_false(is.na(result[1])) + expect_true(is.na(result[2])) + + # All NA + result <- parse_connect_rfc3339(c(NA_character_, NA_character_)) + expect_s3_class(result, "POSIXct") + expect_true(all(is.na(result))) +}) + test_that("make_timestamp produces expected output", { x_mixed <- c( "2023-08-22T14:13:14Z", @@ -173,81 +139,81 @@ test_that("make_timestamp produces expected output", { withr::local_envvar(TZ = "America/New_York") expect_equal( - make_timestamp(coerce_datetime(x_mixed, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_mixed)), rep(outcome, 2) ) expect_equal( - make_timestamp(coerce_datetime(x_zero_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_zero_offset)), outcome ) expect_equal( - make_timestamp(coerce_datetime(x_plus_one, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_plus_one)), outcome ) expect_equal( - make_timestamp(coerce_datetime(x_minus_one, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_minus_one)), outcome ) expect_equal( - make_timestamp(coerce_datetime(single_zero_offset, 
NA_datetime_)), + make_timestamp(parse_connect_rfc3339(single_zero_offset)), outcome[1] ) expect_equal( - make_timestamp(coerce_datetime(single_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(single_offset)), outcome[1] ) expect_equal(make_timestamp(outcome), outcome) withr::local_envvar(TZ = "UTC") expect_equal( - make_timestamp(coerce_datetime(x_mixed, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_mixed)), rep(outcome, 2) ) expect_equal( - make_timestamp(coerce_datetime(x_zero_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_zero_offset)), outcome ) expect_equal( - make_timestamp(coerce_datetime(x_plus_one, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_plus_one)), outcome ) expect_equal( - make_timestamp(coerce_datetime(x_minus_one, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_minus_one)), outcome ) expect_equal( - make_timestamp(coerce_datetime(single_zero_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(single_zero_offset)), outcome[1] ) expect_equal( - make_timestamp(coerce_datetime(single_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(single_offset)), outcome[1] ) expect_equal(make_timestamp(outcome), outcome) withr::local_envvar(TZ = "Asia/Tokyo") expect_equal( - make_timestamp(coerce_datetime(x_mixed, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_mixed)), rep(outcome, 2) ) expect_equal( - make_timestamp(coerce_datetime(x_zero_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_zero_offset)), outcome ) expect_equal( - make_timestamp(coerce_datetime(x_plus_one, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_plus_one)), outcome ) expect_equal( - make_timestamp(coerce_datetime(x_minus_one, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(x_minus_one)), outcome ) expect_equal( - make_timestamp(coerce_datetime(single_zero_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(single_zero_offset)), outcome[1] ) 
expect_equal( - make_timestamp(coerce_datetime(single_offset, NA_datetime_)), + make_timestamp(parse_connect_rfc3339(single_offset)), outcome[1] ) expect_equal(make_timestamp(outcome), outcome) @@ -261,49 +227,8 @@ test_that("make_timestamp is safe for strings", { }) test_that("make_timestamp converts to character", { - expect_type(make_timestamp(NA_datetime_), "character") -}) - -test_that("ensure_column works with lists", { - list_chk_null <- ensure_column(tibble::tibble(), NA_list_, "hello") - expect_s3_class(list_chk_null, "tbl_df") - expect_type(list_chk_null$hello, "list") - - list_chk_same <- ensure_column( - tibble::tibble(hello = list(list(1, 2, 3), list(1, 2, 3, 4))), - NA_list_, - "hello" - ) - expect_s3_class(list_chk_same, "tbl_df") - expect_type(list_chk_same$hello, "list") -}) - -test_that("ensure_column works with POSIXct", { - time_chk_null <- ensure_column(tibble::tibble(), NA_datetime_, "hello") - expect_s3_class(time_chk_null, "tbl_df") - expect_s3_class(time_chk_null$hello, "POSIXct") - - time_chk_some <- ensure_column( - tibble::tibble(one = c(1, 2, 3)), - NA_datetime_, - "hello" - ) - expect_s3_class(time_chk_some, "tbl_df") - expect_s3_class(time_chk_some$hello, "POSIXct") - - skip("Ahh! this fails presently. 
Are double -> POSIXct conversions allowed?") - time_chk_convert <- ensure_column( - tibble::tibble(hello = c(1, 2, 3)), - NA_datetime_, - "hello" - ) - expect_s3_class(time_chk_convert, "tbl_df") - expect_s3_class(time_chk_convert$hello, "POSIXct") -}) - -test_that("converts length one list", { - hm <- ensure_column(tibble::tibble(one = "hi"), NA_list_, "one") - expect_type(hm$one, "list") + ts <- .POSIXct(NA_real_, tz = Sys.timezone()) + expect_type(make_timestamp(ts), "character") }) test_that("parse_connectapi handles mixed null/non-null character values", { @@ -344,3 +269,97 @@ test_that("parse_connectapi handles mixed null/non-null integer timestamps", { expect_type(result$end_time, "double") expect_identical(result$end_time, c(NA_real_, 1732556770)) }) + +test_that("coerce_datetime handles character (RFC 3339)", { + withr::local_envvar(TZ = "UTC") + result <- coerce_datetime(c("2023-08-22T14:13:14Z", "2020-01-01T01:02:03Z")) + expect_s3_class(result, "POSIXct") + expect_equal(length(result), 2) +}) + +test_that("coerce_datetime handles numeric (epoch seconds)", { + result <- coerce_datetime(1692713594) + expect_s3_class(result, "POSIXct") + expect_equal(as.double(result), 1692713594, tolerance = 1) +}) + +test_that("coerce_datetime handles POSIXct pass-through", { + ts <- as.POSIXct("2023-08-22 14:13:14", tz = "UTC") + result <- coerce_datetime(ts) + expect_identical(result, ts) +}) + +test_that("coerce_datetime handles NULL", { + result <- coerce_datetime(NULL) + expect_s3_class(result, "POSIXct") + expect_equal(length(result), 0) +}) + +test_that("coerce_datetime handles all-NA logical", { + result <- coerce_datetime(c(NA, NA)) + expect_s3_class(result, "POSIXct") + expect_true(all(is.na(result))) + expect_equal(length(result), 2) +}) + +test_that("coerce_datetime rejects unsupported types", { + expect_error(coerce_datetime(data.frame()), "Cannot coerce") + expect_error(coerce_datetime(NA_complex_), "Cannot coerce") +}) + +test_that("parse_connectapi_typed 
converts specified datetime columns", { + data <- list( + list(guid = "aaa", created_time = "2023-08-22T14:13:14Z"), + list(guid = "bbb", created_time = "2020-01-01T01:02:03Z") + ) + + result <- parse_connectapi_typed(data, datetime_cols = "created_time") + expect_s3_class(result, "tbl_df") + expect_s3_class(result$created_time, "POSIXct") + expect_type(result$guid, "character") +}) + +test_that("parse_connectapi_typed leaves columns alone without datetime_cols", { + data <- list( + list(guid = "aaa", created_time = "2023-08-22T14:13:14Z"), + list(guid = "bbb", created_time = "2020-01-01T01:02:03Z") + ) + + result <- parse_connectapi_typed(data) + expect_s3_class(result, "tbl_df") + # Without datetime_cols, timestamps stay as character + expect_type(result$created_time, "character") +}) + +test_that("parse_connectapi_typed handles data frame input (fast path)", { + df <- data.frame( + guid = c("aaa", "bbb"), + created_time = c("2023-08-22T14:13:14Z", "2020-01-01T01:02:03Z"), + stringsAsFactors = FALSE + ) + + result <- parse_connectapi_typed(df, datetime_cols = "created_time") + expect_s3_class(result, "tbl_df") + expect_s3_class(result$created_time, "POSIXct") +}) + +test_that("parse_connectapi_typed handles empty input", { + result <- parse_connectapi_typed(list()) + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 0) + + result <- parse_connectapi_typed(tibble::tibble()) + expect_s3_class(result, "tbl_df") + expect_equal(nrow(result), 0) +}) + +test_that("parse_connectapi_typed handles all-NA datetime column", { + data <- list( + list(guid = "aaa", active_time = NULL), + list(guid = "bbb", active_time = NULL) + ) + + result <- parse_connectapi_typed(data, datetime_cols = "active_time") + expect_s3_class(result$active_time, "POSIXct") + expect_true(all(is.na(result$active_time))) +}) From 400a8628ffb0597843a550dfece005d3fc794b34 Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Thu, 5 Mar 2026 11:59:50 -0800 Subject: [PATCH 3/4] improve performance for 
page_cursor --- R/page.R | 26 +++++++++++---- tests/testthat/test-page.R | 67 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 6 deletions(-) diff --git a/R/page.R b/R/page.R index 8808c25b..9aa5cf28 100644 --- a/R/page.R +++ b/R/page.R @@ -24,20 +24,34 @@ page_cursor <- function(client, req, limit = Inf) { prg$tick() response <- req - # collect whole pages, then flatten once at the end - pages <- list(response$results) - n_items <- length(response$results) + # Convert the first page (list-of-lists from simplify=FALSE) to a data frame. + # Subsequent pages use simplify=TRUE so jsonlite builds data frames in C, + # which is significantly faster for high-volume endpoints. + first_results <- response$results + if (length(first_results) > 0 && !is.data.frame(first_results)) { + first_results <- parse_connectapi(first_results) + } + + pages <- list(first_results) + n_items <- if (is.data.frame(first_results)) nrow(first_results) else length(first_results) + + while (!is.null(response$paging$`next`) && n_items < limit) { prg$tick() next_url <- response$paging$`next` - response <- client$GET(url = next_url) + response <- client$GET(url = next_url, simplify = TRUE) pages[[length(pages) + 1L]] <- response$results - n_items <- n_items + length(response$results) + # NROW(), not nrow(): an empty page simplifies to list(), for which + # nrow() is NULL and would zero-length the while condition. + n_items <- n_items + NROW(response$results) + } + + if (length(pages) == 1L && !is.data.frame(pages[[1L]])) { + # Single empty page — return as-is for downstream handling + return(pages[[1L]]) } - head(do.call(c, pages), n = limit) + out <- vctrs::vec_rbind(!!!pages) + head(out, n = limit) } # TODO: Decide if this `limit = Inf` is helpful or a hack... 
# it is essentially a "row limit" on paging diff --git a/tests/testthat/test-page.R b/tests/testthat/test-page.R index 1638dbb0..5e002831 100644 --- a/tests/testthat/test-page.R +++ b/tests/testthat/test-page.R @@ -14,3 +14,70 @@ with_mock_dir("2025.09.0", { ) }) }) + +test_that("page_cursor accumulates data frames from multiple pages", { + # Simulate a two-page cursor-paginated response. The first page comes from + # the caller with simplify=FALSE (list-of-lists), while page_cursor fetches + # subsequent pages with simplify=TRUE (data frames). + mock_client <- list( + GET = function(url, ..., simplify = FALSE) { + list( + results = data.frame(id = 3:4, name = c("c", "d")), + paging = list(`next` = NULL) + ) + } + ) + + initial_response <- list( + results = list( + list(id = 1L, name = "a"), + list(id = 2L, name = "b") + ), + paging = list(`next` = "https://connect.example/__api__/v1/things?next=abc") + ) + + res <- page_cursor(mock_client, initial_response) + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 4) + expect_equal(res$id, 1:4) + expect_equal(res$name, c("a", "b", "c", "d")) +}) + +test_that("page_cursor returns empty list for empty single page", { + mock_client <- list(GET = function(...) 
stop("should not be called")) + + initial_response <- list( + results = list(), + paging = list(`next` = NULL) + ) + + res <- page_cursor(mock_client, initial_response) + expect_equal(length(res), 0) +}) + +test_that("page_cursor respects limit", { + call_count <- 0L + mock_client <- list( + GET = function(url, ..., simplify = FALSE) { + call_count <<- call_count + 1L + list( + results = data.frame(id = 3:4, name = c("c", "d")), + paging = list(`next` = "https://connect.example/__api__/v1/things?next=more") + ) + } + ) + + initial_response <- list( + results = list( + list(id = 1L, name = "a"), + list(id = 2L, name = "b") + ), + paging = list(`next` = "https://connect.example/__api__/v1/things?next=abc") + ) + + res <- page_cursor(mock_client, initial_response, limit = 3) + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 3) + # Only fetched one additional page since we hit the limit + expect_equal(call_count, 1L) +}) From 25349d8be6b6be66dc42d8db419919522386c8af Mon Sep 17 00:00:00 2001 From: Kara Woo Date: Thu, 5 Mar 2026 14:47:19 -0800 Subject: [PATCH 4/4] keep columns even if missing from all responses --- R/content.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/content.R b/R/content.R index df7d23c8..be2ada0a 100644 --- a/R/content.R +++ b/R/content.R @@ -860,7 +860,8 @@ terminate_jobs <- function(content, keys = NULL) { # Keep only the columns relevant to job termination; the API response # includes extra fields (e.g. payload, guid) on error that vary by outcome. keep <- c("app_id", "app_guid", "job_key", "job_id", "result", "code", "error") - res_df[, intersect(keep, names(res_df)), drop = FALSE] + res_df[setdiff(keep, names(res_df))] <- NA + res_df[, keep, drop = FALSE] } #' @rdname get_jobs