From 26d70215cfc88b868a3a53ae99f765698ec35ccc Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 24 Dec 2025 15:47:39 -0800 Subject: [PATCH] Bump R version dependency to 3.5.0 --- .ci/README.md | 2 +- .dev/CRAN_Release.cmd | 14 ++-- .devcontainer/r-ancient-gcc/Dockerfile | 2 +- .github/workflows/R-CMD-check-occasional.yaml | 4 +- .gitlab-ci.yml | 8 +- DESCRIPTION | 2 +- NEWS.md | 3 + R/IDateTime.R | 4 +- R/merge.R | 8 +- R/utils.R | 11 --- R/wrappers.R | 4 +- inst/tests/tests.Rraw | 78 ++++++++----------- src/dogroups.c | 3 +- 13 files changed, 58 insertions(+), 85 deletions(-) diff --git a/.ci/README.md b/.ci/README.md index 67b0f9d0e9..d9cf8a6341 100644 --- a/.ci/README.md +++ b/.ci/README.md @@ -12,7 +12,7 @@ Test jobs: - `test-lin-rel-cran` - `--as-cran` on Linux, strict test for final status of `R CMD check`. - `test-lin-dev-gcc-strict-cran` - `--as-cran` on Linux, `r-devel` built with `-enable-strict-barrier --disable-long-double`, test for compilation warnings, test for new NOTEs/WARNINGs from `R CMD check`. - `test-lin-dev-clang-cran` - same as `gcc-strict` job but R built with `clang` and no `--enable-strict-barrier --disable-long-double` flags. -- `test-lin-ancient-cran` - Stated R dependency version (currently 3.4.0) on Linux. +- `test-lin-ancient-cran` - Stated R dependency version (currently 3.5.0) on Linux. - `test-lin-dev-clang-san` - `r-devel` on Linux built with `clang -fsanitize=address,undefined` (including LeakSanitizer), test for sanitizer output in tests and examples. - `test-lin-dev-gcc-san` - `r-devel` on Linux built with `gcc -fsanitize=address,undefined` (including LeakSanitizer), test for sanitizer output in tests and examples. - `test-win-rel` - `r-release` on Windows. 
diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index e2b930b0cf..f23e99bf67 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -224,23 +224,23 @@ system.time(test.data.table(script="*.Rraw")) # apx 8h = froll 3h + nafill 1m + ############################################### -# R 3.4.0 (stated dependency) +# R 3.5.0 (stated dependency) ############################################### ### ONE TIME BUILD sudo apt-get -y build-dep r-base cd ~/build -wget http://cran.stat.ucla.edu/src/base/R-3/R-3.4.0.tar.gz -tar xvf R-3.4.0.tar.gz -cd R-3.4.0 +wget http://cran.stat.ucla.edu/src/base/R-3/R-3.5.0.tar.gz +tar xvf R-3.5.0.tar.gz +cd R-3.5.0 CFLAGS="-fcommon" FFLAGS="-fallow-argument-mismatch" ./configure --without-recommended-packages make -alias R340=~/build/R-3.4.0/bin/R +alias R350=~/build/R-3.5.0/bin/R ### END ONE TIME BUILD cd ~/GitHub/data.table -R340 CMD INSTALL ./data.table_1.18.99.tar.gz -R340 +R350 CMD INSTALL ./data.table_1.18.99.tar.gz +R350 require(data.table) test.data.table(script="*.Rraw") diff --git a/.devcontainer/r-ancient-gcc/Dockerfile b/.devcontainer/r-ancient-gcc/Dockerfile index 7672b19ce0..9956156583 100644 --- a/.devcontainer/r-ancient-gcc/Dockerfile +++ b/.devcontainer/r-ancient-gcc/Dockerfile @@ -1,4 +1,4 @@ -FROM registry.gitlab.com/rdatatable/dockerfiles/r-3.4.0 +FROM registry.gitlab.com/rdatatable/dockerfiles/r-3.5.0 RUN apt-get -qq update \ && apt-get install -y --no-install-recommends git diff --git a/.github/workflows/R-CMD-check-occasional.yaml b/.github/workflows/R-CMD-check-occasional.yaml index f3cbfd803d..a739725e8d 100644 --- a/.github/workflows/R-CMD-check-occasional.yaml +++ b/.github/workflows/R-CMD-check-occasional.yaml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [macOS-latest, windows-latest, ubuntu-latest] - r: ['devel', 'release', '3.4', '3.5', '3.6', '4.0', '4.1', '4.2', '4.3'] + r: ['devel', 'release', '3.5', '3.6', '4.0', '4.1', '4.2', '4.3'] locale: ['en_US.utf8', 'zh_CN.utf8', 
'lv_LV.utf8'] # Chinese for translations, Latvian for collate order (#3502) exclude: # only run non-English locale CI on Ubuntu @@ -29,8 +29,6 @@ jobs: - os: windows-latest locale: 'lv_LV.utf8' # macOS/arm64 only available for R>=4.1.0 - - os: macOS-latest - r: '3.4' - os: macOS-latest r: '3.5' - os: macOS-latest diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e448dd030c..05cc1e445c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,8 +10,7 @@ variables: _R_CHECK_NO_STOP_ON_TEST_ERROR_: "true" _R_CHECK_SYSTEM_CLOCK_: "false" ## https://stackoverflow.com/questions/63613301/r-cmd-check-note-unable-to-verify-current-time _R_CHECK_TESTS_NLINES_: "0" - TZ: "UTC" ## to avoid 'Failed to create bus connection' from timedatectl via Sys.timezone() on Docker with R 3.4. - ## Setting TZ for all GLCI jobs to isolate them from timezone. We could have a new GLCI job to test under + TZ: "UTC" ## Setting TZ for all GLCI jobs to isolate them from timezone. We could have a new GLCI job to test under ## a non-UTC timezone, although, that's what we do routinely in dev. RUN_ALL_DATATABLE_TESTS: "yes" ## run optional tests in CI R_REL_VERSION: "4.5" # only raise when RTOOLS for REL is available @@ -49,9 +48,6 @@ mirror-packages: cache: paths: - bus/$CI_JOB_NAME/cran - variables: - # TODO(R-ancient>=3.5.0): remove this; let it save PACKAGES.rds in version-3 format - R_DEFAULT_SERIALIZE_VERSION: 2 script: - echo 'source(".ci/ci.R")' >> .Rprofile - mkdir -p bus/$CI_JOB_NAME/cran/src/contrib @@ -211,7 +207,7 @@ test-lin-dev-clang-cran: # stated dependency on R test-lin-ancient-cran: <<: *test-lin - image: registry.gitlab.com/rdatatable/dockerfiles/r-3.4.0 + image: registry.gitlab.com/rdatatable/dockerfiles/r-3.5.0 variables: _R_CHECK_FORCE_SUGGESTS_: "FALSE" # can be removed if all dependencies are available (knitr, xts, etc.) 
script: diff --git a/DESCRIPTION b/DESCRIPTION index f8a1f6d8f6..ba48ad1aa4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: data.table Version: 1.18.99 Title: Extension of `data.frame` -Depends: R (>= 3.4.0) +Depends: R (>= 3.5.0) Imports: methods Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), R.utils (>= 2.13.0), xts, zoo (>= 1.8-1), yaml, knitr, markdown Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development. diff --git a/NEWS.md b/NEWS.md index 5c97d3b69e..80566f4db8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ ## data.table [v1.18.99](https://github.com/Rdatatable/data.table/milestone/37?closed=1) (in development) +### Notes + +1. {data.table} now requires R >= 3.5.0 (released April 2018). ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 diff --git a/R/IDateTime.R b/R/IDateTime.R index 2a06ef2c7a..49fa5abda2 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -124,9 +124,7 @@ chooseOpsMethod.IDate = function(x, y, mx, my, cl, reverse) inherits(y, "Date") if ( is.double(e2) && !fitsInInt32(e2) ) { # IDate deliberately doesn't support fractional days so revert to base Date return(base::`-.Date`(as.Date(e1), e2)) - # can't call base::.Date directly (last line of base::`-.Date`) as tried in PR#3168 because - # i) ?.Date states "Internal objects in the base package most of which are only user-visible because of the special nature of the base namespace." - # ii) .Date was newly exposed in R some time after 3.4.4 + # can't call base::.Date directly (last line of base::`-.Date`) as tried in PR#3168 because ?.Date states "Internal objects in the base package most of which are only user-visible because of the special nature of the base namespace."
} ans = as.integer(unclass(e1) - unclass(e2)) if (inherits(e2, "Date")) { diff --git a/R/merge.R b/R/merge.R index b05d5d3319..4eb02a1f05 100644 --- a/R/merge.R +++ b/R/merge.R @@ -122,11 +122,11 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL } .maybe_warn_merge_dots <- function(...) { - # TODO(R >= 3.5.0): use ...length() - n_dots <- length(dots <- list(...)) - if (!n_dots) return(invisible()) + if (!...length()) return(invisible()) - nm <- names(dots) + # TODO(R>=4.1.0): Use ...names() + nm <- names(list(...)) + n_dots <- ...length() # not length(nm): names() is NULL when all of '...' is unnamed if (is.null(nm)) { warningf(ngettext(n_dots, "merge.data.table() received %d unnamed argument in '...' which will be ignored.", "merge.data.table() received %d unnamed arguments in '...' which will be ignored."), diff --git a/R/utils.R b/R/utils.R index 39a623b36d..9d89f6f0a4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,16 +1,5 @@ # all non-exported / unused internal (utility) functions -# R 3.5.0 made isTRUE longer but more efficient : -# `is.logical(x) && length(x)==1L && !is.na(x) && x` -# Before R 3.5.0, isTRUE was defined as simply: -# identical(TRUE,x) -# See PR#3421 for timings.
-# It was changed in R so that isTRUE(c(a=TRUE)) returned TRUE: https://github.com/wch/r-source/commit/828997ac6ecfb73aaa0aae9d1d0584a4ffc50881#diff-b41e3f9f1d389bb6f7a842cd5a3308b8 -if (base::getRversion() < "3.5.0") { - isTRUE = function(x) is.logical(x) && length(x)==1L && !is.na(x) && x # backport R's new implementation of isTRUE - isFALSE = function(x) is.logical(x) && length(x)==1L && !is.na(x) && !x # backport isFALSE that was added in R 3.5.0 - suspendInterrupts = function(expr) expr -} isTRUEorNA = function(x) is.logical(x) && length(x)==1L && (is.na(x) || x) isTRUEorFALSE = function(x) is.logical(x) && length(x)==1L && !is.na(x) allNA = function(x) .Call(C_allNAR, x) diff --git a/R/wrappers.R b/R/wrappers.R index 7683b434fc..c19c1b1eea 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -7,8 +7,8 @@ setcoalesce = function(..., nan=NA) .Call(Ccoalesce, list(...), TRUE, nan_is_na( fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) { - # TODO(R>=3.5.0): Use ...length() to avoid the need for suppressWarnings() here - default_condition = suppressWarnings(rep(TRUE, length(switch(1L, ...)))) # better than ..1/..elt(1): won't fail for empty fcase() + default_condition = logical() + if (...length()) default_condition = rep(TRUE, length(..1)) arg_list = as.list(substitute(list(..., default_condition, default)))[-1L] .Call(CfcaseR, parent.frame(), arg_list) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b0a5c690b2..8730b94633 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -2307,31 +2307,29 @@ test(754.04, DT[, b := a][3, b := 6L], data.table(a=INT(4,2,3),b=INT(4,2,6))) test(754.05, DT[, a := as.numeric(a), verbose=TRUE], output="Direct plonk.*no copy") RHS = as.integer(DT$a) test(754.06, DT[, a:= RHS, verbose=TRUE], output="RHS for item 1 has been duplicated") -if (base::getRversion() >= "3.5.0") { # TODO(R>=3.5.0): test unconditionally - # Expand ALTREPS in assign.c, 
#5400 - # String conversion gets deferred - ## first, a regression test of R itself -- we want to make sure our own test continues to be useful & testing its intended purpose - test(754.07, {a = 1:10; .Internal(inspect(a)); b = as.character(a); .Internal(inspect(b))}, output = "\\bcompact\\b.*\\bdeferred string conversion\\b") - test(754.08, DT[, a := as.character(a), verbose=TRUE], output="RHS for item 1 has been duplicated") - # Executing the code inside of test expands the ALTREP so we repeat the code - # in order to check the result after a further assignment - DT = data.table(a=1:3) - DT[, b := as.character(a)] - DT[, a := 5L] - test(754.09, DT, data.table(a=5L, b=as.character(1:3))) - # This function returns an ALTREP wrapper if the input is at least length 64 - testFun = function(x) { - x[FALSE] = 1 - x - } - DT = data.table(id=1:64, col1=0, col2=0) - test(754.10, DT[, col1 := testFun(col2), verbose = TRUE], output="RHS for item 1 has been duplicated") - DT = data.table(id=1:64, col1=0, col2=0) - DT[, col1 := testFun(col2)] - DT[, col2 := 999] - test(754.11, DT, data.table(id=1:64, col1=0, col2=999)) - rm(testFun) +# Expand ALTREPS in assign.c, #5400 +# String conversion gets deferred +## first, a regression test of R itself -- we want to make sure our own test continues to be useful & testing its intended purpose +test(754.07, {a = 1:10; .Internal(inspect(a)); b = as.character(a); .Internal(inspect(b))}, output = "\\bcompact\\b.*\\bdeferred string conversion\\b") +test(754.08, DT[, a := as.character(a), verbose=TRUE], output="RHS for item 1 has been duplicated") +# Executing the code inside of test expands the ALTREP so we repeat the code +# in order to check the result after a further assignment +DT = data.table(a=1:3) +DT[, b := as.character(a)] +DT[, a := 5L] +test(754.09, DT, data.table(a=5L, b=as.character(1:3))) +# This function returns an ALTREP wrapper if the input is at least length 64 +testFun = function(x) { + x[FALSE] = 1 + x } +DT = 
data.table(id=1:64, col1=0, col2=0) +test(754.10, DT[, col1 := testFun(col2), verbose = TRUE], output="RHS for item 1 has been duplicated") +DT = data.table(id=1:64, col1=0, col2=0) +DT[, col1 := testFun(col2)] +DT[, col2 := 999] +test(754.11, DT, data.table(id=1:64, col1=0, col2=999)) +rm(testFun) # Used to test warning on redundant by (#2282) but by=.EACHI has now superseded DT = data.table(a=letters[1:3],b=rep(c("d","e"),each=3),x=1:6,key=c('a', 'b')) @@ -6868,12 +6866,11 @@ test(1463.79, shift(x,-1L, type="cyclic"), as.raw(c(2:5, 1))) test(1463.80, shift(x,-(1:2),type="cyclic"), list(as.raw(c(2:5, 1)), as.raw(c(3:5,1:2)))) # shift incompatible types (e.g. Date and POSIXct) -# TODO(R>=3.5): use .Date() instead of setting class by hand -d = structure(0:4, class="Date") +d = .Date(0:4) p = .POSIXct(1:5) test(1463.81, shift(d, fill=p[1L]), error="Filling Date with POSIXct .* unsupported.*") test(1463.82, shift(p, fill=d[1L]), error="Filling POSIXct with Date .* unsupported.*") -test(1463.83, shift(d, fill=as.IDate(2000L)), structure(c(2000L, 0:3), class="Date")) +test(1463.83, shift(d, fill=as.IDate(2000L)), .Date(c(2000L, 0:3))) # FR #686 DT = data.table(a=rep(c("A", "B", "C", "A", "B"), c(2,2,3,1,2)), foo=1:10) @@ -11643,8 +11640,7 @@ test(1764.2, format(structure(NA_integer_, class = "ITime")), NA_character_) # IDateTime error when tzone is NULL, #1973 x = as.POSIXct('2017-03-17', tz="UTC") attr(x, 'tzone') = NULL -test(1765.1, print(IDateTime(x)), output=".*idate.*itime.*1: 2017-03-1[67]", - ignore.warning="timedatectl") # R 3.4's Sys.timezone() raises this warning in docker, #4182 +test(1765.1, print(IDateTime(x)), output=".*idate.*itime.*1: 2017-03-1[67]") # test test's ignore.warning test(1765.2, {warning("foo"); 4L}, 4L, ignore.warning="foo") @@ -15155,10 +15151,8 @@ test(2042.3, DT[ , format(mean(date),"%b-%Y"), by=g, verbose=TRUE ], # just thi # also incidentally fixed #2491 DT = data.table( Group = c("A", "A", "B", "B", "C", "C"), - Date1 = 
`class<-`(c(17446.0291040738, 17470.0221205444, 17445.0765226481, # `class<-`() == .Date() to pass on R 3.1.0 # TODO: update to .Date on R 3.5 - 17456.0360002079, 17440.0230725919, 17451.0572453837), "Date"), - Date2 = `class<-`(c(17459.1561177987, 17451.1086757995, 17449.0820898537, - 17443.1175238448, 17461.0463715783, 17448.1033968224), "Date") + Date1 = .Date(c(17446.0291040738, 17470.0221205444, 17445.0765226481, 17456.0360002079, 17440.0230725919, 17451.0572453837)), + Date2 = .Date(c(17459.1561177987, 17451.1086757995, 17449.0820898537, 17443.1175238448, 17461.0463715783, 17448.1033968224)) ) DT[ , DiffTime := abs(difftime(Date1, Date2, units = 'days'))] test(2042.4, DT[ , round(mean(DiffTime)), by=Group, verbose=TRUE], @@ -19408,8 +19402,7 @@ test(2286, x<2, structure(list(1), class = "foo"), x<3, structure(list(2), class = "foo"), # Force gc() and some allocations which have a good chance at landing in the region that was earlier left unprotected - # TODO(R>=3.5.0): no need to branch here for gc(full=TRUE) - { if ("full" %in% names(formals(gc))) gc(full = TRUE) else gc(); replicate(10, FALSE); x<4 }, + { gc(full = TRUE); replicate(10, FALSE); x<4 }, `attr<-`(list(3), "class", "foo")), structure(list(1, 2, 3), class = "foo")) @@ -20813,8 +20806,8 @@ y = data.table(c=1, d=1L) test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer") test(2297.04, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer") # dates -d_int = `class<-`(1L, "Date") # TODO: update to .Date on R 3.5 -d_dbl = `class<-`(1, "Date") +d_int = .Date(1L) +d_dbl = .Date(1) x = data.table(a=d_int) y = data.table(c=d_int, d=d_dbl) test(2297.11, y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int)) @@ -21651,13 +21644,10 @@ test(2339.02, lapply(DT, 
sqrt_dot_sym), list(a=1.0)) test(2339.03, DT[, lapply(.SD, function(...) sqrt(..1))], data.table(a=1.0)) test(2339.04, DT[, lapply(.SD, sqrt_dot_sym)], data.table(a=1.0)) sqrt_elt_sym = function(...) sqrt(...elt(1)) -# TODO(R>=3.5.0): run this unconditionally -if (!inherits(tryCatch(sqrt_elt_sym(1), error=identity), "error")) { - test(2339.05, lapply(DT, sqrt_elt_sym), list(a=1.0)) - test(2339.06, lapply(DT, function(...) sqrt(...elt(1L))), list(a=1.0)) - test(2339.07, DT[, lapply(.SD, sqrt_elt_sym)], data.table(a=1.0)) - test(2339.08, DT[, lapply(.SD, function(...) sqrt(...elt(1L)))], data.table(a=1.0)) -} +test(2339.05, lapply(DT, sqrt_elt_sym), list(a=1.0)) +test(2339.06, lapply(DT, function(...) sqrt(...elt(1L))), list(a=1.0)) +test(2339.07, DT[, lapply(.SD, sqrt_elt_sym)], data.table(a=1.0)) +test(2339.08, DT[, lapply(.SD, function(...) sqrt(...elt(1L)))], data.table(a=1.0)) # edge case of weird, though technically valid, names: # _not_ '..N' style and distinct from '...' # `123`='a'; DT[, ..123] doesn't work, but that's fine & easy to work around diff --git a/src/dogroups.c b/src/dogroups.c index 373242516f..5200d33f36 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -41,8 +41,7 @@ static bool anySpecialStatic(SEXP x, hashtab * specials) { // we restore the true truelength for when R starts to use vector truelength. SEXP attribs, list_el; const int n = length(x); - // use length() not LENGTH() because LENGTH() on NULL is segfault in R<3.5 where we still define USE_RINTERNALS - // (see data.table.h), and isNewList() is true for NULL + // use length() not LENGTH() because isNewList() is true for NULL if (n==0) return false; if (isVectorAtomic(x))