From 9c58f3e86d1593deb25eac6f5ad07624b3ce1be8 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 3 Mar 2025 14:33:03 +0300 Subject: [PATCH 1/7] Switch more tests to test(output=...) capture.output() tests are fine in many places, but these ones tested for messages that have since been translated and must be skipped in foreign more. --- inst/tests/tests.Rraw | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 91efa4e72..146e4683b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8286,8 +8286,8 @@ test(1579.03, dt[, lapply(.SD, median), keyby=x], dt[, lapply(.SD, function(x) median(as.numeric(x))), keyby=x]) test(1579.04, dt[, lapply(.SD, median, na.rm=TRUE), keyby=x], dt[, lapply(.SD, function(x) median(as.numeric(x), na.rm=TRUE)), keyby=x]) -ans = capture.output(dt[, lapply(.SD, median), by=x, verbose=TRUE]) -test(1579.05, any(grepl("GForce optimized", ans)), TRUE) +test(1579.05, dt[, lapply(.SD, median), by=x, verbose=TRUE], + output = "GForce optimized") # testing gforce::ghead and gforce::gtail # head(.SD, 1) and tail(.SD, 1) optimisation @@ -9280,11 +9280,9 @@ test(1633.4, dt[, .SD, by=1:nrow(dt)], data.table(nrow=1:nrow(dt), dt)) # make s # reuse secondary indices dt = data.table(x=sample(3, 10, TRUE), y=1:10) -v1 = capture.output(ans1 <- dt[.(3:2), on="x", verbose=TRUE]) +test(1634.1, dt[.(3:2), on="x", verbose=TRUE], output = "ad hoc") setindex(dt, x) -v2 = capture.output(ans2 <- dt[.(3:2), on="x", verbose=TRUE]) -test(1634.1, any(grepl("ad hoc", v1)), TRUE) -test(1634.2, any(grepl("existing index", v2)), TRUE) +test(1634.2, dt[.(3:2), on="x", verbose=TRUE], output = "existing index") # fread's fill argument detects separator better in complex cases as well, #1573 # if pasted to the console, these tests won't work. But do work when sourced as these are tabs not spaces in text @@ -11653,8 +11651,8 @@ test(1765.4, {warning("foobar1"); warning("foobar2"); warning("FOO"); 4L}, 4L, i test(1765.5, {warning("foobar1"); warning("foobar2"); warning("FOO"); 4L}, 4L, ignore.warning="2", warning=c("foobar1","FOO")) # print(null.data.table()) should not output NULL as well, #1852 -# use capture.output() in this case rather than output= to ensure NULL is not output -test(1766, capture.output(print(data.table(NULL))), "Null data.table (0 rows and 0 cols)") +# the ^, $ specifiers don't match embedded newlines (if any) so match will fail if NULL is also printed +test(1766, data.table(NULL), output = "^Null data\\.table \\(0 rows and 0 cols\\)$") # Bug on subset of 1-row data.table when expr returns a named logical vector #2152 options(datatable.auto.index=FALSE) @@ -11888,8 +11886,8 @@ test(1774.17, as.data.table(x, na.rm='a'), error="'na.rm' must be scalar") # verify print.keys works DT1 <- data.table(a = 1:3, key = "a") -test(1775.1, capture.output(print(DT1, print.keys = TRUE)), - c("Key: ", " a", "1: 1", "2: 2", "3: 3")) +test(1775.1, DT1, options = c(datatable.print.keys = TRUE), + output = c("Key: ", " a", "1: 1", "2: 2", "3: 3")) DT2 <- data.table(a = 1:3, b = 4:6) setindexv(DT2, c("b","a")) test(1775.2, print(DT2, print.keys = TRUE), @@ -13675,7 +13673,7 @@ test(1962.034, setkeyv(DT, c('a', '')), setkey(DT, a) test(1962.035, {setkeyv(DT, character(0L)); key(DT)}, NULL, warning = 'cols is a character vector of zero length') -test(1962.036, any(grepl('already ordered', capture.output(setkey(DT, a, verbose = TRUE))))) +test(1962.036, setkey(DT, a, verbose = TRUE), output = 'already ordered') setnames(DT, '.xi') setkey(DT, NULL) test(1962.037, setkey(DT, .xi), From b6d9685ea4d9e89a7a61ab8e43ca30b50eb80ed5 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 3 Mar 2025 16:12:10 +0300 Subject: [PATCH 2/7] Use notOutput= instead of capture.output() tests Where capture.output() was previously used to test the absence of messages, use notOutput= instead so that the test will be skipped. While the test succeeded anyway due to the captured output containing neither the original untranslated message (being matched) nor its translation (that could be printed if a regression happened), skipping the test is more fair. --- inst/tests/tests.Rraw | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 146e4683b..5579637dd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -2031,8 +2031,8 @@ options(datatable.optimize = 2L) test(658.1, DT[ , mean(x), by=grp1, verbose=TRUE], output="GForce optimized j to.*gmean") test(658.2, DT[ , list(mean(x)), by=grp1, verbose=TRUE], output="GForce optimized j to.*gmean") test(658.3, DT[ , list(mean(x), mean(y)), by=grp1, verbose=TRUE], output="GForce optimized j to.*gmean") -tt = capture.output(DT[,list(mean(x),mean(y)),by=list(grp1,grp2),verbose=TRUE]) -test(659, !length(grep("Wrote less rows", tt))) # first group is one row with this seed. Ensure we treat this as aggregate case rather than allocate too many rows. +# first group is one row with this seed. Ensure we treat this as aggregate case rather than allocate too many rows. +test(659, DT[,list(mean(x),mean(y)),by=list(grp1,grp2),verbose=TRUE], notOutput = "Wrote less rows") # Test .N for logical i subset DT = data.table(a=1:10, b=rnorm(10)) @@ -2040,8 +2040,10 @@ test(660, DT[a==8L, .N], 1L) # Test that growing is sensible in worst case DT = data.table(a=rep(1:10,1:10),b=rnorm(55)) -tt = capture.output(DT[,sum(b)*b,by=a,verbose=TRUE]) -test(661, length(grep("growing from",tt))<3) # was 6 when we simply grew enough for latest result +test(661, DT[,sum(b)*b,by=a,verbose=TRUE], + notOutput = paste(rep("growing from", 3), collapse = ".*")) +# Need to test that "growing from" is printed less than three times. +# The number was previously 6 when we simply grew enough for latest result # Test that adding a new logical column is supported, #2094 DT=data.table(a=1:3) @@ -10246,13 +10248,13 @@ options("datatable.use.index"=TRUE, "datatable.auto.index"=TRUE) test(1666.01, d[k==1L, verbose=TRUE], d[3L], output="Creating new index 'k'") d = data.table(k=3:1) options("datatable.use.index"=TRUE, "datatable.auto.index"=FALSE) -test(1666.02, grep("Creating new index", capture.output(d[k==1L, verbose=TRUE])), integer(0)) # do not create index +test(1666.02, d[k==1L, verbose=TRUE], notOutput="Creating new index") # do not create index d = data.table(k=3:1) options("datatable.use.index"=FALSE, "datatable.auto.index"=FALSE) -test(1666.03, grep("Creating new index", capture.output(d[k==1L, verbose=TRUE])), integer(0)) +test(1666.03, d[k==1L, verbose=TRUE], notOutput="Creating new index") d = data.table(k=3:1) options("datatable.use.index"=FALSE, "datatable.auto.index"=TRUE) -test(1666.04, grep("Creating new index", capture.output(d[k==1L, verbose=TRUE])), integer(0)) +test(1666.04, d[k==1L, verbose=TRUE], notOutput="Creating new index") d = data.table(k=3:1) # subset - index setindex(d, k) options("datatable.use.index"=TRUE, "datatable.auto.index"=TRUE) @@ -10260,9 +10262,9 @@ test(1666.05, d[k==1L, verbose=TRUE], d[3L], output="Optimized subsetting with i options("datatable.use.index"=TRUE, "datatable.auto.index"=FALSE) test(1666.06, d[k==1L, verbose=TRUE], d[3L], output="Optimized subsetting with index 'k'") options("datatable.use.index"=FALSE, "datatable.auto.index"=FALSE) -test(1666.07, grep("Using existing index", capture.output(d[k==1L, verbose=TRUE])), integer(0)) # not using existing index +test(1666.07, d[k==1L, verbose=TRUE], notOutput="Using existing index") # not using existing index options("datatable.use.index"=FALSE, "datatable.auto.index"=TRUE) -test(1666.08, grep("Using existing index", capture.output(d[k==1L, verbose=TRUE])), integer(0)) +test(1666.08, d[k==1L, verbose=TRUE], notOutput="Using existing index") d1 = data.table(k=3:1) # join - no index d2 = data.table(k=2:4) options("datatable.use.index"=TRUE, "datatable.auto.index"=TRUE) @@ -10270,9 +10272,9 @@ test(1666.09, d1[d2, on="k", verbose=TRUE], d1[d2, on="k"], output="ad hoc") options("datatable.use.index"=TRUE, "datatable.auto.index"=FALSE) test(1666.10, d1[d2, on="k", verbose=TRUE], d1[d2, on="k"], output="ad hoc") options("datatable.use.index"=FALSE, "datatable.auto.index"=FALSE) -test(1666.11, grep("Looking for existing (secondary) index", capture.output(d1[d2, on="k", verbose=TRUE])), integer(0)) # not looking for index +test(1666.11, d1[d2, on="k", verbose=TRUE], notOutput="Looking for existing (secondary) index") # not looking for index options("datatable.use.index"=FALSE, "datatable.auto.index"=TRUE) -test(1666.12, grep("Looking for existing (secondary) index", capture.output(d1[d2, on="k", verbose=TRUE])), integer(0)) +test(1666.12, d1[d2, on="k", verbose=TRUE], notOutput="Looking for existing (secondary) index") d1 = data.table(k=3:1,v1=10:12) # join - index d2 = data.table(k=2:4,v2=20:22) setindex(d1, k) @@ -16319,7 +16321,7 @@ test(2094.02, X$TAG, rep(names(x), each = 2)) # use arbitrary column without message when fun.aggregate=length, #2980 DT = data.table(a=c(3L, 3L, 2L, 9L, 5L, 10L, 3L, 2L, 9L, 8L), b=rep(1:5, 2)) -test(2095, any(grepl('override', capture.output(dcast(DT, a~b, fun.aggregate=length)), fixed=TRUE)), FALSE) +test(2095, dcast(DT, a~b, fun.aggregate=length), notOutput='override') # gmean intermediate can overflow integers without warning, #986 test(2096, data.table(a=c(1L,1L), v=c(2e9L, 2e9L))[, mean(v), a], data.table(a=1L, V1=2e9)) From ed267006f3abcf59e0fa01418b2b3461d13a98f0 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 10 Mar 2025 19:32:37 +0300 Subject: [PATCH 3/7] test.data.table(): count skipped message tests When working in foreign mode, count the occurrences of tests for message content (which are essentially skipped) and report the number if it's non-zero. --- R/test.data.table.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/test.data.table.R b/R/test.data.table.R index 7d6bc58e1..b4c7a6aa3 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -130,6 +130,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F } assign("foreign", foreign, envir=env) assign("nfail", 0L, envir=env) + assign("nskip", 0L, envir=env) assign("ntest", 0L, envir=env) assign("prevtest", -1L, envir=env) assign("whichfail", NULL, envir=env) @@ -282,6 +283,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F nfail = env$nfail ntest = env$ntest + nskip = env$nskip if (nfail > 0L) { # nocov start stopf( @@ -331,6 +333,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F get("mtext")(lastRSS, side=4L, at=lastRSS, las=1L, font=2L) } + if (foreign && nskip > 0L) catf("Skipped %d tests for translated messages. ", nskip) # nocov catf("All %d tests (last %.8g) in %s completed ok in %s\n", ntest, env$prevtest, names(fn), timetaken(env$started.at)) ans = nfail==0L attr(ans, "timings") = timings # as attr to not upset callers who expect a TRUE/FALSE result @@ -409,6 +412,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no memtest.id = get("memtest.id", parent.frame()) filename = get("filename", parent.frame()) foreign = get("foreign", parent.frame()) + nskip = get("nskip", parent.frame()) showProgress = get("showProgress", parent.frame()) time = nTest = RSS = NULL # to avoid 'no visible binding' note # TODO(R>=4.0.2): Use add=TRUE up-front in on.exit() once non-positional arguments are supported. @@ -451,6 +455,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no length(grep(x, y, fixed=TRUE)) || # try treating x as literal first; useful for most messages containing ()[]+ characters length(tryCatch(grep(x, y, ignore.case=ignore.case), error=function(e)NULL)) # otherwise try x as regexp } + if (foreign && .test.data.table && ( + length(error) || length(warning) || length(message) || length(output) || + length(notOutput) || length(ignore.warning) + )) + assign("nskip", nskip+1L, parent.frame(), inherits=TRUE) # nocov xsub = substitute(x) ysub = substitute(y) From 3ffc01339da61c199a4c0bb5a3fb6e707b6116bb Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sat, 26 Jul 2025 11:36:22 +0300 Subject: [PATCH 4/7] test 1590.14: don't assume 1252 if not UTF-8 --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5579637dd..e6d0fd04f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8541,7 +8541,7 @@ if (x1==x2) { # NB: x1==x2 is a condition in base R, independent of data.table test(1590.12, forderv( c(x2,x1,x1,x2)), integer()) # don't test base ... test(1590.13, base::order(c(x2,x1,x1,x2)), 1:4) -} else { +} else if (identical(l10n_info()$codepage, 1252L)) { # Windows-1252, #2856 test(1590.14, forderv( c(x2,x1,x1,x2)), INT(1,4,2,3)) # don't test base ... test(1590.15, base::order(c(x2,x1,x1,x2)), INT(1,4,2,3)) From dfb67d9c4c7202c47c15e6f74cf8cc30b3651f36 Mon Sep 17 00:00:00 2001 From: Ivan K Date: Mon, 15 Dec 2025 23:54:42 +0300 Subject: [PATCH 5/7] Elaborate on what 1252 is --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e6d0fd04f..102e44cda 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8542,7 +8542,7 @@ if (x1==x2) { test(1590.12, forderv( c(x2,x1,x1,x2)), integer()) # don't test base ... test(1590.13, base::order(c(x2,x1,x1,x2)), 1:4) } else if (identical(l10n_info()$codepage, 1252L)) { - # Windows-1252, #2856 + # US English ANSI codepage on Windows is CP1252, #2856 test(1590.14, forderv( c(x2,x1,x1,x2)), INT(1,4,2,3)) # don't test base ... test(1590.15, base::order(c(x2,x1,x1,x2)), INT(1,4,2,3)) } From b0f41d582ea0857c44e78f80c91dea12a67d82af Mon Sep 17 00:00:00 2001 From: Ivan K Date: Tue, 16 Dec 2025 00:55:08 +0300 Subject: [PATCH 6/7] Better test for not printing NULL --- inst/tests/tests.Rraw | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 102e44cda..9cb69fed6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11653,8 +11653,7 @@ test(1765.4, {warning("foobar1"); warning("foobar2"); warning("FOO"); 4L}, 4L, i test(1765.5, {warning("foobar1"); warning("foobar2"); warning("FOO"); 4L}, 4L, ignore.warning="2", warning=c("foobar1","FOO")) # print(null.data.table()) should not output NULL as well, #1852 -# the ^, $ specifiers don't match embedded newlines (if any) so match will fail if NULL is also printed -test(1766, data.table(NULL), output = "^Null data\\.table \\(0 rows and 0 cols\\)$") +test(1766, data.table(NULL), notOutput = "NULL$") # Bug on subset of 1-row data.table when expr returns a named logical vector #2152 options(datatable.auto.index=FALSE) From 01857cba4299fe6bcf735c9590da9a2de1feff9a Mon Sep 17 00:00:00 2001 From: Ivan K Date: Tue, 16 Dec 2025 01:10:47 +0300 Subject: [PATCH 7/7] Restore test 1775.1 for print() options Instead, test substitute(x) for %iscall% "print" to decide whether to print the x before testing the output. --- R/test.data.table.R | 6 +++++- inst/tests/tests.Rraw | 12 ++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index b4c7a6aa3..c26450e25 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -487,7 +487,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no # save the overhead of capture.output() since there are a lot of tests, often called in loops # Thanks to tryCatch2 by Jan here : https://github.com/jangorecki/logR/blob/master/R/logR.R#L21 } else { - out = capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler)))) + out = if (xsub %iscall% "print") { + capture.output(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler))) + } else { + capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler)))) + } } if (!is.null(options)) { # some of the options passed to test() may break internal data.table use below (e.g. invalid datatable.alloccol), so undo them ASAP diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9cb69fed6..c948f34f0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11887,7 +11887,7 @@ test(1774.17, as.data.table(x, na.rm='a'), error="'na.rm' must be scalar") # verify print.keys works DT1 <- data.table(a = 1:3, key = "a") -test(1775.1, DT1, options = c(datatable.print.keys = TRUE), +test(1775.1, print(DT1, print.keys = TRUE), output = c("Key: ", " a", "1: 1", "2: 2", "3: 3")) DT2 <- data.table(a = 1:3, b = 4:6) setindexv(DT2, c("b","a")) @@ -16704,10 +16704,10 @@ test(2125.05, print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE), "1 variable not shown: \\[d \\]")) test(2125.06, print(DT, trunc.cols=TRUE, col.names="none"), output=c("^ 1: 0 bbbbbbbbbbbbb ccccccccccccc", ".*", - "1 variable not shown: \\[d\\]", "")) + "1 variable not shown: \\[d\\]$")) test(2125.07, print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"), output=c("^ 1: 0 bbbbbbbbbbbbb", ".*", - "2 variables not shown: \\[c, d\\]", ""), + "2 variables not shown: \\[c, d\\]$"), warning = "Column classes will be suppressed when col.names is 'none'") options("width" = 20) DT = data.table(a = vector("integer", 2), @@ -21490,11 +21490,11 @@ test(2328.2, droplevels(DT), data.table(f=factor(), i=integer(), f2=factor())) #6882 print() output with col.names="none" dt = data.table(short = 1:3, verylongcolumnname = 4:6) -test(2329.1, print(dt, col.names = "none"), output = "1: 1 4\n2: 2 5\n3: 3 6\n") +test(2329.1, print(dt, col.names = "none"), output = "1: 1 4\n2: 2 5\n3: 3 6$") dt = data.table(x = 123456, y = "wide_string") -test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string\n") +test(2329.2, print(dt, col.names = "none"), output = "1: 123456 wide_string$") dt = data.table(a = NA_integer_, b = NaN) -test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN\n") +test(2329.3, print(dt, col.names = "none"), output = "1: NA NaN$") # Row name extraction from multiple vectors, #7136 x <- 1:3