Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 47 additions & 39 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1220,11 +1220,51 @@ replace_dot_alias = function(e) {
m[is.na(m)] = ncol(x)+seq_along(newnames)
cols = as.integer(m)
# don't pass verbose to selfrefok here -- only activated when
# ok=-1 which will trigger setalloccol with verbose after
# the jval = eval(jsub, ...)
# ok=-1 which will trigger setalloccol with verbose in the next branch
# if a change in the number of columns is suspected
if (ok==0L) # ok==0 so no warning when loaded from disk (-1) [-1 considered TRUE by R]
if (is.data.table(x)) warningf("A shallow copy of this data.table was taken so that := can add or remove %d columns by reference. At an earlier point, this data.table was copied by R (or was created manually using structure() or similar). Avoid names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. It's also not unusual for data.table-agnostic packages to produce tables affected by this issue. If this message doesn't help, please report your use case to the data.table issue tracker so the root cause can be fixed or this message improved.", length(newnames))
}
# ok <- selfrefok above called without verbose -- only activated when
# ok=-1 which will trigger setalloccol with verbose in the next
# branch, which again calls _selfrefok and returns the message then
# !is.data.table for DF |> DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame
if (
(
!is.null(newnames) || # adding new columns
is.null(jsub) || (jsub %iscall% "list" && any(vapply_1b(jsub[-1], is.null))) # removing columns
) && (
(ok<1L) || # unsafe to resize
(truelength(x) < ncol(x)+length(newnames)) # not enough space for new columns
)
) {
DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove
n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval()
# i.e. reallocate at the size as if the new columns were added followed by setalloccol().
name = substitute(x)
if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk)
catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n)
# #1729 -- copying to the wrong environment here can cause some confusion
if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n")

# Verbosity should not issue warnings, so cat rather than warning.
# TO DO: Add option 'datatable.pedantic' to turn on warnings like this.

# TO DO ... comments moved up from C ...
# Note that the NAMED(dt)>1 doesn't work because .Call
# always sets to 2 (see R-ints), it seems. Work around
# may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too
# because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2.
# Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
# don't mind.
}
setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope
if (is.name(name)) {
assign(as.character(name),x,parent.frame(),inherits=TRUE)
} else if (.is_simple_extraction(name)) {
.reassign_extracted_table(name, x)
} # TO DO: else if env$<- or list$<-
}
}
}

Expand Down Expand Up @@ -1400,45 +1440,13 @@ replace_dot_alias = function(e) {
} else if (is.numeric(lhs)) {
lhs = names_x[m]
}
# ok <- selfrefok above called without verbose -- only activated when
# ok=-1 which will trigger setalloccol with verbose in the next
# branch, which again calls _selfrefok and returns the message then
# !is.data.table for DF |> DT(,:=) tests 2212.16-19 (#5113) where a shallow copy is routine for data.frame
if (
(
!is.null(newnames) || # adding new columns
is.null(jval) || (is.list(jval) && any(vapply_1b(jval, is.null))) # removing columns
) && (
(ok<1L) || # unsafe to resize
(truelength(x) < ncol(x)+length(newnames)) # not enough space for new columns
)
) {
DT = x # in case getOption contains "ncol(DT)" as it used to. TODO: warn and then remove
n = length(newnames) + eval(getOption("datatable.alloccol")) # TODO: warn about expressions and then drop the eval()
# i.e. reallocate at the size as if the new columns were added followed by setalloccol().
# cater for deleting columns by assigning NULL
if ((is.null(jval) || (is.list(jval) && any(vapply_1b(jval, is.null)))) && selfrefok(x, verbose=FALSE) < 1L) {
name = substitute(x)
if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk)
catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n)
# #1729 -- copying to the wrong environment here can cause some confusion
if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n")

# Verbosity should not issue warnings, so cat rather than warning.
# TO DO: Add option 'datatable.pedantic' to turn on warnings like this.

# TO DO ... comments moved up from C ...
# Note that the NAMED(dt)>1 doesn't work because .Call
# always sets to 2 (see R-ints), it seems. Work around
# may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too
# because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2.
# Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
# don't mind.
}
setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope
setalloccol(x, verbose=FALSE)
if (is.name(name)) {
assign(as.character(name),x,parent.frame(),inherits=TRUE)
} else if (.is_simple_extraction(name)) {
.reassign_extracted_table(name, x)
} # TO DO: else if env$<- or list$<-
assign(as.character(name), x, parent.frame(), inherits=TRUE)
}
}
Copy link
Member Author

@aitap aitap Dec 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! With this commit, the 13 packages flagged for this issue pass their R CMD checks with nothing worse than NOTE.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! I guess we need to battle test this throughly but as a bandaid it should hold

# TODO?: use set() here now that it can add new columns. Then remove newnames and alloc logic above.
.Call(Cassign,x,irows,cols,newnames,jval)
Expand Down
5 changes: 5 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -21916,3 +21916,8 @@ DT = suppressMessages(rbindlist(DTl)) # used to crash
test(2352.1, dim(DT), c(300002L, 1L))
test(2352.2, DT[[1]], rep(42, nrow(DT)))
rm(DTn, DTl, DT)

# insert columns by group in freshly unserialized data.tables, #7498
DT = unserialize(serialize(as.data.table(mtcars), NULL))
test(2353, DT[,foo:=mean(mpg),by=cyl], as.data.table(mtcars)[,foo:=mean(mpg),by=cyl])
rm(DT)
Loading