From b972e136e17a07d43a965bfb12f3e5b67a70434a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Sun, 12 Sep 2021 00:49:34 +0200 Subject: [PATCH 01/30] init topn --- NAMESPACE | 2 + R/wrappers.R | 2 + src/data.table.h | 3 ++ src/init.c | 2 + src/topn.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+) create mode 100644 src/topn.c diff --git a/NAMESPACE b/NAMESPACE index 260b7a7af4..0736912263 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -201,3 +201,5 @@ S3method(format_list_item, default) export(fdroplevels) S3method(droplevels, data.table) + +export(topn) diff --git a/R/wrappers.R b/R/wrappers.R index dcf8ba08e5..3fb13ea496 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -16,3 +16,5 @@ isRealReallyInt = function(x) .Call(CisRealReallyIntR, x) isReallyReal = function(x) .Call(CisReallyReal, x) coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) + +topn = function(x, n=length(x), na.last=TRUE, decreasing=FALSE) .Call(Ctopn, x, as.integer(n), na.last, decreasing) diff --git a/src/data.table.h b/src/data.table.h index 2ba639a64a..2b9a4b491d 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -258,3 +258,6 @@ int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...); // programming.c SEXP substitute_call_arg_namesR(SEXP expr, SEXP env); + +// topn.c +SEXP topn(SEXP x, SEXP nArg, SEXP ascArg, SEXP naArg); diff --git a/src/init.c b/src/init.c index 56bf66d419..97602e9ebd 100644 --- a/src/init.c +++ b/src/init.c @@ -128,6 +128,7 @@ SEXP allNAR(); SEXP test_dt_win_snprintf(); SEXP dt_zlib_version(); SEXP startsWithAny(); +SEXP topn(); // .Externals SEXP fastmean(); @@ -225,6 +226,7 @@ R_CallMethodDef callMethods[] = { {"Cdt_zlib_version", (DL_FUNC)&dt_zlib_version, -1}, {"Csubstitute_call_arg_namesR", (DL_FUNC) &substitute_call_arg_namesR, -1}, {"CstartsWithAny", (DL_FUNC)&startsWithAny, -1}, +{"Ctopn", (DL_FUNC)&topn, -1}, {NULL, NULL, 0} }; diff --git a/src/topn.c b/src/topn.c new file mode 100644 index 0000000000..221f0e9565 --- /dev/null +++ b/src/topn.c @@ -0,0 +1,99 @@ +#include "data.table.h" + +static inline void swap(int *a, int *b) { int tmp=*a; *a=*b; *b=tmp; } + +static inline bool icmp(const int a, const int b, const bool min, const bool nalast) { + if (a==NA_INTEGER) return(nalast); + if (b==NA_INTEGER) return(!nalast); + return(min ? a < b : a > b); +} +static inline bool dcmp(const double a, const double b, const bool min, const bool nalast) { + if (isnan(a)) return(nalast); + if (isnan(b)) return(!nalast); + return(min ? a < b : a > b); +} + +static inline bool i64cmp(const int64_t a, const int64_t b, const bool min, const bool nalast) { + if (a==NA_INTEGER64) return(nalast); + if (b==NA_INTEGER64) return(!nalast); + return(min ? a < b : a > b); +} + +static inline bool scmp(SEXP a, SEXP b, const bool min, bool nalast) { + if (a==NA_STRING) return(nalast); + if (b==NA_STRING) return(!nalast); + return(min ? strcmp(CHAR(a),CHAR(b))<0 : strcmp(CHAR(a),CHAR(b)))>0; +} + +// compare value with both childs and sift value down if child value smaller +// than parent (for minheap) +#undef SIFT +#define SIFT(CMP) { \ + int smallest, l, r; \ + while(true) { \ + smallest = k; \ + l = (k << 1) + 1; \ + r = (k << 1) + 2; \ + if (l < len && CMP(VAL[IND[l]],VAL[IND[smallest]],min,nalast)) \ + smallest = l; \ + if (r < len && CMP(VAL[IND[r]],VAL[IND[smallest]],min,nalast)) \ + smallest = r; \ + if (smallest != k) { \ + swap(&IND[k], &IND[smallest]); \ + k = smallest; \ + } else { \ + break; \ + } \ + } \ +} + +// for finding decreasing topn build minheap and add values if they exceed +// minimum by overwriting minimum and following down sifting +#undef TOPN +#define TOPN(CTYPE, RTYPE, CMP) { \ + const CTYPE *restrict VAL = (const CTYPE *)RTYPE(x); \ + for (int i=n/2; i>=0; --i) { k=i; len=n; SIFT(CMP); } \ + for (int i=n; i 0.")); + if (!IS_TRUE_OR_FALSE(ascArg)) error(_("%s must be TRUE or FALSE"), "decreasing"); + if (!IS_TRUE_OR_FALSE(naArg)) error(_("%s must be TRUE or FALSE"), "na.last"); + + const int xlen = LENGTH(x); + const int n = INTEGER(nArg)[0]; + if (n > xlen) error(_("TODO")); + + const bool min = LOGICAL(ascArg)[0]; + const bool nalast = LOGICAL(naArg)[0]; + + SEXP ans; + int k, len; + ans = PROTECT(allocVector(INTSXP, n)); + int *restrict ians = INTEGER(ans); + int *restrict IND = malloc(n*sizeof(int)); + for (int i=0; i Date: Mon, 13 Sep 2021 10:38:31 +0200 Subject: [PATCH 02/30] make stable --- src/topn.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/topn.c b/src/topn.c index 221f0e9565..e667c07e93 100644 --- a/src/topn.c +++ b/src/topn.c @@ -2,27 +2,31 @@ static inline void swap(int *a, int *b) { int tmp=*a; *a=*b; *b=tmp; } -static inline bool icmp(const int a, const int b, const bool min, const bool nalast) { - if (a==NA_INTEGER) return(nalast); - if (b==NA_INTEGER) return(!nalast); - return(min ? a < b : a > b); +static inline bool icmp(const int *x, const int a, const int b, const bool min, const bool nalast) { + if (x[a]==x[b]) return(a > b); + if (x[a]==NA_INTEGER) return(nalast); + if (x[b]==NA_INTEGER) return(!nalast); + return(min ? x[a] < x[b] : x[a] > x[b]); } -static inline bool dcmp(const double a, const double b, const bool min, const bool nalast) { - if (isnan(a)) return(nalast); - if (isnan(b)) return(!nalast); - return(min ? a < b : a > b); +static inline bool dcmp(const double *x, const int a, const int b, const bool min, const bool nalast) { + if (x[a]==x[b] || isnan(x[a]) && isnan(x[b])) return(a > b); + if (isnan(x[a])) return(nalast); + if (isnan(x[b])) return(!nalast); + return(min ? x[a] < x[b] : x[a] > x[b]); } -static inline bool i64cmp(const int64_t a, const int64_t b, const bool min, const bool nalast) { - if (a==NA_INTEGER64) return(nalast); - if (b==NA_INTEGER64) return(!nalast); - return(min ? a < b : a > b); +static inline bool i64cmp(const int64_t *x, const int a, const int b, const bool min, const bool nalast) { + if (x[a]==x[b]) return(a > b); + if (x[a]==NA_INTEGER64) return(nalast); + if (x[b]==NA_INTEGER64) return(!nalast); + return(min ? x[a] < x[b] : x[a] > x[b]); } -static inline bool scmp(SEXP a, SEXP b, const bool min, bool nalast) { - if (a==NA_STRING) return(nalast); - if (b==NA_STRING) return(!nalast); - return(min ? strcmp(CHAR(a),CHAR(b))<0 : strcmp(CHAR(a),CHAR(b)))>0; +static inline bool scmp(const SEXP *restrict x, const int a, const int b, const bool min, bool nalast) { + if (strcmp(CHAR(x[a]), CHAR(x[b])) == 0) return (a > b); + if (x[a]==NA_STRING) return(nalast); + if (x[b]==NA_STRING) return(!nalast); + return(min ? strcmp(CHAR(x[a]),CHAR(x[b]))<0 : strcmp(CHAR(x[a]),CHAR(x[b])))>0; } // compare value with both childs and sift value down if child value smaller @@ -34,9 +38,9 @@ static inline bool scmp(SEXP a, SEXP b, const bool min, bool nalast) { smallest = k; \ l = (k << 1) + 1; \ r = (k << 1) + 2; \ - if (l < len && CMP(VAL[IND[l]],VAL[IND[smallest]],min,nalast)) \ + if (l < len && CMP(VAL,IND[l],IND[smallest],min,nalast)) \ smallest = l; \ - if (r < len && CMP(VAL[IND[r]],VAL[IND[smallest]],min,nalast)) \ + if (r < len && CMP(VAL,IND[r],IND[smallest],min,nalast)) \ smallest = r; \ if (smallest != k) { \ swap(&IND[k], &IND[smallest]); \ @@ -54,7 +58,7 @@ static inline bool scmp(SEXP a, SEXP b, const bool min, bool nalast) { const CTYPE *restrict VAL = (const CTYPE *)RTYPE(x); \ for (int i=n/2; i>=0; --i) { k=i; len=n; SIFT(CMP); } \ for (int i=n; i Date: Tue, 14 Sep 2021 00:03:42 +0200 Subject: [PATCH 03/30] add complex --- src/topn.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/topn.c b/src/topn.c index e667c07e93..c7c3679a68 100644 --- a/src/topn.c +++ b/src/topn.c @@ -22,13 +22,24 @@ static inline bool i64cmp(const int64_t *x, const int a, const int b, const bool return(min ? x[a] < x[b] : x[a] > x[b]); } -static inline bool scmp(const SEXP *restrict x, const int a, const int b, const bool min, bool nalast) { +static inline bool scmp(const SEXP *restrict x, const int a, const int b, const bool min, const bool nalast) { if (strcmp(CHAR(x[a]), CHAR(x[b])) == 0) return (a > b); if (x[a]==NA_STRING) return(nalast); if (x[b]==NA_STRING) return(!nalast); return(min ? strcmp(CHAR(x[a]),CHAR(x[b]))<0 : strcmp(CHAR(x[a]),CHAR(x[b])))>0; } +static inline bool ccmp(const Rcomplex *x, const int a, const int b, const bool min, const bool nalast) { + if ((isnan(x[a].r) || isnan(x[a].i)) && (isnan(x[b].r) || isnan(x[b].i))) return (a > b); + if (x[a].r==x[b].r) { + if (x[a].i==x[b].i) return(a > b); + return(min ? x[a].i < x[b].i : x[a].i > x[b].i); + } + if (isnan(x[a].r) || isnan(x[a].i)) return(nalast); + if (isnan(x[b].r) || isnan(x[b].i)) return(!nalast); + return(min ? x[a].r < x[b].r : x[a].r > x[b].r); +} + // compare value with both childs and sift value down if child value smaller // than parent (for minheap) #undef SIFT @@ -40,7 +51,7 @@ static inline bool scmp(const SEXP *restrict x, const int a, const int b, const r = (k << 1) + 2; \ if (l < len && CMP(VAL,IND[l],IND[smallest],min,nalast)) \ smallest = l; \ - if (r < len && CMP(VAL,IND[r],IND[smallest],min,nalast)) \ + if (r < len && CMP(VAL,IND[r],IND[smallest],min,nalast)) \ smallest = r; \ if (smallest != k) { \ swap(&IND[k], &IND[smallest]); \ @@ -90,11 +101,12 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { int *restrict IND = malloc(n*sizeof(int)); for (int i=0; i Date: Wed, 15 Sep 2021 23:27:01 +0200 Subject: [PATCH 04/30] rename index array --- src/topn.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/topn.c b/src/topn.c index c7c3679a68..1c5c6a490b 100644 --- a/src/topn.c +++ b/src/topn.c @@ -48,13 +48,13 @@ static inline bool ccmp(const Rcomplex *x, const int a, const int b, const bool while(true) { \ smallest = k; \ l = (k << 1) + 1; \ - r = (k << 1) + 2; \ - if (l < len && CMP(VAL,IND[l],IND[smallest],min,nalast)) \ + r = l+1; \ + if (l < len && CMP(VAL,INDEX[l],INDEX[smallest],min,nalast)) \ smallest = l; \ - if (r < len && CMP(VAL,IND[r],IND[smallest],min,nalast)) \ + if (r < len && CMP(VAL,INDEX[r],INDEX[smallest],min,nalast)) \ smallest = r; \ if (smallest != k) { \ - swap(&IND[k], &IND[smallest]); \ + swap(&INDEX[k], &INDEX[smallest]); \ k = smallest; \ } else { \ break; \ @@ -69,17 +69,17 @@ static inline bool ccmp(const Rcomplex *x, const int a, const int b, const bool const CTYPE *restrict VAL = (const CTYPE *)RTYPE(x); \ for (int i=n/2; i>=0; --i) { k=i; len=n; SIFT(CMP); } \ for (int i=n; i xlen) error(_("TODO")); + if (n > xlen) error(_("n must be smaller or equal than length(x) but provided n=%d and length(x)=%d"), n, xlen); const bool min = LOGICAL(ascArg)[0]; const bool nalast = LOGICAL(naArg)[0]; @@ -98,8 +98,8 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { int k, len; ans = PROTECT(allocVector(INTSXP, n)); int *restrict ians = INTEGER(ans); - int *restrict IND = malloc(n*sizeof(int)); - for (int i=0; i Date: Fri, 17 Sep 2021 10:59:49 +0200 Subject: [PATCH 05/30] ISNAN_COMPLEX --- src/topn.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/topn.c b/src/topn.c index 1c5c6a490b..3c9bb53f23 100644 --- a/src/topn.c +++ b/src/topn.c @@ -30,13 +30,13 @@ static inline bool scmp(const SEXP *restrict x, const int a, const int b, const } static inline bool ccmp(const Rcomplex *x, const int a, const int b, const bool min, const bool nalast) { - if ((isnan(x[a].r) || isnan(x[a].i)) && (isnan(x[b].r) || isnan(x[b].i))) return (a > b); + if (ISNAN_COMPLEX(x[a]) && ISNAN_COMPLEX(x[b])) return (a > b); if (x[a].r==x[b].r) { if (x[a].i==x[b].i) return(a > b); return(min ? x[a].i < x[b].i : x[a].i > x[b].i); } - if (isnan(x[a].r) || isnan(x[a].i)) return(nalast); - if (isnan(x[b].r) || isnan(x[b].i)) return(!nalast); + if (ISNAN_COMPLEX(x[a])) return(nalast); + if (ISNAN_COMPLEX(x[b])) return(!nalast); return(min ? x[a].r < x[b].r : x[a].r > x[b].r); } @@ -88,8 +88,11 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { if (!IS_TRUE_OR_FALSE(naArg)) error(_("%s must be TRUE or FALSE"), "na.last"); const int xlen = LENGTH(x); - const int n = INTEGER(nArg)[0]; - if (n > xlen) error(_("n must be smaller or equal than length(x) but provided n=%d and length(x)=%d"), n, xlen); + int n = INTEGER(nArg)[0]; + if (n > xlen) { + warning(_("n should be smaller or equal than length(x) but provided n=%d and length(x)=%d.\n Coercing n to length(x)."), n, xlen); + n = xlen; + } const bool min = LOGICAL(ascArg)[0]; const bool nalast = LOGICAL(naArg)[0]; From 1687cb6bc25d75628da44fff00b1b3d6b50afb7b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 17 Sep 2021 12:53:51 +0200 Subject: [PATCH 06/30] added tests --- inst/tests/tests.Rraw | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9af8653531..ec6762eac1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -61,6 +61,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { split.data.table = data.table:::split.data.table if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith test = data.table:::test + topn = data.table:::topn uniqlengths = data.table:::uniqlengths uniqlist = data.table:::uniqlist which_ = data.table:::which_ @@ -18141,4 +18142,48 @@ DT = data.table(A=letters[1:3], B=4:6, key="A") test(2215.1, DT["b", B], 5L) # has worked forever test(2215.2, DT[factor("b"), B], 5L) # now works too, joining fact/fact, char/fact and fact/char have plenty of tests +# topn retrieving which.max/which.min for more than 1 element #3804 +test(2216.00, topn(x, 7L, na.last=TRUE, decreasing=FALSE), order(x, na.last=TRUE, decreasing=FALSE)[1L:7L]) +test(2216.00, topn(x, 7L, na.last=FALSE, decreasing=FALSE), order(x, na.last=FALSE, decreasing=FALSE)[1L:7L]) +test(2216.00, topn(x, 7L, na.last=TRUE, decreasing=TRUE), order(x, na.last=TRUE, decreasing=TRUE)[1L:7L]) +test(2216.00, topn(x, 7L, na.last=FALSE, decreasing=TRUE), order(x, na.last=FALSE, decreasing=TRUE)[1L:7L]) + +test(2216.00, topn(x, 10L, na.last=TRUE, decreasing=FALSE), order(x, na.last=TRUE, decreasing=FALSE)) +test(2216.00, topn(x, 10L, na.last=FALSE, decreasing=FALSE), order(x, na.last=FALSE, decreasing=FALSE)) +test(2216.00, topn(x, 10L, na.last=TRUE, decreasing=TRUE), order(x, na.last=TRUE, decreasing=TRUE)) +test(2216.00, topn(x, 10L, na.last=FALSE, decreasing=TRUE), order(x, na.last=FALSE, decreasing=TRUE)) + +set.seed(373L) +x=list(sample(c(-1:1, NA), 10, TRUE), + sample(c(rnorm(3), NA), 10, TRUE), + sample(c(letters[1:3],NA), 10, TRUE), + c(NaN, Inf, -Inf), + c(-Inf, 0, Inf), + c(-Inf, Inf), + c(Inf, -Inf), + c(0, NaN), + c(NaN, 0), + c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i), + c(3, 10+5i, 8, 10+4i, NA, 3, 1+2i, 5+1i, 8+1i, 8, NA)) +testnum = 2216.0 +for (i in seq.int(x)) { + testnum = testnum + 0.01 + l = length(x[[i]]) + s = sample(l-1L, 1L) + test(testnum+0.001, topn(x[[i]], l, na.last=TRUE, decreasing=FALSE), order(x[[i]], na.last=TRUE, decreasing=FALSE)) + test(testnum+0.002, topn(x[[i]], l, na.last=FALSE, decreasing=FALSE), order(x[[i]], na.last=FALSE, decreasing=FALSE)) + test(testnum+0.003, topn(x[[i]], l, na.last=TRUE, decreasing=TRUE), order(x[[i]], na.last=TRUE, decreasing=TRUE)) + test(testnum+0.004, topn(x[[i]], l, na.last=FALSE, decreasing=TRUE), order(x[[i]], na.last=FALSE, decreasing=TRUE)) + test(testnum+0.005, topn(x[[i]], s, na.last=TRUE, decreasing=FALSE), order(x[[i]], na.last=TRUE, decreasing=FALSE)[1L:s]) + test(testnum+0.006, topn(x[[i]], s, na.last=FALSE, decreasing=FALSE), order(x[[i]], na.last=FALSE, decreasing=FALSE)[1L:s]) + test(testnum+0.007, topn(x[[i]], s, na.last=TRUE, decreasing=TRUE), order(x[[i]], na.last=TRUE, decreasing=TRUE)[1L:s]) + test(testnum+0.008, topn(x[[i]], s, na.last=FALSE, decreasing=TRUE), order(x[[i]], na.last=FALSE, decreasing=TRUE)[1L:s]) +} +if (test_bit64) { + x=as.integer64(c(-5, 5, 0, NA, -5, 5, 1e18, NA, 5, 1e-18)) + test(2216.901, topn(x, 10L, na.last=TRUE, decreasing=FALSE), order(x, na.last=TRUE, decreasing=FALSE)) + test(2216.902, topn(x, 10L, na.last=FALSE, decreasing=FALSE), order(x, na.last=FALSE, decreasing=FALSE)) + test(2216.903, topn(x, 10L, na.last=TRUE, decreasing=TRUE), order(x, na.last=TRUE, decreasing=TRUE)) + test(2216.904, topn(x, 10L, na.last=FALSE, decreasing=TRUE), order(x, na.last=FALSE, decreasing=TRUE)) +} From fc7b948acb51a3ed1abe7d968e8225c5915ad19a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 17 Sep 2021 12:54:04 +0200 Subject: [PATCH 07/30] added man --- man/topn.Rd | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 man/topn.Rd diff --git a/man/topn.Rd b/man/topn.Rd new file mode 100644 index 0000000000..6ced2ffefa --- /dev/null +++ b/man/topn.Rd @@ -0,0 +1,44 @@ +\name{topn} +\alias{topn} +\alias{which.max} +\alias{which.min} +\title{ Top n values index } +\description{ + \code{topn} returns the indices of the smallest (resp. largest) \code{n} values of x. This is an extension \code{\link{which.min}} (\code{\link{which.max}}) which only return the index of the minimum (resp. maximum). + + Especially, for large vectors this method will be faster and less memory intensive than \code{order} since no full sort is performed. + + \code{bit64::integer64} type is also supported. +} + +\usage{ +topn(x, n, na.last=TRUE, decreasing=FALSE) +} +\arguments{ + \item{x}{ A numeric, complex, character or logical vector. } + \item{n}{ A numeric vector length 1. How many indices to select. } + \item{na.last}{ Control treatment of \code{NA}s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first. } + \item{decreasing}{ Logical. Indicating whether the order should be increasing or decreasing. } +} + +\value{ + An integer vector giving the indicies of the \code{n} smallest (largest) for \code{decreasing=FALSE (TRUE)} elements of \code{x}. +} + +\examples{ +x = c(1:4, 0:5, 11) +# indices of smallest 3 values +topn(x, 3) +# indices of largest 3 values +topn(x, 3, decreasing = TRUE) + +## NA's can be put to front or back +x = c(NA, 1:4) +topn(x, 5) +topn(x, 5, na.last=FALSE) + +} +\seealso{ + \code{\link{data.table}}, \code{\link{order}}, \code{\link{which.max}, \code{\link{which.min}}} +} +\keyword{ data } From 90a652f0a91f7cd16d16ec53936e5c848b2bca65 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 17 Sep 2021 13:22:49 +0200 Subject: [PATCH 08/30] fix man --- man/topn.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/topn.Rd b/man/topn.Rd index 6ced2ffefa..b3166a2b60 100644 --- a/man/topn.Rd +++ b/man/topn.Rd @@ -8,6 +8,7 @@ Especially, for large vectors this method will be faster and less memory intensive than \code{order} since no full sort is performed. + \code{bit64::integer64} type is also supported. } @@ -39,6 +40,6 @@ topn(x, 5, na.last=FALSE) } \seealso{ - \code{\link{data.table}}, \code{\link{order}}, \code{\link{which.max}, \code{\link{which.min}}} + \code{\link{data.table}}, \code{\link{order}}, \code{\link{which.max}}, \code{\link{which.min}} } \keyword{ data } From c513efbda539139425bb49dddb7c2278f9b6cea7 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 17 Sep 2021 13:23:06 +0200 Subject: [PATCH 09/30] finish tests --- R/wrappers.R | 2 +- inst/tests/tests.Rraw | 38 ++++++++++++++------------------------ 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/R/wrappers.R b/R/wrappers.R index 3fb13ea496..53a38218b9 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -17,4 +17,4 @@ isReallyReal = function(x) .Call(CisReallyReal, x) coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) -topn = function(x, n=length(x), na.last=TRUE, decreasing=FALSE) .Call(Ctopn, x, as.integer(n), na.last, decreasing) +topn = function(x, n, na.last=TRUE, decreasing=FALSE) .Call(Ctopn, x, as.integer(n), na.last, decreasing) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ec6762eac1..ed52ec37b4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18143,16 +18143,6 @@ test(2215.1, DT["b", B], 5L) # has worked forever test(2215.2, DT[factor("b"), B], 5L) # now works too, joining fact/fact, char/fact and fact/char have plenty of tests # topn retrieving which.max/which.min for more than 1 element #3804 -test(2216.00, topn(x, 7L, na.last=TRUE, decreasing=FALSE), order(x, na.last=TRUE, decreasing=FALSE)[1L:7L]) -test(2216.00, topn(x, 7L, na.last=FALSE, decreasing=FALSE), order(x, na.last=FALSE, decreasing=FALSE)[1L:7L]) -test(2216.00, topn(x, 7L, na.last=TRUE, decreasing=TRUE), order(x, na.last=TRUE, decreasing=TRUE)[1L:7L]) -test(2216.00, topn(x, 7L, na.last=FALSE, decreasing=TRUE), order(x, na.last=FALSE, decreasing=TRUE)[1L:7L]) - -test(2216.00, topn(x, 10L, na.last=TRUE, decreasing=FALSE), order(x, na.last=TRUE, decreasing=FALSE)) -test(2216.00, topn(x, 10L, na.last=FALSE, decreasing=FALSE), order(x, na.last=FALSE, decreasing=FALSE)) -test(2216.00, topn(x, 10L, na.last=TRUE, decreasing=TRUE), order(x, na.last=TRUE, decreasing=TRUE)) -test(2216.00, topn(x, 10L, na.last=FALSE, decreasing=TRUE), order(x, na.last=FALSE, decreasing=TRUE)) - set.seed(373L) x=list(sample(c(-1:1, NA), 10, TRUE), sample(c(rnorm(3), NA), 10, TRUE), @@ -18166,24 +18156,24 @@ x=list(sample(c(-1:1, NA), 10, TRUE), c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i), c(3, 10+5i, 8, 10+4i, NA, 3, 1+2i, 5+1i, 8+1i, 8, NA)) testnum = 2216.0 +b = CJ(c(TRUE,FALSE), c(TRUE,FALSE)) for (i in seq.int(x)) { - testnum = testnum + 0.01 l = length(x[[i]]) s = sample(l-1L, 1L) - test(testnum+0.001, topn(x[[i]], l, na.last=TRUE, decreasing=FALSE), order(x[[i]], na.last=TRUE, decreasing=FALSE)) - test(testnum+0.002, topn(x[[i]], l, na.last=FALSE, decreasing=FALSE), order(x[[i]], na.last=FALSE, decreasing=FALSE)) - test(testnum+0.003, topn(x[[i]], l, na.last=TRUE, decreasing=TRUE), order(x[[i]], na.last=TRUE, decreasing=TRUE)) - test(testnum+0.004, topn(x[[i]], l, na.last=FALSE, decreasing=TRUE), order(x[[i]], na.last=FALSE, decreasing=TRUE)) - test(testnum+0.005, topn(x[[i]], s, na.last=TRUE, decreasing=FALSE), order(x[[i]], na.last=TRUE, decreasing=FALSE)[1L:s]) - test(testnum+0.006, topn(x[[i]], s, na.last=FALSE, decreasing=FALSE), order(x[[i]], na.last=FALSE, decreasing=FALSE)[1L:s]) - test(testnum+0.007, topn(x[[i]], s, na.last=TRUE, decreasing=TRUE), order(x[[i]], na.last=TRUE, decreasing=TRUE)[1L:s]) - test(testnum+0.008, topn(x[[i]], s, na.last=FALSE, decreasing=TRUE), order(x[[i]], na.last=FALSE, decreasing=TRUE)[1L:s]) + for (j in nrow(b)) { + test(testnum, topn(x[[i]], l, na.last=b[[j,1]], decreasing=b[[j,2]]), order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])) + testnum = testnum + 0.01 + test(testnum, topn(x[[i]], s, na.last=b[[j,1]], decreasing=b[[j,2]]), order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])[1L:s]) + testnum = testnum + 0.01 + } } - +test(2216.80, topn(1L, 2L), 1L, + warning="n should be smaller or equal than length(x) but provided n=2 and length(x)=1.\n Coercing n to length(x).") +test(2216.81, topn(1L, -1L), error="topn(x,n) only implemented for n > 0.") if (test_bit64) { x=as.integer64(c(-5, 5, 0, NA, -5, 5, 1e18, NA, 5, 1e-18)) - test(2216.901, topn(x, 10L, na.last=TRUE, decreasing=FALSE), order(x, na.last=TRUE, decreasing=FALSE)) - test(2216.902, topn(x, 10L, na.last=FALSE, decreasing=FALSE), order(x, na.last=FALSE, decreasing=FALSE)) - test(2216.903, topn(x, 10L, na.last=TRUE, decreasing=TRUE), order(x, na.last=TRUE, decreasing=TRUE)) - test(2216.904, topn(x, 10L, na.last=FALSE, decreasing=TRUE), order(x, na.last=FALSE, decreasing=TRUE)) + test(2216.82, topn(x, 10L, na.last=TRUE, decreasing=FALSE), order(x, na.last=TRUE, decreasing=FALSE)) + test(2216.83, topn(x, 10L, na.last=FALSE, decreasing=FALSE), order(x, na.last=FALSE, decreasing=FALSE)) + test(2216.84, topn(x, 10L, na.last=TRUE, decreasing=TRUE), order(x, na.last=TRUE, decreasing=TRUE)) + test(2216.85, topn(x, 10L, na.last=FALSE, decreasing=TRUE), order(x, na.last=FALSE, decreasing=TRUE)) } From ab25d3f1964800955bc0d274df7e388693534692 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 17 Sep 2021 16:28:45 +0200 Subject: [PATCH 10/30] add coverage --- inst/tests/tests.Rraw | 1 + src/topn.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ed52ec37b4..aa56243b0e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18167,6 +18167,7 @@ for (i in seq.int(x)) { testnum = testnum + 0.01 } } +test(2216.79, topn(as.raw(1), 1), error="Type 'raw' not supported by topn." test(2216.80, topn(1L, 2L), 1L, warning="n should be smaller or equal than length(x) but provided n=2 and length(x)=1.\n Coercing n to length(x).") test(2216.81, topn(1L, -1L), error="topn(x,n) only implemented for n > 0.") diff --git a/src/topn.c b/src/topn.c index 3c9bb53f23..d679bb4f5b 100644 --- a/src/topn.c +++ b/src/topn.c @@ -111,7 +111,7 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { case CPLXSXP: { TOPN(Rcomplex, COMPLEX, ccmp); } break; case STRSXP: { TOPN(SEXP, STRING_PTR, scmp); } break; default: - free(INDEX); error(_("Type '%s' not supported by topn"), type2char(TYPEOF(x))); + free(INDEX); error(_("Type '%s' not supported by topn."), type2char(TYPEOF(x))); } UNPROTECT(1); return(ans); From 6785cf5cc4474b9ed1454fafddd5657c2782346a Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 17 Sep 2021 16:33:17 +0200 Subject: [PATCH 11/30] typo --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aa56243b0e..a2ff67b575 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18167,7 +18167,7 @@ for (i in seq.int(x)) { testnum = testnum + 0.01 } } -test(2216.79, topn(as.raw(1), 1), error="Type 'raw' not supported by topn." +test(2216.79, topn(as.raw(1), 1), error="Type 'raw' not supported by topn.") test(2216.80, topn(1L, 2L), 1L, warning="n should be smaller or equal than length(x) but provided n=2 and length(x)=1.\n Coercing n to length(x).") test(2216.81, topn(1L, -1L), error="topn(x,n) only implemented for n > 0.") From 78502c76629396b15fc09be37caf62df6acb5831 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 17 Sep 2021 17:13:41 +0200 Subject: [PATCH 12/30] add NEWS --- NEWS.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/NEWS.md b/NEWS.md index 30866667b9..be2bbb7370 100644 --- a/NEWS.md +++ b/NEWS.md @@ -141,6 +141,19 @@ 26. `base::droplevels()` gains a fast method for `data.table`, [#647](https://github.com/Rdatatable/data.table/issues/647). Thanks to Steve Lianoglou for requesting, and Jan Gorecki and Benjamin Schwendinger for the PR. `fdroplevels()` for use on vectors has also been added. +27. New function `topn(x,n)` [#3804](https://github.com/Rdatatable/data.table/issues/3804). It returns the indices of the `n` smallest/largest values of a vector `x`. Previously, one had to use `order(x)[1:n]` which produced a full sorting of `x`. Usage of `topn` is advised for large vectors where sorting takes long time. Thanks to Michael Chirico for requesting, and Benjamin Schwendinger for PR. + + ```R + set.seed(123) + x = rnorm(1e8) + system.time(topn(x, 5L)) + # user system elapsed + # 0.249 0.000 0.250 + system.time(order(x)[1L:5L]) + # user system elapsed + # 4.710 0.369 5.078 + ``` + ## BUG FIXES 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. From c9b7a07ce6508f51dbe83ff287bd2a0cd29a4380 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 10 Jan 2024 23:02:44 +0100 Subject: [PATCH 13/30] add sorted argument --- R/wrappers.R | 2 +- inst/tests/tests.Rraw | 12 ++++++++++-- man/topn.Rd | 4 ++-- src/data.table.h | 2 +- src/topn.c | 30 +++++++++++++++++++----------- 5 files changed, 33 insertions(+), 17 deletions(-) diff --git a/R/wrappers.R b/R/wrappers.R index 53a38218b9..c031f11137 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -17,4 +17,4 @@ isReallyReal = function(x) .Call(CisReallyReal, x) coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) -topn = function(x, n, na.last=TRUE, decreasing=FALSE) .Call(Ctopn, x, as.integer(n), na.last, decreasing) +topn = function(x, n, na.last=TRUE, decreasing=FALSE, sorted=FALSE) .Call(Ctopn, x, as.integer(n), na.last, decreasing, sorted) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a2ff67b575..0f17cf3c07 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18161,9 +18161,17 @@ for (i in seq.int(x)) { l = length(x[[i]]) s = sample(l-1L, 1L) for (j in nrow(b)) { - test(testnum, topn(x[[i]], l, na.last=b[[j,1]], decreasing=b[[j,2]]), order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])) + test(testnum, topn(x[[i]], l, na.last=b[[j,1]], decreasing=b[[j,2]], sorted=TRUE), + order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])) testnum = testnum + 0.01 - test(testnum, topn(x[[i]], s, na.last=b[[j,1]], decreasing=b[[j,2]]), order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])[1L:s]) + test(testnum, topn(x[[i]], s, na.last=b[[j,1]], decreasing=b[[j,2]], sorted=TRUE), + order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])[1L:s]) + testnum = testnum + 0.01 + test(testnum, sort(topn(x[[i]], l, na.last=b[[j,1]], decreasing=b[[j,2]], sorted=FALSE)), + sort(order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]]))) + testnum = testnum + 0.01 + test(testnum, sort(topn(x[[i]], s, na.last=b[[j,1]], decreasing=b[[j,2]], sorted=FALSE)), + sort(order(x[[i]], na.last=b[[j,1]], decreasing=b[[j,2]])[1L:s])) testnum = testnum + 0.01 } } diff --git a/man/topn.Rd b/man/topn.Rd index b3166a2b60..fb9d44f0dc 100644 --- a/man/topn.Rd +++ b/man/topn.Rd @@ -8,7 +8,6 @@ Especially, for large vectors this method will be faster and less memory intensive than \code{order} since no full sort is performed. - \code{bit64::integer64} type is also supported. } @@ -19,7 +18,8 @@ topn(x, n, na.last=TRUE, decreasing=FALSE) \item{x}{ A numeric, complex, character or logical vector. } \item{n}{ A numeric vector length 1. How many indices to select. } \item{na.last}{ Control treatment of \code{NA}s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first. } - \item{decreasing}{ Logical. Indicating whether the order should be increasing or decreasing. } + \item{decreasing}{ Logical. Default is \code{FALSE}. Indicating whether the order should be increasing or decreasing. } + \item[{sorted}{ Logical. Default is \code{FALSE}. Indicating whether order should be sorted with respect to decreasing. } } \value{ diff --git a/src/data.table.h b/src/data.table.h index 2b9a4b491d..96fa613f8f 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -260,4 +260,4 @@ int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...); SEXP substitute_call_arg_namesR(SEXP expr, SEXP env); // topn.c -SEXP topn(SEXP x, SEXP nArg, SEXP ascArg, SEXP naArg); +SEXP topn(SEXP x, SEXP nArg, SEXP ascArg, SEXP naArg, SEXP sortedArg); diff --git a/src/topn.c b/src/topn.c index d679bb4f5b..88a3994219 100644 --- a/src/topn.c +++ b/src/topn.c @@ -65,7 +65,7 @@ static inline bool ccmp(const Rcomplex *x, const int a, const int b, const bool // for finding decreasing topn build minheap and add values if they exceed // minimum by overwriting minimum and following down sifting #undef TOPN -#define TOPN(CTYPE, RTYPE, CMP) { \ +#define TOPN(CTYPE, RTYPE, CMP, SORTED) { \ const CTYPE *restrict VAL = (const CTYPE *)RTYPE(x); \ for (int i=n/2; i>=0; --i) { k=i; len=n; SIFT(CMP); } \ for (int i=n; i 0.")); if (!IS_TRUE_OR_FALSE(ascArg)) error(_("%s must be TRUE or FALSE"), "decreasing"); if (!IS_TRUE_OR_FALSE(naArg)) error(_("%s must be TRUE or FALSE"), "na.last"); + if (!IS_TRUE_OR_FALSE(sortedArg)) error(_("%s must be TRUE or FALSE"), "sorted"); const int xlen = LENGTH(x); int n = INTEGER(nArg)[0]; @@ -96,6 +103,7 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { const bool min = LOGICAL(ascArg)[0]; const bool nalast = LOGICAL(naArg)[0]; + const bool sorted = LOGICAL(sortedArg)[0]; SEXP ans; int k, len; @@ -104,12 +112,12 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { int *restrict INDEX = malloc(n*sizeof(int)); for (int i=0; i Date: Wed, 10 Jan 2024 23:18:10 +0100 Subject: [PATCH 14/30] add CODEOWNERS --- CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index 7d7a5ecaac..e5a529e99a 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -49,3 +49,7 @@ # utils /src/utils.c @HughParsonage + +# ordering +/src/topn.c @ben-schwen +/man/topn.Rd @ben-schwen From db055f536978eb71bd14b5de008b7cf0d9ae1550 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 10 Jan 2024 23:23:22 +0100 Subject: [PATCH 15/30] update NEWS --- NEWS.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 25716a3bee..e1ed662cc8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -298,12 +298,15 @@ ```R set.seed(123) x = rnorm(1e8) - system.time(topn(x, 5L)) - # user system elapsed - # 0.249 0.000 0.250 + system.time(topn(x, 5L,sorted=TRUE)) + # user system elapsed + # 0.287 0.000 0.288 + system.time(topn(x, 5L,sorted=FALSE)) + # user system elapsed + # 0.283 0.000 0.282 system.time(order(x)[1L:5L]) - # user system elapsed - # 4.710 0.369 5.078 + # user system elapsed + # 6.658 0.620 7.279 ``` ## BUG FIXES From 2b16d6a8227c06abbf8987dd126c3284535ab09f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 10 Jan 2024 23:30:06 +0100 Subject: [PATCH 16/30] fix docs --- man/topn.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/topn.Rd b/man/topn.Rd index fb9d44f0dc..5410104194 100644 --- a/man/topn.Rd +++ b/man/topn.Rd @@ -19,7 +19,7 @@ topn(x, n, na.last=TRUE, decreasing=FALSE) \item{n}{ A numeric vector length 1. How many indices to select. } \item{na.last}{ Control treatment of \code{NA}s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first. } \item{decreasing}{ Logical. Default is \code{FALSE}. Indicating whether the order should be increasing or decreasing. } - \item[{sorted}{ Logical. Default is \code{FALSE}. Indicating whether order should be sorted with respect to decreasing. } + \item{sorted}{ Logical. Default is \code{FALSE}. Indicating whether order should be sorted with respect to decreasing. } } \value{ From 13caa28dd41621bedb4720e29acda660416465b8 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Wed, 10 Jan 2024 23:35:07 +0100 Subject: [PATCH 17/30] add arg to doc --- man/topn.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/topn.Rd b/man/topn.Rd index 5410104194..442360ba24 100644 --- a/man/topn.Rd +++ b/man/topn.Rd @@ -12,7 +12,7 @@ } \usage{ -topn(x, n, na.last=TRUE, decreasing=FALSE) +topn(x, n, na.last=TRUE, decreasing=FALSE, sorted=FALSE) } \arguments{ \item{x}{ A numeric, complex, character or logical vector. } From 0fba26018c5235781b615eb8c9b2c581a4136506 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:16:53 +0200 Subject: [PATCH 18/30] unname args --- src/data.table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data.table.h b/src/data.table.h index b42fe42724..36f19596d5 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -269,7 +269,7 @@ SEXP substitute_call_arg_namesR(SEXP expr, SEXP env); SEXP notchin(SEXP x, SEXP table); // topn.c -SEXP topn(SEXP x, SEXP nArg, SEXP ascArg, SEXP naArg, SEXP sortedArg); +SEXP topn(SEXP, SEXP, SEXP, SEXP, SEXP); // functions called from R level .Call/.External and registered in init.c // these now live here to pass -Wstrict-prototypes, #5477 From ff541fd381ba6c06b08bc6c19e5e229eaf74fe11 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:17:54 +0200 Subject: [PATCH 19/30] remove parentheses from return --- src/topn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/topn.c b/src/topn.c index 88a3994219..ba9e8a9f96 100644 --- a/src/topn.c +++ b/src/topn.c @@ -3,7 +3,7 @@ static inline void swap(int *a, int *b) { int tmp=*a; *a=*b; *b=tmp; } static inline bool icmp(const int *x, const int a, const int b, const bool min, const bool nalast) { - if (x[a]==x[b]) return(a > b); + if (x[a]==x[b]) return a > b; if (x[a]==NA_INTEGER) return(nalast); if (x[b]==NA_INTEGER) return(!nalast); return(min ? x[a] < x[b] : x[a] > x[b]); From fe0e6f9b94bc9f39a4455f415f7c1804b0501b50 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 12 Sep 2024 11:23:43 +0200 Subject: [PATCH 20/30] change index a,b with i,j --- src/topn.c | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/topn.c b/src/topn.c index 88a3994219..06efa36efd 100644 --- a/src/topn.c +++ b/src/topn.c @@ -2,42 +2,42 @@ static inline void swap(int *a, int *b) { int tmp=*a; *a=*b; *b=tmp; } -static inline bool icmp(const int *x, const int a, const int b, const bool min, const bool nalast) { - if (x[a]==x[b]) return(a > b); - if (x[a]==NA_INTEGER) return(nalast); - if (x[b]==NA_INTEGER) return(!nalast); - return(min ? x[a] < x[b] : x[a] > x[b]); +static inline bool icmp(const int *x, const int i, const int j, const bool min, const bool nalast) { + if (x[i]==x[j]) return(i > j); + if (x[i]==NA_INTEGER) return(nalast); + if (x[j]==NA_INTEGER) return(!nalast); + return(min ? x[i] < x[j] : x[i] > x[j]); } -static inline bool dcmp(const double *x, const int a, const int b, const bool min, const bool nalast) { - if (x[a]==x[b] || isnan(x[a]) && isnan(x[b])) return(a > b); - if (isnan(x[a])) return(nalast); - if (isnan(x[b])) return(!nalast); - return(min ? x[a] < x[b] : x[a] > x[b]); +static inline bool dcmp(const double *x, const int i, const int j, const bool min, const bool nalast) { + if (x[i]==x[j] || isnan(x[i]) && isnan(x[j])) return(i > j); + if (isnan(x[i])) return(nalast); + if (isnan(x[j])) return(!nalast); + return(min ? x[i] < x[j] : x[i] > x[j]); } -static inline bool i64cmp(const int64_t *x, const int a, const int b, const bool min, const bool nalast) { - if (x[a]==x[b]) return(a > b); - if (x[a]==NA_INTEGER64) return(nalast); - if (x[b]==NA_INTEGER64) return(!nalast); - return(min ? x[a] < x[b] : x[a] > x[b]); +static inline bool i64cmp(const int64_t *x, const int i, const int j, const bool min, const bool nalast) { + if (x[i]==x[j]) return(i > j); + if (x[i]==NA_INTEGER64) return(nalast); + if (x[j]==NA_INTEGER64) return(!nalast); + return(min ? x[i] < x[j] : x[i] > x[j]); } -static inline bool scmp(const SEXP *restrict x, const int a, const int b, const bool min, const bool nalast) { - if (strcmp(CHAR(x[a]), CHAR(x[b])) == 0) return (a > b); - if (x[a]==NA_STRING) return(nalast); - if (x[b]==NA_STRING) return(!nalast); - return(min ? strcmp(CHAR(x[a]),CHAR(x[b]))<0 : strcmp(CHAR(x[a]),CHAR(x[b])))>0; +static inline bool scmp(const SEXP *restrict x, const int i, const int j, const bool min, const bool nalast) { + if (strcmp(CHAR(x[i]), CHAR(x[j])) == 0) return (i > j); + if (x[i]==NA_STRING) return(nalast); + if (x[j]==NA_STRING) return(!nalast); + return(min ? strcmp(CHAR(x[i]),CHAR(x[j]))<0 : strcmp(CHAR(x[i]),CHAR(x[j])))>0; } -static inline bool ccmp(const Rcomplex *x, const int a, const int b, const bool min, const bool nalast) { - if (ISNAN_COMPLEX(x[a]) && ISNAN_COMPLEX(x[b])) return (a > b); - if (x[a].r==x[b].r) { - if (x[a].i==x[b].i) return(a > b); - return(min ? x[a].i < x[b].i : x[a].i > x[b].i); +static inline bool ccmp(const Rcomplex *x, const int i, const int j, const bool min, const bool nalast) { + if (ISNAN_COMPLEX(x[i]) && ISNAN_COMPLEX(x[j])) return (i > j); + if (x[i].r==x[j].r) { + if (x[i].i==x[j].i) return(i > j); + return(min ? x[i].i < x[j].i : x[i].i > x[j].i); } - if (ISNAN_COMPLEX(x[a])) return(nalast); - if (ISNAN_COMPLEX(x[b])) return(!nalast); - return(min ? x[a].r < x[b].r : x[a].r > x[b].r); + if (ISNAN_COMPLEX(x[i])) return(nalast); + if (ISNAN_COMPLEX(x[j])) return(!nalast); + return(min ? x[i].r < x[j].r : x[i].r > x[j].r); } // compare value with both childs and sift value down if child value smaller From e1d037efbf433b813bfb0a6c2478aefda232753d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 12 Sep 2024 11:27:16 +0200 Subject: [PATCH 21/30] remove parentheses from return --- src/topn.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/topn.c b/src/topn.c index 06efa36efd..cb90457783 100644 --- a/src/topn.c +++ b/src/topn.c @@ -3,41 +3,41 @@ static inline void swap(int *a, int *b) { int tmp=*a; *a=*b; *b=tmp; } static inline bool icmp(const int *x, const int i, const int j, const bool min, const bool nalast) { - if (x[i]==x[j]) return(i > j); - if (x[i]==NA_INTEGER) return(nalast); - if (x[j]==NA_INTEGER) return(!nalast); - return(min ? x[i] < x[j] : x[i] > x[j]); + if (x[i]==x[j]) return i > j; + if (x[i]==NA_INTEGER) return nalast; + if (x[j]==NA_INTEGER) return !nalast; + return min ? x[i] < x[j] : x[i] > x[j]; } static inline bool dcmp(const double *x, const int i, const int j, const bool min, const bool nalast) { - if (x[i]==x[j] || isnan(x[i]) && isnan(x[j])) return(i > j); - if (isnan(x[i])) return(nalast); - if (isnan(x[j])) return(!nalast); - return(min ? x[i] < x[j] : x[i] > x[j]); + if (x[i]==x[j] || isnan(x[i]) && isnan(x[j])) return i > j; + if (isnan(x[i])) return nalast; + if (isnan(x[j])) return !nalast; + return min ? x[i] < x[j] : x[i] > x[j]; } static inline bool i64cmp(const int64_t *x, const int i, const int j, const bool min, const bool nalast) { - if (x[i]==x[j]) return(i > j); - if (x[i]==NA_INTEGER64) return(nalast); - if (x[j]==NA_INTEGER64) return(!nalast); - return(min ? x[i] < x[j] : x[i] > x[j]); + if (x[i]==x[j]) return i > j; + if (x[i]==NA_INTEGER64) return nalast; + if (x[j]==NA_INTEGER64) return !nalast; + return min ? x[i] < x[j] : x[i] > x[j]; } static inline bool scmp(const SEXP *restrict x, const int i, const int j, const bool min, const bool nalast) { - if (strcmp(CHAR(x[i]), CHAR(x[j])) == 0) return (i > j); - if (x[i]==NA_STRING) return(nalast); - if (x[j]==NA_STRING) return(!nalast); - return(min ? strcmp(CHAR(x[i]),CHAR(x[j]))<0 : strcmp(CHAR(x[i]),CHAR(x[j])))>0; + if (strcmp(CHAR(x[i]), CHAR(x[j])) == 0) return i > j; + if (x[i]==NA_STRING) return nalast; + if (x[j]==NA_STRING) return !nalast; + return min ? strcmp(CHAR(x[i]),CHAR(x[j]))<0 : strcmp(CHAR(x[i]),CHAR(x[j]))>0; } static inline bool ccmp(const Rcomplex *x, const int i, const int j, const bool min, const bool nalast) { - if (ISNAN_COMPLEX(x[i]) && ISNAN_COMPLEX(x[j])) return (i > j); + if (ISNAN_COMPLEX(x[i]) && ISNAN_COMPLEX(x[j])) return i > j; if (x[i].r==x[j].r) { - if (x[i].i==x[j].i) return(i > j); - return(min ? x[i].i < x[j].i : x[i].i > x[j].i); + if (x[i].i==x[j].i) return i > j; + return min ? x[i].i < x[j].i : x[i].i > x[j].i; } - if (ISNAN_COMPLEX(x[i])) return(nalast); - if (ISNAN_COMPLEX(x[j])) return(!nalast); - return(min ? x[i].r < x[j].r : x[i].r > x[j].r); + if (ISNAN_COMPLEX(x[i])) return nalast; + if (ISNAN_COMPLEX(x[j])) return !nalast; + return min ? x[i].r < x[j].r : x[i].r > x[j].r; } // compare value with both childs and sift value down if child value smaller From d8cf910e287f8bc69bcf03ac5a15746c7f5ec24f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 12 Sep 2024 11:30:49 +0200 Subject: [PATCH 22/30] remove unnecessary const --- src/topn.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/topn.c b/src/topn.c index cb90457783..18216b6654 100644 --- a/src/topn.c +++ b/src/topn.c @@ -2,34 +2,35 @@ static inline void swap(int *a, int *b) { int tmp=*a; *a=*b; *b=tmp; } -static inline bool icmp(const int *x, const int i, const int j, const bool min, const bool nalast) { +static inline bool icmp(const int *x, int i, int j, bool min, bool nalast) { if (x[i]==x[j]) return i > j; if (x[i]==NA_INTEGER) return nalast; if (x[j]==NA_INTEGER) return !nalast; return min ? x[i] < x[j] : x[i] > x[j]; } -static inline bool dcmp(const double *x, const int i, const int j, const bool min, const bool nalast) { + +static inline bool dcmp(const double *x, int i, int j, bool min, bool nalast) { if (x[i]==x[j] || isnan(x[i]) && isnan(x[j])) return i > j; if (isnan(x[i])) return nalast; if (isnan(x[j])) return !nalast; return min ? x[i] < x[j] : x[i] > x[j]; } -static inline bool i64cmp(const int64_t *x, const int i, const int j, const bool min, const bool nalast) { +static inline bool i64cmp(const int64_t *x, int i, int j, bool min, bool nalast) { if (x[i]==x[j]) return i > j; if (x[i]==NA_INTEGER64) return nalast; if (x[j]==NA_INTEGER64) return !nalast; return min ? x[i] < x[j] : x[i] > x[j]; } -static inline bool scmp(const SEXP *restrict x, const int i, const int j, const bool min, const bool nalast) { +static inline bool scmp(const SEXP *restrict x, int i, int j, bool min, bool nalast) { if (strcmp(CHAR(x[i]), CHAR(x[j])) == 0) return i > j; if (x[i]==NA_STRING) return nalast; if (x[j]==NA_STRING) return !nalast; return min ? strcmp(CHAR(x[i]),CHAR(x[j]))<0 : strcmp(CHAR(x[i]),CHAR(x[j]))>0; } -static inline bool ccmp(const Rcomplex *x, const int i, const int j, const bool min, const bool nalast) { +static inline bool ccmp(const Rcomplex *x, int i, int j, bool min, bool nalast) { if (ISNAN_COMPLEX(x[i]) && ISNAN_COMPLEX(x[j])) return i > j; if (x[i].r==x[j].r) { if (x[i].i==x[j].i) return i > j; From 5e547dfe9c3c6624a5f2bfbf3beca6432c5641cf Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 12 Sep 2024 11:33:51 +0200 Subject: [PATCH 23/30] add parentheses for op precedence --- src/topn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/topn.c b/src/topn.c index 18216b6654..056c45ce66 100644 --- a/src/topn.c +++ b/src/topn.c @@ -10,7 +10,7 @@ static inline bool icmp(const int *x, int i, int j, bool min, bool nalast) { } static inline bool dcmp(const double *x, int i, int j, bool min, bool nalast) { - if (x[i]==x[j] || isnan(x[i]) && isnan(x[j])) return i > j; + if (x[i]==x[j] || (isnan(x[i]) && isnan(x[j]))) return i > j; if (isnan(x[i])) return nalast; if (isnan(x[j])) return !nalast; return min ? x[i] < x[j] : x[i] > x[j]; From 134d0a39aa174f606a329c0d1be89bc10f88354c Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 12 Sep 2024 11:35:33 +0200 Subject: [PATCH 24/30] clarify comment --- src/topn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/topn.c b/src/topn.c index 056c45ce66..eabfed13bf 100644 --- a/src/topn.c +++ b/src/topn.c @@ -41,8 +41,7 @@ static inline bool ccmp(const Rcomplex *x, int i, int j, bool min, bool nalast) return min ? x[i].r < x[j].r : x[i].r > x[j].r; } -// compare value with both childs and sift value down if child value smaller -// than parent (for minheap) +// compare value of node with values of left/right child nodes and sift value down if value of child node is smaller than parent (for minheap) #undef SIFT #define SIFT(CMP) { \ int smallest, l, r; \ From 37efbf387635df2e2be46e6581cbc8a3fd507b6d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Thu, 12 Sep 2024 11:37:11 +0200 Subject: [PATCH 25/30] add error for malloc fail --- src/topn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/topn.c b/src/topn.c index eabfed13bf..9e44ad7a82 100644 --- a/src/topn.c +++ b/src/topn.c @@ -110,6 +110,7 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg, SEXP sortedArg) { ans = PROTECT(allocVector(INTSXP, n)); int *restrict ians = INTEGER(ans); int *restrict INDEX = malloc(n*sizeof(int)); + if(INDEX == NULL) error(_("Internal error: Couldn't allocate memory for heap indices.")); // # nocov for (int i=0; i Date: Tue, 17 Sep 2024 00:02:48 +0200 Subject: [PATCH 26/30] add quickselect support --- R/wrappers.R | 1 + src/data.table.h | 1 + src/init.c | 1 + src/topn.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+) diff --git a/R/wrappers.R b/R/wrappers.R index c031f11137..29da64cc35 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -18,3 +18,4 @@ isReallyReal = function(x) .Call(CisReallyReal, x) coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) topn = function(x, n, na.last=TRUE, decreasing=FALSE, sorted=FALSE) .Call(Ctopn, x, as.integer(n), na.last, decreasing, sorted) +quickn = function(x, n, na.last=TRUE, decreasing=FALSE) .Call(Cquickn, x, as.integer(n), na.last, decreasing) diff --git a/src/data.table.h b/src/data.table.h index 36f19596d5..b0948b7e25 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -270,6 +270,7 @@ SEXP notchin(SEXP x, SEXP table); // topn.c SEXP topn(SEXP, SEXP, SEXP, SEXP, SEXP); +SEXP quickn(SEXP, SEXP, SEXP, SEXP); // functions called from R level .Call/.External and registered in init.c // these now live here to pass -Wstrict-prototypes, #5477 diff --git a/src/init.c b/src/init.c index c5c6b638bd..611cd45884 100644 --- a/src/init.c +++ b/src/init.c @@ -140,6 +140,7 @@ R_CallMethodDef callMethods[] = { {"Csubstitute_call_arg_namesR", (DL_FUNC) &substitute_call_arg_namesR, -1}, {"CstartsWithAny", (DL_FUNC)&startsWithAny, -1}, {"Ctopn", (DL_FUNC)&topn, -1}, +{"Cquickn", (DL_FUNC)&quickn, -1}, {"CconvertDate", (DL_FUNC)&convertDate, -1}, {"Cnotchin", (DL_FUNC)¬chin, -1}, {NULL, NULL, 0} diff --git a/src/topn.c b/src/topn.c index 9e44ad7a82..8299354add 100644 --- a/src/topn.c +++ b/src/topn.c @@ -125,3 +125,76 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg, SEXP sortedArg) { UNPROTECT(1); return(ans); } + +#undef QUICKN +#define QUICKN(CTYPE, RTYPE, CMP, SWAP) \ + CTYPE *ix = (CTYPE *)RTYPE(x); \ + CTYPE *ians = (CTYPE *)RTYPE(ans); \ + unsigned long l = 0, ir = xlen - 1; \ + for (;;) { \ + if (ir <= l + 1) { \ + if (ir == l + 1 && CMP(ix,l,ir,min,nalast)) { \ + SWAP(ix+l, ix+ir); \ + } \ + break; \ + } else { \ + unsigned long mid = (l + ir) >> 1; \ + SWAP(ix+mid, ix+l + 1); \ + if (CMP(ix,l,ir,min,nalast)) { \ + SWAP(ix+l, ix+ir); \ + } \ + if (CMP(ix,l+1,ir,min,nalast)) { \ + SWAP(ix+l+1, ix+ir); \ + } \ + if (CMP(ix,l,l+1,min,nalast)) { \ + SWAP(ix+l, ix+l+1); \ + } \ + unsigned long i = l + 1, j = ir; \ + for (;;) { \ + do i++; while (CMP(ix,l+1,i,min,nalast)); \ + do j--; while (CMP(ix,j,l+1,min,nalast)); \ + if (j < i) break; \ + SWAP(ix+i, ix+j); \ + } \ + SWAP(ix+l+1, ix+j); \ + if (j >= n) ir = j - 1; \ + if (j <= n) l = i; \ + } \ + } \ + for (int i=0; i 0.")); + if (!IS_TRUE_OR_FALSE(ascArg)) error(_("%s must be TRUE or FALSE"), "decreasing"); + if (!IS_TRUE_OR_FALSE(naArg)) error(_("%s must be TRUE or FALSE"), "na.last"); + + const int xlen = LENGTH(x); + int n = INTEGER(nArg)[0]; + x = PROTECT(duplicate(x)); + + const bool min = LOGICAL(ascArg)[0]; + const bool nalast = LOGICAL(naArg)[0]; + + SEXP ans; + ans = PROTECT(allocVector(TYPEOF(x), n)); + switch(TYPEOF(x)) { + case LGLSXP: case INTSXP: { QUICKN(int, INTEGER, icmp, iswap); } break; + case REALSXP: { + if (INHERITS(x, char_integer64)) { QUICKN(int64_t, REAL, i64cmp, i64swap); } + else { QUICKN(double, REAL, dcmp, dswap); } break; } + case CPLXSXP: { QUICKN(Rcomplex, COMPLEX, ccmp, cswap); } break; + default: + error(_("Type '%s' not supported by quickn."), type2char(TYPEOF(x))); + } + copyMostAttrib(x, ans); + UNPROTECT(2); + return(ans); +} + From f8ff588264b286d07a16c62a4855ba43cecd14c9 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Tue, 17 Sep 2024 00:09:30 +0200 Subject: [PATCH 27/30] add string support for quickselect --- src/topn.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/topn.c b/src/topn.c index 8299354add..eaacb2aae9 100644 --- a/src/topn.c +++ b/src/topn.c @@ -64,8 +64,8 @@ static inline bool ccmp(const Rcomplex *x, int i, int j, bool min, bool nalast) // for finding decreasing topn build minheap and add values if they exceed // minimum by overwriting minimum and following down sifting -#undef TOPN -#define TOPN(CTYPE, RTYPE, CMP, SORTED) { \ +#undef HEAPN +#define HEAPN(CTYPE, RTYPE, CMP, SORTED) { \ const CTYPE *restrict VAL = (const CTYPE *)RTYPE(x); \ for (int i=n/2; i>=0; --i) { k=i; len=n; SIFT(CMP); } \ for (int i=n; i 0.")); @@ -190,6 +191,7 @@ SEXP quickn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { if (INHERITS(x, char_integer64)) { QUICKN(int64_t, REAL, i64cmp, i64swap); } else { QUICKN(double, REAL, dcmp, dswap); } break; } case CPLXSXP: { QUICKN(Rcomplex, COMPLEX, ccmp, cswap); } break; + case STRSXP: { QUICKN(SEXP, STRING_PTR, scmp, sswap); } break; default: error(_("Type '%s' not supported by quickn."), type2char(TYPEOF(x))); } @@ -197,4 +199,3 @@ SEXP quickn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg) { UNPROTECT(2); return(ans); } - From 2dfb7406b6358acfe0cd484e20a9cd311765729f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Tue, 17 Sep 2024 00:22:10 +0200 Subject: [PATCH 28/30] use memcpy instead of assignment --- src/topn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/topn.c b/src/topn.c index eaacb2aae9..b004d95d4b 100644 --- a/src/topn.c +++ b/src/topn.c @@ -161,9 +161,7 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg, SEXP sortedArg) { if (j <= n) l = i; \ } \ } \ - for (int i=0; i Date: Sun, 10 Nov 2024 13:51:58 +0100 Subject: [PATCH 29/30] update NEWS --- NEWS.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index e1ed662cc8..4bd5376ce3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -293,20 +293,22 @@ 41. `tables()` is faster by default by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). `mb=` now accepts any function which accepts a `data.table` and returns a higher and better estimate of its size in bytes, albeit more slowly; e.g. `mb = utils::object.size`. -27. New function `topn(x,n)` [#3804](https://github.com/Rdatatable/data.table/issues/3804). It returns the indices of the `n` smallest/largest values of a vector `x`. Previously, one had to use `order(x)[1:n]` which produced a full sorting of `x`. Usage of `topn` is advised for large vectors where sorting takes long time. Thanks to Michael Chirico for requesting, and Benjamin Schwendinger for PR. +27. New function `topn(x,n)` [#3804](https://github.com/Rdatatable/data.table/issues/3804). It returns the indices of the `n` smallest/largest values of a vector `x`. Previously, one had to use `order(x)[1:n]` which produced a full sorting of `x`. Usage of `topn` is advised for large vectors where sorting takes long time. Thanks to Michael Chirico for requesting, and Benjamin Schwendinger for the PR. ```R set.seed(123) x = rnorm(1e8) - system.time(topn(x, 5L,sorted=TRUE)) - # user system elapsed - # 0.287 0.000 0.288 - system.time(topn(x, 5L,sorted=FALSE)) - # user system elapsed - # 0.283 0.000 0.282 - system.time(order(x)[1L:5L]) - # user system elapsed - # 6.658 0.620 7.279 + bm = bench::mark(check=FALSE, min_iterations=10, + topn(x, 5L, sorted=TRUE), + topn(x, 5L, sorted=FALSE), + order(x)[1:5] + ) + setDT(bm)[, .(expression, min, median, lapply(time, max), mem_alloc)] + # expression min median V4 mem_alloc + # + # 1: topn(x, 5L, sorted = TRUE) 151.65ms 155.21ms 171ms 0B + # 2: topn(x, 5L, sorted = FALSE) 151.59ms 158.77ms 176ms 0B + # 3: order(x)[1:5] 2.69s 2.77s 2.84s 381MB ``` ## BUG FIXES From e9688fb2c640370fbeb2dd8729376a089b771c4d Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Sun, 21 Dec 2025 14:53:47 +0100 Subject: [PATCH 30/30] make linter happy --- src/topn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/topn.c b/src/topn.c index b004d95d4b..a58c0bc410 100644 --- a/src/topn.c +++ b/src/topn.c @@ -110,7 +110,7 @@ SEXP topn(SEXP x, SEXP nArg, SEXP naArg, SEXP ascArg, SEXP sortedArg) { ans = PROTECT(allocVector(INTSXP, n)); int *restrict ians = INTEGER(ans); int *restrict INDEX = malloc(n*sizeof(int)); - if(INDEX == NULL) error(_("Internal error: Couldn't allocate memory for heap indices.")); // # nocov + if (!INDEX) error(_("Internal error: Couldn't allocate memory for heap indices.")); // # nocov for (int i=0; i