Skip to content

Commit a67c4c8

Browse files
committed
fix Rcpp::CharacterVector copies
1 parent 5f6163d commit a67c4c8

File tree

4 files changed

+33
-12
lines changed

4 files changed

+33
-12
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: narray
22
Title: Subset- And Name-Aware Array Utility Functions
3-
Version: 0.5.0
3+
Version: 0.5.1
44
Author: Michael Schubert <mschu.dev@gmail.com>
55
Maintainer: Michael Schubert <mschu.dev@gmail.com>
66
Description: Stacking arrays according to dimension names, subset-aware

NEWS.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
# git head
1+
# narray 0.5.1
2+
3+
* Fixed a bug where `stack` made unnecessary vector copies
24

35
# narray 0.5.0
46

@@ -27,16 +29,19 @@
2729
* new `lambda` syntax (#14)
2830

2931
# narray 0.3.2
32+
3033
* add `collect` function as opposite to `mask`
3134
* deprecate `summarize` in favor of `map`
3235
* adjust tests for `testthat>=2`
3336

3437
# narray 0.2.2
38+
3539
* fix bug where `split` with `NA` in `subsets` caused wrong splitting; these
3640
are now dropped with a warning (#5)
3741
* fix bug where vectors are not bound along the right dimensions (#7)
3842
* add `rep` functions for arrays (and `rrep` and `crep` aliases for rows and
3943
columns, respectively)
4044

4145
# narray 0.1.1
46+
4247
* Initial release on CRAN

src/stack.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ using namespace Rcpp;
55
using namespace std;
66

77
template<int RTYPE> Vector<RTYPE> cpp_stack_impl(List array_list, int along, Vector<RTYPE> fill, bool ovr) {
8-
auto dimnames = vector<CharacterVector>(along); // dim: names along
8+
auto dimnames = vector<vector<String>>(along); // dim: names along
99
auto axmap = vector<unordered_map<string, int>>(along); // dim: element name->index
1010
auto ax_unnamed = vector<int>(along); // counter for unnamed dimension elements
11-
auto a2r = vector<vector<vector<int>>>(array_list.size()); // array > dim > element
11+
auto a2r = vector<vector<vector<int>>>(array_list.size()); // index array>dim>element
1212

1313
// create lookup tables for all present dimension names
1414
for (int ai=0; ai<Rf_xlength(array_list); ai++) { // array index
@@ -46,12 +46,16 @@ template<int RTYPE> Vector<RTYPE> cpp_stack_impl(List array_list, int along, Vec
4646
} else {
4747
auto dni = as<vector<string>>(dn[d]);
4848
for (int e=0; e<da[d]; e++) { // element in dimension
49-
if (axmap[d].count(dni[e]) == 0) {
50-
axmap[d].emplace(dni[e], axmap[d].size() + ax_unnamed[d]);
49+
auto it = axmap[d].find(dni[e]);
50+
if (it == axmap[d].end()) {
51+
int val = axmap[d].size() + ax_unnamed[d];
52+
axmap[d].emplace(dni[e], val);
5153
dimnames[d].push_back(dni[e]);
54+
a2r[ai][d].push_back(val);
55+
} else {
56+
a2r[ai][d].push_back(it->second);
5257
}
5358
// Rprintf("array %i dim %i: %s -> %i\n", ai, d, dni[e].c_str(), axmap[d][dni[e]]);
54-
a2r[ai][d].push_back(axmap[d][dni[e]]);
5559
}
5660
}
5761
}
@@ -73,10 +77,13 @@ template<int RTYPE> Vector<RTYPE> cpp_stack_impl(List array_list, int along, Vec
7377
auto rdnames = List(dimnames.size());
7478
for (int i=0; i<dimnames.size(); i++) {
7579
rdim[i] = dimnames[i].size();
76-
if (all(is_na(dimnames[i])))
80+
auto rdni = CharacterVector(dimnames[i].size());
81+
for (int j=0; j<rdni.size(); j++)
82+
rdni[j] = dimnames[i][j];
83+
if (all(is_na(rdni)))
7784
rdnames[i] = R_NilValue;
7885
else
79-
rdnames[i] = dimnames[i];
86+
rdnames[i] = rdni;
8087
}
8188
auto n = accumulate(rdim.begin(), rdim.end(), 1, multiplies<int>());
8289
auto result = Vector<RTYPE>(n, fill[0]);

tests/testthat/test-stack.r

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,22 @@ test_that("keep_empty arg when stacking zero-length vectors", {
107107
test_that("performance", {
108108
skip_on_cran()
109109

110-
size = 500 # 500x500, 500 arrays
110+
# stack 500 arrays, 500x500 with overwriting
111+
size = 500
111112
syms = c(letters, LETTERS, 0:9)
112113
idx = do.call(paste0, expand.grid(syms, syms))
113-
114114
ars = replicate(size, simplify=FALSE,
115115
matrix(runif(size*size), nrow=size, ncol=size,
116116
dimnames=list(sample(idx, size), sample(idx, size))))
117117
tt = system.time(stack(ars, along=2, allow_overwrite=TRUE))
118-
expect_lt(tt["user.self"], 6)
118+
expect_lt(tt["user.self"], 6) # 1.5 sec locally
119+
120+
# stack 10 arrays, 50k rows and 1 column
121+
size = 5e4
122+
idx = do.call(paste0, expand.grid(syms, syms, syms))[1:size]
123+
ars2 = replicate(10, simplify=FALSE,
124+
matrix(runif(size), nrow=size, ncol=1,
125+
dimnames=list(sample(idx, size), sample(idx,1))))
126+
tt = system.time(stack(ars2, along=2))
127+
expect_lt(tt["user.self"], 2) # 0.1 sec locally
119128
})

0 commit comments

Comments
 (0)