diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 2b1f4d35..4c5033a2 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -15,6 +15,7 @@ on: - "**.git" - "**.json" - "**.md" + - "**.yaml" - "**.yml" - "!**R-CMD-check.yml" - "**.R[dD]ata" @@ -30,6 +31,7 @@ on: - "**.git" - "**.json" - "**.md" + - "**.yaml" - "**.yml" - "!**R-CMD-check.yml" - "**.R[dD]ata" @@ -38,24 +40,66 @@ on: name: R-CMD-check jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} + check-release: + runs-on: ubuntu-24.04 + name: ubuntu-24.04 (release) + + env: + _R_CHECK_CRAN_INCOMING_: true # Seemingly not set by --as-cran + _R_CHECK_FORCE_SUGGESTS_: false # CRAN settings + R_COMPILE_AND_INSTALL_PACKAGES: 'never' + _R_CHECK_THINGS_IN_CHECK_DIR_: false + R_REMOTES_STANDALONE: true + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: "https://packagemanager.posit.co/cran/__linux__/noble/latest" + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: Checkout git repo + uses: actions/checkout@v5 + - name: Temporarily bump package version + run: | + old_version=$(grep "Version:" DESCRIPTION | awk '{print $2}') + if [[ $(echo "$old_version" | tr -cd '.' | wc -c) -eq 2 ]]; then + new_version="${old_version}.8888" + if [[ "$RUNNER_OS" == "macOS" ]]; then + sed -i "" "s/Version: .*/Version: ${new_version}/" DESCRIPTION + else + sed -i "s/Version: .*/Version: ${new_version}/" DESCRIPTION + fi + fi + shell: bash + + - name: Set up R + uses: r-lib/actions/setup-r@v2 + + - name: Set up R dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + ms609/TreeDistData + needs: | + check + + - name: Check package + uses: r-lib/actions/check-r-package@v2 + + + check-all: + runs-on: ${{ matrix.config.os }} name: ${{ matrix.config.os }} (${{ matrix.config.r }}) strategy: fail-fast: false matrix: config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} - - {os: macos-15-intel, r: 'release'} # Until Intel architecture retired 2027-11 - - {os: ubuntu-22.04, r: '4.1', - rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"} - - {os: ubuntu-24.04-arm, r: "release", - rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} - - {os: ubuntu-latest, r: 'devel', - rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} + - {os: windows-latest, r: "release"} + - {os: macos-15-intel, r: "release"} # Until Intel architecture retired 2027-11 + - {os: macOS-latest, r: "release"} + - {os: ubuntu-22.04, r: '4.1', rspm: "https://packagemanager.posit.co/cran/__linux__/jammy/latest"} + - {os: ubuntu-24.04-arm, r: "release", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} + - {os: ubuntu-24.04, r: "devel", rspm: "https://packagemanager.posit.co/cran/__linux__/noble/latest"} env: _R_CHECK_CRAN_INCOMING_: ${{ github.event_name == 'pull_request' }} @@ -64,6 +108,7 @@ jobs: _R_CHECK_THINGS_IN_CHECK_DIR_: false R_REMOTES_STANDALONE: true R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + R_REALLY_FORCE_SYMBOLS: true # Until R4.3 RSPM: ${{ matrix.config.rspm }} CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -164,3 +209,21 @@ jobs: state: 'open' }); } + rev-dep-check: + runs-on: ubuntu-latest + needs: check-release + + name: revdepcheck + + env: + R_COMPILE_AND_INSTALL_PACKAGES: 'never' + R_REMOTES_STANDALONE: true + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: ms609/actions/revdepcheck@main + with: + deps: ${{ matrix.config.deps }} + extra-packages: ms609/TreeDistData diff --git a/.github/workflows/revdepcheck.yml b/.github/workflows/revdepcheck.yml index c22b3ae3..288e268c 100644 --- a/.github/workflows/revdepcheck.yml +++ b/.github/workflows/revdepcheck.yml @@ -1,21 +1,14 @@ +# Now integrated into R CMD check.yml. name: rev-dep-check on: workflow_dispatch: - workflow_run: - workflows: ["R-CMD-check"] - types: - - completed - push: - paths: "**revdepcheck.yml" - pull_request: - paths: "**revdepcheck.yml" jobs: mem-check: runs-on: ubuntu-latest - name: revdepcheck + name: revdepcheck (manual) env: R_COMPILE_AND_INSTALL_PACKAGES: 'never' diff --git a/DESCRIPTION b/DESCRIPTION index ded388d8..e5f072a0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: TreeDist Type: Package Title: Calculate and Map Distances Between Phylogenetic Trees -Version: 2.11.1.9001 +Version: 2.12.0 Authors@R: c(person("Martin R.", "Smith", email = "martin.smith@durham.ac.uk", role = c("aut", "cre", "cph", "prg"), @@ -50,7 +50,7 @@ Imports: Rdpack (>= 0.7), shiny, shinyjs, - TreeTools (>= 2.0.0.9004), + TreeTools (>= 2.1.0), Suggests: bookdown, cluster, @@ -71,6 +71,7 @@ Suggests: rgl, Rogue, spelling, + TBRDist, testthat (>= 3.0), Ternary (>= 1.1.2), TreeDistData (> 0.1.0), diff --git a/NAMESPACE b/NAMESPACE index 2b2d02ab..640491ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -152,6 +152,7 @@ export(is.HPart) importFrom(Rdpack,reprompt) importFrom(TreeTools,AllAncestors) importFrom(TreeTools,DropTip) +importFrom(TreeTools,FirstMatchingSplit) importFrom(TreeTools,KeepTip) importFrom(TreeTools,KeepTipPostorder) importFrom(TreeTools,LnRooted.int) diff --git a/NEWS.md b/NEWS.md index cd43b4f6..f50ebf5f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,12 +1,14 @@ -# TreeDist 2.11.1.9001 (2026-02-05) - -- Support larger trees by updating some functions to use 32-bit integers, per TreeTools v2.1.0. - - -# TreeDist 2.11.1.9000 (2025-11-13) +# TreeDist 2.12.0 (2026-02-12) +- Support larger trees in some functions by updating some functions to use + 32-bit integers, per TreeTools v2.1.0. + - `AHMI()` now returns negative values (previously zeroed in error). +- Experimental support for a new method of SPR distance calculation: + subject to change or removal. + + # TreeDist 2.11.1 (2025-10-13) - Improve robustness of `SpectralEigens()` tests. diff --git a/R/RcppExports.R b/R/RcppExports.R index 36cca506..81e688d3 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,6 +1,10 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +binary_entropy_counts <- function(inSplit, nLeaves) { + .Call(`_TreeDist_binary_entropy_counts`, inSplit, nLeaves) +} + COMCLUST <- function(trees) { .Call(`_TreeDist_COMCLUST`, trees) } @@ -58,6 +62,10 @@ entropy_int <- function(n) { .Call(`_TreeDist_entropy_int`, n) } +expected_mi <- function(ni, nj) { + .Call(`_TreeDist_expected_mi`, ni, nj) +} + lapjv <- function(x, maxX) { .Call(`_TreeDist_lapjv`, x, maxX) } @@ -102,6 +110,10 @@ keep_and_reduce <- function(tree1, tree2, keep) { .Call(`_TreeDist_keep_and_reduce`, tree1, tree2, keep) } +spr_table_7 <- function(sp1, sp2) { + .Call(`_TreeDist_spr_table_7`, sp1, sp2) +} + cpp_robinson_foulds_distance <- function(x, y, nTip) { .Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip) } diff --git a/R/tree_distance_spr.R b/R/tree_distance_spr.R index 9559698b..b5b54d58 100644 --- a/R/tree_distance_spr.R +++ b/R/tree_distance_spr.R @@ -1,31 +1,39 @@ -#' Approximate the Subtree Prune and Regraft (SPR) distance. -#' -#' `SPRDist()` calculates an upper bound on the SPR distance between trees -#' using the heuristic method of \insertCite{deOliveira2008;textual}{TreeDist}. -#' Other approximations are available -#' \insertCite{@e.g. @Hickey2008, @Goloboff2008SPR, @Whidden2018}{TreeDist}. +#' Approximate the Subtree Prune and Regraft distance +#' +#' `SPRDist()` approximates the \acronym{SPR} distance between +#' trees. #' +#' The function currently defaults to the heuristic method of +#' \insertCite{deOliveira2008;textual}{TreeDist}, which purports to provide an +#' upper bound on the \acronym{SPR} distance (though exceptions exist). +#' Other approximations +#' \insertCite{@e.g. @Hickey2008, @Goloboff2008SPR, @Whidden2018}{TreeDist} are +#' not yet implemented. +#' #' @template tree12ListParams #' @param method Character specifying which method to use to approximate the -#' SPR distance. Currently defaults to `"deOliveira"`, the only available -#' option; a new method will eventually become the default. -#' @param symmetric Ignored (redundant after fix of +#' SPR distance. Currently defaults to `"deOliveira"``. +#' `"Rogue"` implements an experimental method whose details are pending +#' publication; this function is under development, and may be modified or +#' removed without notice. Once formally validated, it is anticipated that this +#' method will become the default. +#' @param symmetric Deprecated (redundant after fix of #' [phangorn#97](https://github.com/KlausVigo/phangorn/issues/97)). -#' -#' @return `SPRDist()` returns a vector or distance matrix of distances +#' +#' @return `SPRDist()` returns a vector or distance matrix of distances #' between trees. -#' +#' #' @references \insertAllCited{} -#' +#' #' @examples #' library("TreeTools", quietly = TRUE) -#' +#' #' # Compare single pair of trees #' SPRDist(BalancedTree(7), PectinateTree(7)) -#' -#' # Compare all pairs of trees +#' +#' # Compare all pairs of trees #' SPRDist(as.phylo(30:33, 8)) -#' +#' #' # Compare each tree in one list with each tree in another #' SPRDist(BalancedTree(7), as.phylo(0:2, 7)) #' SPRDist(as.phylo(0:2, 7), PectinateTree(7)) @@ -33,25 +41,25 @@ #' SPRDist(list(bal = BalancedTree(7), pec = PectinateTree(7)), #' as.phylo(0:2, 7)) #' @template MRS -#' +#' #' @seealso Exact calculation with [\pkg{TBRDist}]( #' https://ms609.github.io/TBRDist/reference/TreeRearrangementDistances.html) #' functions `USPRDist()` and `ReplugDist()`. -#' +#' #' \pkg{phangorn} function \code{\link[phangorn:treedist]{SPR.dist()}} employs #' the \insertCite{deOliveira2008;textual}{TreeDist} algorithm but can crash #' when sent trees of certain formats, and tends to have a longer running time. -#' +#' #' @family tree distances #' @importFrom TreeTools PairwiseDistances Postorder #' @export -SPRDist <- function (tree1, tree2 = NULL, method = "deOliveira", symmetric) { +SPRDist <- function(tree1, tree2 = NULL, method = "deOliveira", symmetric) { UseMethod("SPRDist") } #' @rdname SPRDist #' @export -SPRDist.phylo <- function (tree1, tree2 = NULL, method = "deOliveira", symmetric) { +SPRDist.phylo <- function(tree1, tree2 = NULL, method = "deOliveira", symmetric) { if (is.null(tree2)) { NULL } else if (inherits(tree2, "phylo")) { @@ -62,15 +70,20 @@ SPRDist.phylo <- function (tree1, tree2 = NULL, method = "deOliveira", symmetric } .SPRFunc <- function(method) { - .SPRPairDeO + switch(pmatch(tolower(gsub("\\s", "", method)), + c("deoliveira2008", "rogue")), + .SPRPairDeO, .SPRRogue) } #' @rdname SPRDist #' @export -SPRDist.list <- function (tree1, tree2 = NULL, method = "deOliveira", symmetric) { +SPRDist.list <- function(tree1, tree2 = NULL, method = "deOliveira", symmetric) { if (is.null(tree2)) { - PairwiseDistances(RootTree(RenumberTips(tree1, tree1), 1), - .SPRFunc(method), check = FALSE) + PairwiseDistances( + RootTree(RenumberTips(tree1, tree1), 1), + .SPRFunc(method), + check = FALSE + ) } else if (inherits(tree2, 'phylo')) { vapply(tree1, .SPRFunc(method), double(1), tree2) } else { @@ -82,6 +95,9 @@ SPRDist.list <- function (tree1, tree2 = NULL, method = "deOliveira", symmetric) #' @export SPRDist.multiPhylo <- SPRDist.list +# if x <- which(condition, arr.ind = FALSE), and +# y <- which(condition, arr.ind = TRUE), then +# .Which1 and .Which2 give y[1] and y[2]. .Which1 <- function (x, nSplits) { ret <- x %% nSplits if (ret == 0L) { @@ -92,6 +108,315 @@ SPRDist.multiPhylo <- SPRDist.list } .Which2 <- function (x, nSplits) (x - 1) %/% nSplits + 1L +.SPRExact5 <- function(sp1, sp2) { + # Trees have shape (r, (p1, p2), (q1, q2)) after reduction. + # There are two cases: + if (all(xor(sp1[[1]] , sp1[[2]]) == xor(sp2[[1]] , sp2[[2]]))) { + # Case 1: `r` has the same label in each tree + # As the tree cannot be reduced by the reduction rule, we have + # (X, (A, B), (C, D)) vs (X, (A, C), (B, C)) = 2 moves + # + 2 + } else { + # Case 2: `r` has a different label in each tree. + # Assign `r` the label X in tree 1 and Y in tree 2. + # (X, (Y, ?), (?, ?)) vs (Y, (X, ?), (?, ?)) + # + # Then label the sister to X in tree2 X', and Y mutas mutantis + # Notice that if X' == Y', the unlabelled cherry reduces by reduction rule + # Hence we have + # (X, ((Y, Y'), (X', ?))) vs (Y, ((X, X'), (Y', ?))) = 1 moves + 1 + } +} + +.SPRExact6 <- function(sp1, sp2) { + # Surprisingly, there is only one configuration with a distance of 1: + # (((Lb, Lc), La), (Ra, (Rb, Rc))) vs (((Ra, Rb), La), (Lb, (Lc, Rc))) + pairs1 <- TipsInSplits(sp1, smallest = TRUE) == 2 + pairs2 <- TipsInSplits(sp2, smallest = TRUE) == 2 + if (all(pairs1) || all(pairs2)) { + return (2) + } + duo1 <- sp1[[pairs1]] + trio1 <- sp1[[!pairs1]] + + .Overlapper <- function(s1, s2) { + res <- as.logical(xor(s1, s2)) + if (sum(res) == 1) res else !res + } + middle1a <- .Overlapper(duo1[[1]], trio1) + middle1b <- .Overlapper(duo1[[2]], trio1) + + duo2 <- sp2[[pairs2]] + trio2 <- sp2[[!pairs2]] + middle2a <- .Overlapper(duo2[[1]], trio2) + middle2b <- .Overlapper(duo2[[2]], trio2) + + inMiddleEachTime <- (middle1a | middle1b) & (middle2a | middle2b) + if (sum(inMiddleEachTime) == 1) { + La <- if (inMiddleEachTime[middle1a]) middle1a else middle1b + Lbc1 <- as.logical(duo1[[if (inMiddleEachTime[middle1a]) 1 else 2]]) + if (Lbc1[La]) { + Lbc1 <- !Lbc1 + } + if (any(Lbc1[middle2a | middle2b])) { + # Lb is in the other middle position in tree 2: + # (((?, ?), La), (Lb, (?, ?))) + Lbc2 <- as.logical(duo2[[if (La[middle2a]) 1 else 2]]) + if (Lbc2[La]) { + Lbc2 <- !Lbc2 + } + if (!any(Lbc2[Lbc1])) { + # (((?, ?), La), (Lb, (Lc, ?))) + # (((Ra, Rb), La), (Lb, (Lc, Rc))) = 1 !!! + return(1) + } + # (((Lc, ?), La), (Lb, (?, ?))) + # (((Lc, Rc), La), (Lb, (Ra, Rb))) = 2 + } + # (((?, ?), La), (Rb, (?, ?))) + # (((?, Ra), La), (Rb, (?, ?))) + # (((Lb, Ra), La), (Rb, (Rc, Lc))) = 2 + # (((?, ?), La), (Rb, (Ra, ?))) + # (((Rc, Lc), La), (Rb, (Ra, Lb))) = 2 + # + } + + # All other tree pairs have a distance of 2 - see below + return(2) + + # Trees may be one of two shapes: + # ((a1, a2), (b1, b2), (c1, c2)) + # (((Lb, Lc), La), (Ra, (Rb, Rc))) + # balanced1 <- all(pairs1) + # balanced2 <- all(pairs2) + # if (balanced1 && balanced2) { + # There's only one possible configuration: + # ((ab, ac), (ba, bc), (ca, cb)) vs ((ba, ca), (ab, cb), (ac, bc)) = 2 + # } + # if (!balanced1 && !balanced2) { + # Both trees have the shape + # (((Lb, Lc), La), (Ra, (Rb, Rc))) + # We will use the same labels for Tree 2, matching where possible. + # if (La1 == La2 && Ra1 == Ra2) { + # La = La, Ra = Ra: + # (((Lb, Lc), La), (Ra, (Rb, Rc))), (((Lb, Rb), La), (Ra, (Rc, Lc))) = 2 + # } + # As we can't match La and Ra, we'll match La if we can. + # if (La1 != La2 && Ra1 != Ra2) { + # LO != La, Ra != Ra + # La and Ra are both in the cherries + # (((?, La), Lb), (Lc, (?, ?))) + # (((Rb, La), Lb), (Lc, (Ra, Rc))) = 2 + # + # (((?, ?), Rb), (Rc, (?, ?))) + # (((Lc, Ra), Rb), (Rc, (La, Lb))) = 2 + # + # (((?, ?), Lb), (Rb, (?, ?))) + # (((?, La), Lb), (Rb, (?, ?))) + # (((Lc, La), Lb), (Rb, (Ra, Rc))) = 2 + # (((Ra, La), Lb), (Rb, (Lc, Rc))) = 2 + # (((Rc, La), Lb), (Rb, (Ra, Lc))) = 2 + # + # (((?, ?), Lb), (Rb, (La, ?))) + # (((Ra, Rc), Lb), (Rb, (La, Lc))) = 2 + # (((Lc, Rc), Lb), (Rb, (La, Ra))) = 2 + # (((Ra, Lc), Lb), (Rb, (La, Rc))) = 2 + # + # (((?, ?), Lb), (Rb, (?, ?))) + # (((Lc, Rc), Lb), (Rb, (La, Ra))) = 2 + # (((Lc, Ra), Lb), (Rb, (La, Rc))) = 2 + # (((Ra, Rc), Lb), (Rb, (La, Lc))) = 2 + # (((Ra, La), Lb), (Rb, (Rc, Lc))) = 2 + # (((Rc, La), Lb), (Rb, (Ra, Lc))) = 2 + # (((Lc, La), Lb), (Rb, (Ra, Rc))) = 2 + # } + # Else exactly one of the bridging leaves is the same; call this La. + # + # (((?, ?), La), (Rb, (?, ?))) + # (((?, Ra), La), (Rb, (?, ?))) + # (((Lb, Ra), La), (Rb, (Rc, Lc))) = 2 + # (((?, ?), La), (Rb, (Ra, ?))) + # (((Rc, Lc), La), (Rb, (Ra, Lb))) = 2 + # + # (((?, ?), La), (Lb, (?, ?))) + # (((Lc, ?), La), (Lb, (?, ?))) + # (((Lc, Rc), La), (Lb, (Ra, Rb))) = 2 + # (((?, ?), La), (Lb, (Lc, ?))) + # (((Ra, Rb), La), (Lb, (Lc, Rc))) = 1 !!! + # + # + # } +} + +.SPRExact7 <- function(sp1, sp2) { + spr_table_7(sp1, sp2) +} + +# Takes a 'Rogue' approach: finds the leaf that introduces the most conflict, +# and nixes it. +#' @importFrom TreeTools FirstMatchingSplit +.SPRRogue <- function(tree1, tree2, check = TRUE) { + moves <- 0 + + ProxyDistance <- switch( + pmatch(toupper(getOption("sprProxy", "C")), c("C", "P", "Q", "R")), + ClusteringInfoDist, + PhylogeneticInfoDistance, + function(x, y) Quartet::QuartetDivergence(Quartet::QuartetStatus(x, y)), + RobinsonFoulds + ) + + reduced <- ReduceTrees(tree1, tree2, check = check) + + while (!is.null(reduced)) { + + tr1 <- reduced[[1]] + tr2 <- reduced[[2]] + nTip <- NTip(tr1) + if (nTip == 4 && getOption("sprShortcut", Inf) >= 4) { + return(moves + 1) + } + + sp1 <- as.Splits(tr1) + sp2 <- as.Splits(tr2, tr1) + if (nTip == 5 && getOption("sprShortcut", Inf) >= 5) { + return(moves + .SPRExact5(sp1, sp2)) + } + if (nTip == 6 && getOption("sprShortcut", Inf) >= 6) { + return(moves + .SPRExact6(sp1, sp2)) + } + if (nTip == 7 && getOption("sprShortcut", Inf) >= 7) { + return(moves + .SPRExact7(sp1, sp2)) + } + + firstMatchedSplit <- FirstMatchingSplit(sp1, sp2) + if (!isFALSE(getOption("sprMatches")) && firstMatchedSplit > 0) { + # At least one split exists in both trees + subtips1 <- as.logical(sp1[[firstMatchedSplit]]) + subtips2 <- !subtips1 + + # Add dummy tip as placeholder for other half of tree + subtips1[!subtips1][[1]] <- TRUE + + # Repeat for other half-tree + subtips2[!subtips2][[1]] <- TRUE + + moves1 <- .SPRRogue( + KeepTipPostorder(tr1, subtips1), + KeepTipPostorder(tr2, subtips1) + ) + moves2 <- .SPRRogue( + KeepTipPostorder(tr1, subtips2), + KeepTipPostorder(tr2, subtips2) + ) + return(moves + moves1 + moves2) + } + + labels <- TipLabels(tr1) + scores <- numeric(length(labels)) + blank <- rep_len(TRUE, length(labels)) + + depth <- max(getOption("sprDepth", 1), 1) + + + .ScoreWithout <- function(idx) { + keep <- blank + keep[idx] <- FALSE + + outcome <- keep_and_reduce(tr1, tr2, keep) + if (is.null(outcome[[1]])) { + return(-Inf) + } + + oSpl <- as.Splits(outcome) + firstMatch <- FirstMatchingSplit(oSpl[[1]], oSpl[[2]]) + + if (firstMatch > 0) { + subtips1 <- as.logical(oSpl[[1]][[firstMatch]]) + subtips2 <- !subtips1 + # Anchor to shared edge + subtips1[!subtips1][[1]] <- TRUE + subtips2[!subtips2][[1]] <- TRUE + + sub1 <- ReduceTrees(KeepTipPostorder(outcome[[1]], subtips1), + KeepTipPostorder(outcome[[2]], subtips1)) + + sub2 <- ReduceTrees(KeepTipPostorder(outcome[[1]], subtips2), + KeepTipPostorder(outcome[[2]], subtips2)) + + # Return: + ProxyDistance(sub1[[1]], sub1[[2]]) + + ProxyDistance(sub2[[1]], sub2[[2]]) + + } else { + # Return: + ProxyDistance(outcome[[1]], outcome[[2]]) + } + } + for (i in seq_along(labels)) { + scores[[i]] <- .ScoreWithout(i) + if (!is.finite(scores[[i]])) break + } + if (any(!is.finite(scores))) { + depth <- 1 + } + if (depth > 1) { + pairs <- combn(seq_along(labels), 2) + nPairs <- dim(pairs)[[2]] + pairScores <- double(nPairs) + for (i in seq_len(nPairs)) { + pairScores[[i]] <- .ScoreWithout(pairs[, i]) + if (!is.finite(pairScores[[i]])) break + } + } + + drop <- logical(length(labels)) + couldDrop <- scores == min(scores) + + if (depth > 1 && min(pairScores) < min(scores)) { + pairDrop <- pairScores == min(pairScores) + dropTions <- pairs[, pairDrop] + if (any(couldDrop[dropTions]) && !any(!is.finite(pairScores))) { + # Dropping two at once doesn't give us any benefit over dropping one + # at a time – but will mean we can't spot a handy pair next time. + drop[dropTions[which.min(scores[dropTions])]] <- TRUE + } else { + # If dropping two gives us a better solution, drop both at once - + # failing to do so can cause an optimal path to be missed. + drop[pairs[, which.max(pairDrop)]] <- TRUE + } + } else { + drop[[which.min(scores)]] <- TRUE + } + + reduced <- keep_and_reduce(tr1, tr2, !drop) + if (length(reduced) == 1L) { + reduced <- NULL + } + + moves <- sum(moves, drop) + } + + # Return: + moves +} + +# An attempt to reproduce the phangorn results using the algorithm of +# \insertCite{deOliveira2008;textual}{TreeDist} +# An exact match is unlikely due to the arbitrary breaking of ties when leaves +# are selected for removal. +#' @examples +#' # de Oliveira Martins et al 2008, fig. 7 +#' tree1 <- ape::read.tree(text = "((1, 2), ((a, b), (c, d)), (3, (4, (5, (6, 7)))));") +#' tree2 <- ape::read.tree(text = "((1, 2), 3, (4, (5, (((a, b), (c, d)), (6, 7)))));") +#' oPar <- par(mfrow =c(2, 1), mar = rep(0, 4)) +#' plot(tree1) +#' plot(tree2) +#' par(oPar) +#' SPRDist(tree1, tree2, method = "deO") +#' @keywords internal #' @importFrom TreeTools DropTip TipsInSplits KeepTipPostorder #' @importFrom TreeTools edge_to_splits .SPRPairDeO <- function(tree1, tree2, check = TRUE) { @@ -99,7 +424,7 @@ SPRDist.multiPhylo <- SPRDist.list # Reduce trees (Fig. 7A in deOliveira2008) reduced <- ReduceTrees(tree1, tree2, check = check) - + while (!is.null(reduced)) { tr1 <- reduced[[1]] tr2 <- reduced[[2]] @@ -120,6 +445,7 @@ SPRDist.multiPhylo <- SPRDist.list nSplits <- length(sp1) # Compute size of disagreement splits - see Fig. 7C in @deOliv2008 mmSize <- mismatch_size(sp1, sp2) + stopifnot(all(mmSize > 0)) # Arbitrary selection of leaves to remove introduces a stochastic element minMismatch <- which.min(mmSize) diff --git a/R/tree_information.R b/R/tree_information.R index 08fab0a9..d83f5168 100644 --- a/R/tree_information.R +++ b/R/tree_information.R @@ -73,7 +73,7 @@ #' #' #' As entropy measures the bits required to transmit the cluster label of each -#' leaf \insertCite{@@Vinh2010: p. 2840}{TreeDist}, the information content of +#' leaf \insertCite{@Vinh2010: p. 2840}{TreeDist}, the information content of #' a split is its entropy multiplied by the number of leaves. #' #' @section Phylogenetic information: @@ -265,15 +265,42 @@ ClusteringEntropy.multiPhylo <- ClusteringEntropy.list #' @export ClusteringEntropy.Splits <- function(x, p = NULL, sum = TRUE) { nLeaves <- attr(x, "nTip") - inSplit <- TipsInSplits(x) - splitP <- rbind(inSplit, nLeaves - inSplit, deparse.level = 0L) / nLeaves - if (is.null(p)) { - p <- 1L + if (is.null(nLeaves) || !is.finite(nLeaves)) { + stop("`x` must have a valid 'nTip' attribute.") + } + if (nLeaves <= 0) { + return(if (sum) 0 else double(0)) + } + + inSplit <- TipsInSplits(x) # integer vector, names correspond to splits + nSplits <- length(inSplit) + + H <- tryCatch( + binary_entropy_counts(inSplit, as.integer(nLeaves)), + error = function(e) { + pi <- inSplit / nLeaves + H <- numeric(nSplits) + nz <- (pi > 0) & (pi < 1) + pz <- pi[nz] + H[nz] <- -(pz * log(pz) + (1 - pz) * log1p(-pz)) + H[!nz] <- 0.0 + H + } + ) + + # Weighting + if (is.null(p)) p <- 1 + if (!(length(p) %in% c(1L, nSplits))) { + stop("`p` must be length 1 or length equal to number of splits (", nSplits, ").") } + ret <- H * p - ret <- p * apply(splitP, 2, Entropy) # Return: - if (sum) sum(ret) else ret + if (isTRUE(sum)) { + sum(ret) + } else { + setNames(ret, names(inSplit)) + } } #' @export diff --git a/data-raw/spr-exact-7.R b/data-raw/spr-exact-7.R new file mode 100644 index 00000000..c831495a --- /dev/null +++ b/data-raw/spr-exact-7.R @@ -0,0 +1,131 @@ +library("TreeTools") + +Tree <- function(txt) ape::read.tree(text = txt) + +pec7 <- Tree("((c1, c2), (s, (t, (u, (h1, h2)))));") +pecSp <- as.Splits(pec7) + +nTip <- 7 + +tis <- TipsInSplits(pecSp) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +trios <- which(tiss == 3) +pairs <- (1:4)[-trios] + +midpoint <- xor(pecSp[[trios[[1]]]], pecSp[[trios[[2]]]]) +# Align pair1 with trio1 +if (!TipsInSplits(xor(pecSp[[pairs[[1]]]], pecSp[[trios[[1]]]])) %in% c(1, 6)) { + pairs <- pairs[2:1] +} + +midTip <- which(as.logical(midpoint)) +trio1Tip <- as.logical(xor(pecSp[[trios[[1]]]], pecSp[[pairs[[1]]]])) +if (sum(trio1Tip) == 6) trio1Tip <- !trio1Tip +trio2Tip <- as.logical(xor(pecSp[[trios[[2]]]], pecSp[[pairs[[2]]]])) +if (sum(trio2Tip) == 6) trio2Tip <- !trio2Tip + +duo1Tips <- as.logical(pecSp[[pairs[[1]]]]) +if (sum(duo1Tips) == 5) duo1Tips <- !duo1Tips +duo2Tips <- as.logical(pecSp[[pairs[[2]]]]) +if (sum(duo2Tips) == 5) duo2Tips <- !duo2Tips + +canonOrder <- TipLabels(pec7)[ + c(midTip, which(trio1Tip), which(duo1Tips), which(trio2Tip), which(duo2Tips)) +] +pec7 <- RenumberTips(pec7, canonOrder) + +pecTrees <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +pecScores <- sapply(seq_along(pecTrees), function(i) { + reduced <- ReduceTrees(pec7, pecTrees[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +pecValid <- !is.na(pecScores) + +pecSplits <- vapply(which(pecValid), function(i) { + as.integer(!(pecTrees[[i]] |> as.Splits() |> PolarizeSplits(7))) |> sort() +}, integer(4)) + + +bal7 <- Tree("(((p1, p2), (q1, q2)), (s, (r1, r2)));") +balSp <- as.Splits(bal7) +tis <- TipsInSplits(balSp) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +firstTrio <- which.max(tiss) +firstTrioSp <- balSp[[firstTrio]] +for (trioPair in seq_along(tis)[-firstTrio]) { + soloSp <- xor(balSp[[trioPair]], firstTrioSp) + if (TipsInSplits(soloSp) == nTip - 1) break +} +otherSp <- seq_along(tis)[-c(trioPair, firstTrio)] +singleton <- !soloSp +singleTip <- which(as.logical(singleton)) +trioPairTip <- as.logical(balSp[[trioPair]]) +if (tisBig[[trioPair]]) trioPairTip <- !as.logical(trioPairTip) +otherSp1 <- as.logical(balSp[[otherSp[[1]]]]) +if (tisBig[[otherSp[[1]]]]) otherSp1 <- !as.logical(otherSp1) +otherSp2 <- as.logical(balSp[[otherSp[[2]]]]) +if (tisBig[[otherSp[[2]]]]) otherSp2 <- !as.logical(otherSp2) +canonOrder <- TipLabels(bal7)[ + c(singleTip, which(trioPairTip), which(otherSp1), which(otherSp2))] + +bal7 <- RenumberTips(bal7, canonOrder) + +balTrees <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +balScores <- vapply(seq_along(balTrees), function(i) { + reduced <- ReduceTrees(bal7, balTrees[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA_real_) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}, double(1)) + +balValid <- !is.na(balScores) + +balSplits <- vapply(which(balValid), function(i) { + as.integer(!(balTrees[[i]] |> as.Splits() |> PolarizeSplits(7))) |> + sort() +}, integer(4)) + +# Define packing algorithm based on range +range(pecSplits[1, ], balSplits[1, ]) +range(pecSplits[2, ], balSplits[2, ]) +range(pecSplits[3, ], balSplits[3, ]) +range(pecSplits[4, ], balSplits[4, ]) +BitPack7 <- function(vec) { + bitwShiftL(vec[1] - 3, 18) + + bitwShiftL(vec[2] - 7, 12) + + bitwShiftL(vec[3] - 15, 6) + + vec[4] - 33 +} + +pecPack <- apply(pecSplits, 2, BitPack7) +pecDF <- data.frame(key = pecPack, score = pecScores[pecValid]) +pecDF <- pecDF[order(pecDF$key), ] + +balPack <- apply(balSplits, 2, BitPack7) +balDF <- data.frame(key = balPack, score = balScores[balValid]) +balDF <- balDF[order(balDF$key), ] + + +header_content <- paste0( + "// Generated from data-raw/spr-exact.R\n", + "#include \n#include \n#include \n\n", + "struct SPRScore { uint32_t key; int score; };\n\n", + "static constexpr std::array PEC_LOOKUP = {{\n", + paste0(" {", pecDF$key, "u, ", pecDF$score, "}", collapse = ",\n"), + "\n}};\n", + "static constexpr std::array BAL_LOOKUP = {{\n", + paste0(" {", balDF$key, "u, ", balDF$score, "}", collapse = ",\n"), + "\n}};" +) + +writeLines(header_content, "src/spr/lookup_table_7.h") diff --git a/data-raw/spr-exact-8.R b/data-raw/spr-exact-8.R new file mode 100644 index 00000000..21749ec4 --- /dev/null +++ b/data-raw/spr-exact-8.R @@ -0,0 +1,260 @@ +library("TreeTools") +ReduceTrees <- TreeDist::ReduceTrees + +Tree <- function(txt) ape::read.tree(text = txt) +nTip <- 8 + +# There are four UnrootedTreeShapes for 8 leaves +pec <- Tree("((c1, c2), (s, (t, (u, (v, (h1, h2))))));") +pecSp <- as.Splits(pec) + + +tis <- TipsInSplits(pecSp) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +fours <- unname(which(tiss == 4)) +trios <- unname(which(tiss == 3)) +pairs <- seq_len(nTip - 3)[-c(trios, fours)] + +mid1 <- xor(pecSp[[fours]], pecSp[[trios[[1]]]]) +mid2 <- xor(pecSp[[fours]], pecSp[[trios[[2]]]]) +mid1Tip <- which(as.logical(mid1)) +mid2Tip <- which(as.logical(mid2)) + +# Align trio1 with mid1 +if (!TipsInSplits(xor(pecSp[[trios[[1]]]], pecSp[[pairs[[1]]]])) %in% + c(1, nTip - 1)) { + pairs <- pairs[2:1] +} + +trio1Tip <- as.logical(xor(pecSp[[trios[[1]]]], pecSp[[pairs[[1]]]])) +if (sum(trio1Tip) == nTip - 1) trio1Tip <- !trio1Tip +trio2Tip <- as.logical(xor(pecSp[[trios[[2]]]], pecSp[[pairs[[2]]]])) +if (sum(trio2Tip) == nTip - 1) trio2Tip <- !trio2Tip + +duo1Tips <- as.logical(pecSp[[pairs[[1]]]]) +if (sum(duo1Tips) == nTip - 2) duo1Tips <- !duo1Tips +duo2Tips <- as.logical(pecSp[[pairs[[2]]]]) +if (sum(duo2Tips) == nTip - 2) duo2Tips <- !duo2Tips + +canonOrder <- TipLabels(pec)[ + c(mid1Tip, which(trio1Tip), which(duo1Tips), + mid2Tip, which(trio2Tip), which(duo2Tips)) +] +pec <- RenumberTips(pec, canonOrder) + +pecTrees <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +pecScores <- sapply(seq_along(pecTrees), function(i) { + reduced <- ReduceTrees(pec, pecTrees[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +pecValid <- !is.na(pecScores) + +pecSplits <- vapply(which(pecValid), function(i) { + as.integer(!(pecTrees[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> sort() +}, integer(nTip - 3)) + + + + +mix <- Tree("(((p1, p2), (q1, q2)), (s, (t, (c1, c2))));") +mixSp <- as.Splits(mix) +tis <- TipsInSplits(mixSp) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +quad <- which(tiss == 4) +trio <- which(tiss == 3) +pairs <- (1:5)[-c(quad, trio)] + +trioSp <- mixSp[[trio]] +sTip <- xor(mixSp[[quad]], trioSp) +for (trioPair in pairs) { + soloSp <- xor(trioSp, mixSp[[trioPair]]) + if (TipsInSplits(soloSp) %in% c(1, nTip - 1)) break +} +cherries <- pairs[pairs != trioPair] + +.FewerTips <- function(sp) { + which(as.logical(if (TipsInSplits(sp) > nTip / 2) !sp else sp)) +} +midTip <- .FewerTips(sTip) +trioTip <- .FewerTips(xor(mixSp[[trioPair]], trioSp)) +trioPairTip <- .FewerTips(mixSp[[trioPair]]) +otherSp1 <- .FewerTips(mixSp[[cherries[[1]]]]) +otherSp2 <- .FewerTips(mixSp[[cherries[[2]]]]) +canonOrder <- TipLabels(mix)[ + c(midTip, trioTip, trioPairTip, otherSp1, otherSp2) + ] + +mix <- RenumberTips(mix, canonOrder) + +mixTrees <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +mixScores <- vapply(seq_along(mixTrees), function(i) { + reduced <- ReduceTrees(mix, mixTrees[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA_real_) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}, double(1)) + +mixValid <- !is.na(mixScores) + +mixSplits <- vapply(which(mixValid), function(i) { + as.integer(!(mixTrees[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> + sort() +}, integer(nTip - 3)) + + + + +mid <- Tree("((((p1, p2), p0), (q0, (q1, q2))), (c1, c2));") +midSp <- as.Splits(mid) +tis <- TipsInSplits(midSp) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +trios <- unname(which(tiss == 3)) +trioSp1 <- midSp[[trios[[1]]]] +for (trioPair1 in seq_along(tis)[-trios]) { + solo1 <- xor(midSp[[trioPair1]], trioSp1) + if (TipsInSplits(solo1) %in% c(1, nTip - 1)) break +} + +trioSp2 <- midSp[[trios[[2]]]] +for (trioPair2 in seq_along(tis)[-c(trios, trioPair1)]) { + solo2 <- xor(midSp[[trioPair2]], trioSp2) + if (TipsInSplits(solo2) %in% c(1, nTip - 1)) break +} + +canonOrder <- TipLabels(mid)[c( + .FewerTips(solo1), + .FewerTips(midSp[[trioPair1]]), + .FewerTips(solo2), + .FewerTips(midSp[[trioPair2]]), + .FewerTips(midSp[[setdiff(1:5, c(trios, trioPair1, trioPair2))]]) +)] + +mid <- RenumberTips(mid, canonOrder) + +midTrees <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +midScores <- vapply(seq_along(midTrees), function(i) { + reduced <- ReduceTrees(mid, midTrees[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA_real_) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}, double(1)) + +midValid <- !is.na(midScores) + +midSplits <- vapply(which(midValid), function(i) { + as.integer(!(midTrees[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> + sort() +}, integer(nTip - 3)) + + + + +bal <- Tree("(((p1, p2), (q1, q2)), ((s1, s2), (r1, r2)));") +balSp <- as.Splits(bal) +tis <- TipsInSplits(balSp) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +cherries <- tiss == 2 + +canonOrder <- TipLabels(bal)[unlist( + lapply(which(cherries), function(idx) .FewerTips(balSp[[idx]])), + use.names = FALSE) +] +bal <- RenumberTips(bal, canonOrder) + +balTrees <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +balScores <- vapply(seq_along(balTrees), function(i) { + reduced <- ReduceTrees(bal, balTrees[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA_real_) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}, double(1)) + +balValid <- !is.na(balScores) + +balSplits <- vapply(which(balValid), function(i) { + as.integer(!(balTrees[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> + sort() +}, integer(nTip - 3)) + +# Define packing algorithm based on range +library("bit64") +offset <- c( + min(pecSplits[1, ], mixSplits[1, ], midSplits[1, ], balSplits[1, ]), + min(pecSplits[2, ], mixSplits[2, ], midSplits[2, ], balSplits[2, ]), + min(pecSplits[3, ], mixSplits[3, ], midSplits[3, ], balSplits[3, ]), + min(pecSplits[4, ], mixSplits[4, ], midSplits[4, ], balSplits[4, ]), + min(pecSplits[5, ], mixSplits[5, ], midSplits[5, ], balSplits[5, ])) |> + as.integer64() + +BitPack8 <- function(vec) { + v <- as.integer64(vec) + as.character( + (v[1] - offset[[1]]) * 134217728L + + (v[2] - offset[[2]]) * 1048576L + + (v[3] - offset[[3]]) * 8192L + + (v[4] - offset[[4]]) * 64L + + (v[5] - offset[[5]])) +} + +pecPack <- apply(pecSplits, 2, BitPack8) +pecDF <- data.frame(key = pecPack, score = pecScores[pecValid]) +pecDF <- pecDF[order(pecDF$key), ] + +mixPack <- apply(mixSplits, 2, BitPack8) +mixDF <- data.frame(key = mixPack, score = mixScores[mixValid]) +mixDF <- mixDF[order(mixDF$key), ] + +midPack <- apply(midSplits, 2, BitPack8) +midDF <- data.frame(key = midPack, score = midScores[midValid]) +midDF <- midDF[order(midDF$key), ] + +balPack <- apply(balSplits, 2, BitPack8) +balDF <- data.frame(key = balPack, score = balScores[balValid]) +balDF <- balDF[order(balDF$key), ] + + +header_content <- paste0( + "// Generated from data-raw/spr-exact.R\n", + "#include \n#include \n#include \n\n", + "struct SPRScore64 { uint64_t key; int score; };\n\n", + + "static constexpr std::array PEC_LOOKUP", + nTip, " = {{\n", + paste0(" {", pecDF$key, "ULL, ", pecDF$score, "}", collapse = ",\n"), + "\n}};\n", + + "static constexpr std::array MIX_LOOKUP", + nTip, " = {{\n", + paste0(" {", mixDF$key, "ULL, ", mixDF$score, "}", collapse = ",\n"), + "\n}};\n", + + "static constexpr std::array MID_LOOKUP", + nTip, " = {{\n", + paste0(" {", midDF$key, "ULL, ", midDF$score, "}", collapse = ",\n"), + "\n}};\n", + + "static constexpr std::array BAL_LOOKUP", + nTip, " = {{\n", + paste0(" {", balDF$key, "ULL, ", balDF$score, "}", collapse = ",\n"), + "\n}};" +) + +writeLines(header_content, sprintf("src/spr/lookup_table_%d.h", nTip)) diff --git a/data-raw/spr-exact-9.R b/data-raw/spr-exact-9.R new file mode 100644 index 00000000..7c7bf11b --- /dev/null +++ b/data-raw/spr-exact-9.R @@ -0,0 +1,426 @@ +library("TreeTools") +ReduceTrees <- TreeDist::ReduceTrees + +Tree <- function(txt) ape::read.tree(text = txt) +AsTips <- function(sp) { + which(as.logical(if (TipsInSplits(sp) > nTip / 2) !sp else sp)) +} + +nTip <- 9 + +# There are six UnrootedTreeShapes for 9 leaves +# plot(UnrootedTreeWithKey(0, 9)) +shape0 <- Tree("((c1, c2), (s, (t, (u, (v, (w, (h1, h2)))))));") +sp0 <- as.Splits(shape0) +tis <- TipsInSplits(sp0) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +fours <- unname(which(tiss == 4)) +trios <- unname(which(tiss == 3)) +pairs <- seq_len(nTip - 3)[-c(trios, fours)] + +mid1 <- xor(sp0[[fours[[1]]]], sp0[[trios[[1]]]]) +if (TipsInSplits(mid1) %in% c(1, nTip - 1)) { + mid2 <- xor(sp0[[fours[[2]]]], sp0[[trios[[2]]]]) +} else { + trios <- trios[2:1] + mid1 <- xor(sp0[[fours[[1]]]], sp0[[trios[[1]]]]) + mid2 <- xor(sp0[[fours[[2]]]], sp0[[trios[[2]]]]) +} + +# Align trio1 with mid1 +if (!TipsInSplits(xor(sp0[[trios[[1]]]], sp0[[pairs[[1]]]])) %in% + c(1, nTip - 1)) { + pairs <- pairs[2:1] +} + +canonOrder <- TipLabels(shape0)[c( + centre = AsTips(xor(sp0[[fours[[1]]]], sp0[[fours[[2]]]])), + mid1 = AsTips(mid1), + trio1 = AsTips(xor(sp0[[trios[[1]]]], sp0[[pairs[[1]]]])), + pair1 = AsTips(sp0[[pairs[[1]]]]), + mid2 = AsTips(mid2), + trio2 = AsTips(xor(sp0[[trios[[2]]]], sp0[[pairs[[2]]]])), + pair2 = AsTips(sp0[[pairs[[2]]]]) + )] +shape0 <- RenumberTips(shape0, canonOrder) + +trees0 <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +scores0 <- sapply(seq_along(trees0), function(i) { + reduced <- ReduceTrees(shape0, trees0[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +valid0 <- !is.na(scores0) + +splits0 <- vapply(which(valid0), function(i) { + as.integer(!(trees0[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> sort() +}, integer(nTip - 3)) + + +shape1 <- Tree("((c1, c2), (s, (t, (u, ((p1, p2), (q1, q2))))));") +sp1 <- as.Splits(shape1) +tis <- TipsInSplits(sp1) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +fours <- unname(which(tiss == 4)) +trio <- unname(which.max(tiss == 3)) +pairs <- seq_len(nTip - 3)[-c(trio, fours)] + +trioSp <- sp1[[trio]] +for (trioPair in pairs) { + soloSp <- xor(trioSp, sp1[[trioPair]]) + if (TipsInSplits(soloSp) %in% c(1, nTip - 1)) break +} + +if (!TipsInSplits(xor(trioSp, sp1[[fours[[1]]]])) %in% c(1, nTip - 1)) { + fours <- fours[2:1] +} + +otherPairs <- pairs[pairs != trioPair] + +canonOrder <- TipLabels(shape1)[c( + centre = AsTips(xor(sp1[[fours[[1]]]], sp1[[trio]])), + mid1 = AsTips(soloSp), + cherry = AsTips(sp1[[trioPair]]), + mid2 = AsTips(xor(sp1[[fours[[2]]]], sp1[[fours[[1]]]])), + pair1 = AsTips(sp1[[otherPairs[[1]]]]), + pair2 = AsTips(sp1[[otherPairs[[2]]]]) + )] +shape1 <- RenumberTips(shape1, canonOrder) + +trees1 <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +scores1 <- sapply(seq_along(trees1), function(i) { + reduced <- ReduceTrees(shape1, trees1[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +valid1 <- !is.na(scores1) + +splits1 <- vapply(which(valid1), function(i) { + as.integer(!(trees1[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> sort() +}, integer(nTip - 3)) + + + +shape2 <- Tree("((c1, c2), (s, (t, ((p1, p2), (u, (q1, q2))))));") +sp2 <- as.Splits(shape2) +tis <- TipsInSplits(sp2) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +fours <- unname(which.max(tiss == 4)) +trios <- unname(which(tiss == 3)) +pairs <- seq_len(nTip - 3)[-c(trios, fours)] + +if (TipsInSplits(xor(sp2[[trios[[2]]]], sp2[[fours]])) %in% c(1, nTip - 1)) { + trios <- trios[2:1] +} + +trioSp <- sp2[[trios[[1]]]] +for (trioPair1 in pairs) { + soloSp <- xor(trioSp, sp2[[trioPair1]]) + if (TipsInSplits(soloSp) %in% c(1, nTip - 1)) break +} +trioSp2 <- sp2[[trios[[2]]]] +for (trioPair2 in setdiff(pairs, trioPair1)) { + soloSp2 <- xor(trioSp2, sp2[[trioPair2]]) + if (TipsInSplits(soloSp2) %in% c(1, nTip - 1)) break +} + +canonOrder <- TipLabels(shape2)[c( + s = AsTips(soloSp), + c = AsTips(sp2[[trioPair1]]), + t = AsTips(xor(sp2[[fours]], trioSp)), + u = AsTips(soloSp2), + q = AsTips(sp2[[trioPair2]]), + p = AsTips(sp2[[setdiff(pairs, c(trioPair1, trioPair2))]]) +)] +shape2 <- RenumberTips(shape2, canonOrder) + +trees2 <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +scores2 <- sapply(seq_along(trees2), function(i) { + reduced <- ReduceTrees(shape2, trees2[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +valid2 <- !is.na(scores2) + +splits2 <- vapply(which(valid2), function(i) { + as.integer(!(trees2[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> sort() +}, integer(nTip - 3)) + + +shape3 <- Tree("((c1, c2), (s, ((h1, h2), ((p1, p2), (q1, q2)))));") +sp3 <- as.Splits(shape3) +tis <- TipsInSplits(sp3) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +fours <- unname(which(tiss == 4)) +trios <- unname(which(tiss == 3)) +pairs <- seq_len(nTip - 3)[-c(trios, fours)] + +mid1 <- xor(sp3[[fours[[1]]]], sp3[[trios[[1]]]]) +if (TipsInSplits(mid1) %in% c(1, nTip - 1)) { + mid2 <- xor(sp3[[fours[[2]]]], sp3[[trios[[2]]]]) +} else { + trios <- trios[2:1] + mid1 <- xor(sp3[[fours[[1]]]], sp3[[trios[[1]]]]) + mid2 <- xor(sp3[[fours[[2]]]], sp3[[trios[[2]]]]) +} + +# Align trio1 with mid1 +if (!TipsInSplits(xor(sp3[[trios[[1]]]], sp3[[pairs[[1]]]])) %in% + c(1, nTip - 1)) { + pairs <- pairs[2:1] +} + +canonOrder <- TipLabels(shape3)[c( + centre = AsTips(xor(sp3[[fours[[1]]]], sp3[[fours[[2]]]])), + mid1 = AsTips(mid1), + trio1 = AsTips(xor(sp3[[trios[[1]]]], sp3[[pairs[[1]]]])), + pair1 = AsTips(sp3[[pairs[[1]]]]), + mid2 = AsTips(mid2), + trio2 = AsTips(xor(sp3[[trios[[2]]]], sp3[[pairs[[2]]]])), + pair2 = AsTips(sp3[[pairs[[2]]]]) +)] +shape3 <- RenumberTips(shape3, canonOrder) + +trees3 <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +scores3 <- sapply(seq_along(trees3), function(i) { + reduced <- ReduceTrees(shape3, trees3[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +valid3 <- !is.na(scores3) + +splits3 <- vapply(which(valid3), function(i) { + as.integer(!(trees3[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> sort() +}, integer(nTip - 3)) + + + +shape4 <- Tree("((c1, c2), (s, ((t, (p1, p2)), (u, (q1, q2)))));") +sp4 <- as.Splits(shape4) +tis <- TipsInSplits(sp4) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +fours <- unname(which(tiss == 4)) +trios <- unname(which(tiss == 3)) +pairs <- seq_len(nTip - 3)[-c(trios, fours)] + +mid1 <- xor(sp4[[fours[[1]]]], sp4[[trios[[1]]]]) +if (TipsInSplits(mid1) %in% c(1, nTip - 1)) { + mid2 <- xor(sp4[[fours[[2]]]], sp4[[trios[[2]]]]) +} else { + trios <- trios[2:1] + mid1 <- xor(sp4[[fours[[1]]]], sp4[[trios[[1]]]]) + mid2 <- xor(sp4[[fours[[2]]]], sp4[[trios[[2]]]]) +} + +# Align trio1 with mid1 +if (!TipsInSplits(xor(sp4[[trios[[1]]]], sp4[[pairs[[1]]]])) %in% + c(1, nTip - 1)) { + pairs <- pairs[2:1] +} + +canonOrder <- TipLabels(shape4)[c( + centre = AsTips(xor(sp4[[fours[[1]]]], sp4[[fours[[2]]]])), + mid1 = AsTips(mid1), + trio1 = AsTips(xor(sp4[[trios[[1]]]], sp4[[pairs[[1]]]])), + pair1 = AsTips(sp4[[pairs[[1]]]]), + mid2 = AsTips(mid2), + trio2 = AsTips(xor(sp4[[trios[[2]]]], sp4[[pairs[[2]]]])), + pair2 = AsTips(sp4[[pairs[[2]]]]) +)] +shape4 <- RenumberTips(shape4, canonOrder) + +trees4 <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +scores4 <- sapply(seq_along(trees4), function(i) { + reduced <- ReduceTrees(shape4, trees4[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +valid4 <- !is.na(scores4) + +splits4 <- vapply(which(valid4), function(i) { + as.integer(!(trees4[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> sort() +}, integer(nTip - 3)) + + + + +shape5 <- Tree("(((c1, c2), (h1, h2)), (s, ((p1, p2)), (q1, q2)));") +sp5 <- as.Splits(shape5) +tis <- TipsInSplits(sp5) +tisBig <- tis > nTip / 2 +tiss <- tis +tiss[tisBig] <- nTip - tis[tisBig] + +fours <- unname(which(tiss == 4)) +trios <- unname(which(tiss == 3)) +pairs <- seq_len(nTip - 3)[-c(trios, fours)] + +mid1 <- xor(sp5[[fours[[1]]]], sp5[[trios[[1]]]]) +if (TipsInSplits(mid1) %in% c(1, nTip - 1)) { + mid2 <- xor(sp5[[fours[[2]]]], sp5[[trios[[2]]]]) +} else { + trios <- trios[2:1] + mid1 <- xor(sp5[[fours[[1]]]], sp5[[trios[[1]]]]) + mid2 <- xor(sp5[[fours[[2]]]], sp5[[trios[[2]]]]) +} + +# Align trio1 with mid1 +if (!TipsInSplits(xor(sp5[[trios[[1]]]], sp5[[pairs[[1]]]])) %in% + c(1, nTip - 1)) { + pairs <- pairs[2:1] +} + +canonOrder <- TipLabels(shape5)[c( + centre = AsTips(xor(sp5[[fours[[1]]]], sp5[[fours[[2]]]])), + mid1 = AsTips(mid1), + trio1 = AsTips(xor(sp5[[trios[[1]]]], sp5[[pairs[[1]]]])), + pair1 = AsTips(sp5[[pairs[[1]]]]), + mid2 = AsTips(mid2), + trio2 = AsTips(xor(sp5[[trios[[2]]]], sp5[[pairs[[2]]]])), + pair2 = AsTips(sp5[[pairs[[2]]]]) +)] +shape5 <- RenumberTips(shape5, canonOrder) + +trees5 <- as.phylo(seq_len(NUnrooted(nTip)), nTip, canonOrder) +scores5 <- sapply(seq_along(trees5), function(i) { + reduced <- ReduceTrees(shape5, trees5[[i]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + TBRDist::USPRDist(r1, r2) +}) +valid5 <- !is.na(scores5) + +splits5 <- vapply(which(valid5), function(i) { + as.integer(!(trees5[[i]] |> as.Splits() |> PolarizeSplits(nTip))) |> sort() +}, integer(nTip - 3)) + + + + +# Define packing algorithm based on range +library("bit64") +offset <- c( + min(splits1[1, ], splits2[1, ], splits3[1, ], + splits4[1, ], splits5[1, ], splits6[1, ]), + min(splits1[2, ], splits2[2, ], splits3[2, ], + splits4[2, ], splits5[2, ], splits6[2, ]), + min(splits1[3, ], splits2[3, ], splits3[3, ], + splits4[3, ], splits5[3, ], splits6[3, ]), + min(splits1[4, ], splits2[4, ], splits3[4, ], + splits4[4, ], splits5[4, ], splits6[4, ]), + min(splits1[5, ], splits2[5, ], splits3[5, ], + splits4[5, ], splits5[5, ], splits6[5, ]), + min(splits1[6, ], splits2[6, ], splits3[6, ], + splits4[6, ], splits5[6, ], splits6[6, ]) + ) |> + as.integer64() + +rng <- c( + max(splits1[1, ], splits2[1, ], splits3[1, ], + splits4[1, ], splits5[1, ], splits6[1, ]), + max(splits1[2, ], splits2[2, ], splits3[2, ], + splits4[2, ], splits5[2, ], splits6[2, ]), + max(splits1[3, ], splits2[3, ], splits3[3, ], + splits4[3, ], splits5[3, ], splits6[3, ]), + max(splits1[4, ], splits2[4, ], splits3[4, ], + splits4[4, ], splits5[4, ], splits6[4, ]), + max(splits1[5, ], splits2[5, ], splits3[5, ], + splits4[5, ], splits5[5, ], splits6[5, ]), + max(splits1[6, ], splits2[6, ], splits3[6, ], + splits4[6, ], splits5[6, ], splits6[6, ]) +) - c( + min(splits1[1, ], splits2[1, ], splits3[1, ], + splits4[1, ], splits5[1, ], splits6[1, ]), + min(splits1[2, ], splits2[2, ], splits3[2, ], + splits4[2, ], splits5[2, ], splits6[2, ]), + min(splits1[3, ], splits2[3, ], splits3[3, ], + splits4[3, ], splits5[3, ], splits6[3, ]), + min(splits1[4, ], splits2[4, ], splits3[4, ], + splits4[4, ], splits5[4, ], splits6[4, ]), + min(splits1[5, ], splits2[5, ], splits3[5, ], + splits4[5, ], splits5[5, ], splits6[5, ]), + min(splits1[6, ], splits2[6, ], splits3[6, ], + splits4[6, ], splits5[6, ], splits6[6, ]) +) + +BitPack9 <- function(vec) { + v <- as.integer64(vec) + as.character( + (v[1] - offset[[1]]) * 134217728L + + (v[2] - offset[[2]]) * 1048576L + + (v[3] - offset[[3]]) * 8192L + + (v[4] - offset[[4]]) * 64L + + (v[5] - offset[[5]]) * 64L + + (v[6] - offset[[6]])) +} + +pecPack <- apply(pecSplits, 2, BitPack8) +pecDF <- data.frame(key = pecPack, score = pecScores[pecValid]) +pecDF <- pecDF[order(pecDF$key), ] + +mixPack <- apply(mixSplits, 2, BitPack8) +mixDF <- data.frame(key = mixPack, score = mixScores[mixValid]) +mixDF <- mixDF[order(mixDF$key), ] + +midPack <- apply(midSplits, 2, BitPack8) +midDF <- data.frame(key = midPack, score = midScores[midValid]) +midDF <- midDF[order(midDF$key), ] + +balPack <- apply(balSplits, 2, BitPack8) +balDF <- data.frame(key = balPack, score = balScores[balValid]) +balDF <- balDF[order(balDF$key), ] + + +header_content <- paste0( + "// Generated from data-raw/spr-exact.R\n", + "#include \n#include \n#include \n\n", + "struct SPRScore64 { uint64_t key; int score; };\n\n", + + "static constexpr std::array PEC_LOOKUP", + nTip, " = {{\n", + paste0(" {", pecDF$key, "ULL, ", pecDF$score, "}", collapse = ",\n"), + "\n}};\n", + + "static constexpr std::array MIX_LOOKUP", + nTip, " = {{\n", + paste0(" {", mixDF$key, "ULL, ", mixDF$score, "}", collapse = ",\n"), + "\n}};\n", + + "static constexpr std::array MID_LOOKUP", + nTip, " = {{\n", + paste0(" {", midDF$key, "ULL, ", midDF$score, "}", collapse = ",\n"), + "\n}};\n", + + "static constexpr std::array BAL_LOOKUP", + nTip, " = {{\n", + paste0(" {", balDF$key, "ULL, ", balDF$score, "}", collapse = ",\n"), + "\n}};" +) + +writeLines(header_content, sprintf("src/spr/lookup_table_%d.h", nTip)) diff --git a/man/SPRDist.Rd b/man/SPRDist.Rd index 3ab40775..42fc9b2c 100644 --- a/man/SPRDist.Rd +++ b/man/SPRDist.Rd @@ -5,7 +5,7 @@ \alias{SPRDist.phylo} \alias{SPRDist.list} \alias{SPRDist.multiPhylo} -\title{Approximate the Subtree Prune and Regraft (SPR) distance.} +\title{Approximate the Subtree Prune and Regraft distance} \usage{ SPRDist(tree1, tree2 = NULL, method = "deOliveira", symmetric) @@ -23,10 +23,12 @@ or lists of such trees to undergo pairwise comparison. Where implemented, \insertCite{Day1985;textual}{TreeDist}.} \item{method}{Character specifying which method to use to approximate the -SPR distance. Currently defaults to \code{"deOliveira"}, the only available -option; a new method will eventually become the default.} +SPR distance. Currently defaults to \verb{"deOliveira"``. }"Rogue"` implements an experimental method whose details are pending +publication; this function is under development, and may be modified or +removed without notice. Once formally validated, it is anticipated that this +method will become the default.} -\item{symmetric}{Ignored (redundant after fix of +\item{symmetric}{Deprecated (redundant after fix of \href{https://github.com/KlausVigo/phangorn/issues/97}{phangorn#97}).} } \value{ @@ -34,10 +36,16 @@ option; a new method will eventually become the default.} between trees. } \description{ -\code{SPRDist()} calculates an upper bound on the SPR distance between trees -using the heuristic method of \insertCite{deOliveira2008;textual}{TreeDist}. -Other approximations are available -\insertCite{@e.g. @Hickey2008, @Goloboff2008SPR, @Whidden2018}{TreeDist}. +\code{SPRDist()} approximates the \acronym{SPR} distance between +trees. +} +\details{ +The function currently defaults to the heuristic method of +\insertCite{deOliveira2008;textual}{TreeDist}, which purports to provide an +upper bound on the \acronym{SPR} distance (though exceptions exist). +Other approximations +\insertCite{@e.g. @Hickey2008, @Goloboff2008SPR, @Whidden2018}{TreeDist} are +not yet implemented. } \examples{ library("TreeTools", quietly = TRUE) @@ -45,7 +53,7 @@ library("TreeTools", quietly = TRUE) # Compare single pair of trees SPRDist(BalancedTree(7), PectinateTree(7)) -# Compare all pairs of trees +# Compare all pairs of trees SPRDist(as.phylo(30:33, 8)) # Compare each tree in one list with each tree in another diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 0e067715..5c338bf3 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -10,6 +10,18 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif +// binary_entropy_counts +NumericVector binary_entropy_counts(IntegerVector inSplit, int nLeaves); +RcppExport SEXP _TreeDist_binary_entropy_counts(SEXP inSplitSEXP, SEXP nLeavesSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< IntegerVector >::type inSplit(inSplitSEXP); + Rcpp::traits::input_parameter< int >::type nLeaves(nLeavesSEXP); + rcpp_result_gen = Rcpp::wrap(binary_entropy_counts(inSplit, nLeaves)); + return rcpp_result_gen; +END_RCPP +} // COMCLUST int COMCLUST(const List& trees); RcppExport SEXP _TreeDist_COMCLUST(SEXP treesSEXP) { @@ -149,6 +161,18 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// expected_mi +double expected_mi(const IntegerVector& ni, const IntegerVector& nj); +RcppExport SEXP _TreeDist_expected_mi(SEXP niSEXP, SEXP njSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const IntegerVector& >::type ni(niSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type nj(njSEXP); + rcpp_result_gen = Rcpp::wrap(expected_mi(ni, nj)); + return rcpp_result_gen; +END_RCPP +} // lapjv Rcpp::List lapjv(Rcpp::NumericMatrix& x, Rcpp::NumericVector& maxX); RcppExport SEXP _TreeDist_lapjv(SEXP xSEXP, SEXP maxXSEXP) { @@ -235,55 +259,67 @@ BEGIN_RCPP END_RCPP } // mismatch_size -IntegerVector mismatch_size(const RawMatrix x, const RawMatrix y); +IntegerVector mismatch_size(const RawMatrix& x, const RawMatrix& y); RcppExport SEXP _TreeDist_mismatch_size(SEXP xSEXP, SEXP ySEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP); - Rcpp::traits::input_parameter< const RawMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< const RawMatrix& >::type x(xSEXP); + Rcpp::traits::input_parameter< const RawMatrix& >::type y(ySEXP); rcpp_result_gen = Rcpp::wrap(mismatch_size(x, y)); return rcpp_result_gen; END_RCPP } // confusion -IntegerVector confusion(const RawMatrix x, const RawMatrix y); +IntegerVector confusion(const RawMatrix& x, const RawMatrix& y); RcppExport SEXP _TreeDist_confusion(SEXP xSEXP, SEXP ySEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const RawMatrix >::type x(xSEXP); - Rcpp::traits::input_parameter< const RawMatrix >::type y(ySEXP); + Rcpp::traits::input_parameter< const RawMatrix& >::type x(xSEXP); + Rcpp::traits::input_parameter< const RawMatrix& >::type y(ySEXP); rcpp_result_gen = Rcpp::wrap(confusion(x, y)); return rcpp_result_gen; END_RCPP } // keep_and_reroot -List keep_and_reroot(const List tree1, const List tree2, const LogicalVector keep); +List keep_and_reroot(const List& tree1, const List& tree2, const LogicalVector& keep); RcppExport SEXP _TreeDist_keep_and_reroot(SEXP tree1SEXP, SEXP tree2SEXP, SEXP keepSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const List >::type tree1(tree1SEXP); - Rcpp::traits::input_parameter< const List >::type tree2(tree2SEXP); - Rcpp::traits::input_parameter< const LogicalVector >::type keep(keepSEXP); + Rcpp::traits::input_parameter< const List& >::type tree1(tree1SEXP); + Rcpp::traits::input_parameter< const List& >::type tree2(tree2SEXP); + Rcpp::traits::input_parameter< const LogicalVector& >::type keep(keepSEXP); rcpp_result_gen = Rcpp::wrap(keep_and_reroot(tree1, tree2, keep)); return rcpp_result_gen; END_RCPP } // keep_and_reduce -List keep_and_reduce(const List tree1, const List tree2, const LogicalVector keep); +List keep_and_reduce(const List& tree1, const List& tree2, const LogicalVector& keep); RcppExport SEXP _TreeDist_keep_and_reduce(SEXP tree1SEXP, SEXP tree2SEXP, SEXP keepSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const List >::type tree1(tree1SEXP); - Rcpp::traits::input_parameter< const List >::type tree2(tree2SEXP); - Rcpp::traits::input_parameter< const LogicalVector >::type keep(keepSEXP); + Rcpp::traits::input_parameter< const List& >::type tree1(tree1SEXP); + Rcpp::traits::input_parameter< const List& >::type tree2(tree2SEXP); + Rcpp::traits::input_parameter< const LogicalVector& >::type keep(keepSEXP); rcpp_result_gen = Rcpp::wrap(keep_and_reduce(tree1, tree2, keep)); return rcpp_result_gen; END_RCPP } +// spr_table_7 +int spr_table_7(const Rcpp::RawVector& sp1, const Rcpp::RawVector& sp2); +RcppExport SEXP _TreeDist_spr_table_7(SEXP sp1SEXP, SEXP sp2SEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const Rcpp::RawVector& >::type sp1(sp1SEXP); + Rcpp::traits::input_parameter< const Rcpp::RawVector& >::type sp2(sp2SEXP); + rcpp_result_gen = Rcpp::wrap(spr_table_7(sp1, sp2)); + return rcpp_result_gen; +END_RCPP +} // cpp_robinson_foulds_distance List cpp_robinson_foulds_distance(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip); RcppExport SEXP _TreeDist_cpp_robinson_foulds_distance(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) { @@ -379,6 +415,7 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { + {"_TreeDist_binary_entropy_counts", (DL_FUNC) &_TreeDist_binary_entropy_counts, 2}, {"_TreeDist_COMCLUST", (DL_FUNC) &_TreeDist_COMCLUST, 1}, {"_TreeDist_robinson_foulds_all_pairs", (DL_FUNC) &_TreeDist_robinson_foulds_all_pairs, 1}, {"_TreeDist_consensus_info", (DL_FUNC) &_TreeDist_consensus_info, 3}, @@ -391,6 +428,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeDist_clone_hpart", (DL_FUNC) &_TreeDist_clone_hpart, 1}, {"_TreeDist_relabel_hpart", (DL_FUNC) &_TreeDist_relabel_hpart, 2}, {"_TreeDist_entropy_int", (DL_FUNC) &_TreeDist_entropy_int, 1}, + {"_TreeDist_expected_mi", (DL_FUNC) &_TreeDist_expected_mi, 2}, {"_TreeDist_lapjv", (DL_FUNC) &_TreeDist_lapjv, 2}, {"_TreeDist_cpp_mast", (DL_FUNC) &_TreeDist_cpp_mast, 3}, {"_TreeDist_cpp_nni_distance", (DL_FUNC) &_TreeDist_cpp_nni_distance, 3}, @@ -402,6 +440,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeDist_confusion", (DL_FUNC) &_TreeDist_confusion, 2}, {"_TreeDist_keep_and_reroot", (DL_FUNC) &_TreeDist_keep_and_reroot, 3}, {"_TreeDist_keep_and_reduce", (DL_FUNC) &_TreeDist_keep_and_reduce, 3}, + {"_TreeDist_spr_table_7", (DL_FUNC) &_TreeDist_spr_table_7, 2}, {"_TreeDist_cpp_robinson_foulds_distance", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_distance, 3}, {"_TreeDist_cpp_robinson_foulds_info", (DL_FUNC) &_TreeDist_cpp_robinson_foulds_info, 3}, {"_TreeDist_cpp_matching_split_distance", (DL_FUNC) &_TreeDist_cpp_matching_split_distance, 3}, diff --git a/src/binary_entropy_counts.cpp b/src/binary_entropy_counts.cpp new file mode 100644 index 00000000..4453232d --- /dev/null +++ b/src/binary_entropy_counts.cpp @@ -0,0 +1,41 @@ +// src/binary_entropy_counts.cpp +#include +#include +using namespace Rcpp; + +// Compute H(p) = -p log p - (1-p) log(1-p) for p = count / nLeaves +// Counts at 0 or nLeaves get entropy 0; NA counts return NA. +// [[Rcpp::export]] +NumericVector binary_entropy_counts(IntegerVector inSplit, int nLeaves) { + int K = inSplit.size(); + NumericVector out(K); + + if (nLeaves <= 0) { + std::fill(out.begin(), out.end(), NA_REAL); + return out; + } + + const double invN = 1.0 / static_cast(nLeaves); + + for (int j = 0; j < K; ++j) { + int a = inSplit[j]; + if (IntegerVector::is_na(a)) { + out[j] = NA_REAL; + continue; + } + if (a <= 0 || a >= nLeaves) { + // p ∈ {0,1} ⇒ H = 0 + out[j] = 0.0; + continue; + } + + double x = a * invN; + const double one_minus_x = 1.0 - x; + + // Use log1p(-x) for better precision near x≈1 + double H = -(x * std::log(x) + one_minus_x * std::log1p(-x)); + out[j] = H; + } + constexpr double invLog2 = 1.442695040888963387005; + return out * invLog2; +} diff --git a/src/information.h b/src/information.h index a2b8027e..dd79bd61 100644 --- a/src/information.h +++ b/src/information.h @@ -116,4 +116,88 @@ double entropy_int(const Rcpp::IntegerVector &n) { return (N == 0) ? 0.0 : (N < LOG_MAX ? log2_table[N] : std::log2(N)) - sum / N; } + +// TODO This is copied from TreeSearch/src/expected_mi.cpp - double definition +// is bad practice; define once (here? TreeTools?) and cross-reference. +// TODO this also somewhat duplicates the above... +#define MAX_FACTORIAL_LOOKUP 8192 +static double log2_factorial_table[MAX_FACTORIAL_LOOKUP + 1]; +static const double LOG2_E = 1.4426950408889634; + +__attribute__((constructor)) +void initialize_factorial_cache() { + log2_factorial_table[0] = 0.0; + for (int i = 1; i <= MAX_FACTORIAL_LOOKUP; i++) { + log2_factorial_table[i] = log2_factorial_table[i - 1] + std::log2(i); + } +} +// Fast lookup with bounds checking +inline double l2factorial(int n) { + if (n <= MAX_FACTORIAL_LOOKUP) { + return log2_factorial_table[n]; + } else { + return lgamma(n + 1) * LOG2_E; + } +} + +// ni and nj are vectors listing the number of entitites in each cluster +// [[Rcpp::export]] +double expected_mi(const IntegerVector &ni, const IntegerVector &nj) { + // ni = {a, N-a}; nj = counts of character states + const int a = ni[0]; + const int N = ni[0] + ni[1]; + if (a <= 0 || a >= N) return 0.0; // trivial split + + const double invN = 1.0 / static_cast(N); + const double log2N = std::log2(static_cast(N)); + const double log2a = std::log2(static_cast(a)); + const double log2Na = std::log2(static_cast(N - a)); + const double log2_denom = l2factorial(N) - l2factorial(a) - l2factorial(N - a); + + double emi = 0.0; + + for (int j = 0; j < nj.size(); ++j) { + int mj = nj[j]; + if (mj <= 0) continue; + + int kmin = std::max(0, a + mj - N); + int kmax = std::min(a, mj); + if (kmin > kmax) continue; + + const double log2mj = std::log2(static_cast(mj)); + + // compute P(K=kmin) + double log2P = (l2factorial(mj) - l2factorial(kmin) - l2factorial(mj - kmin)) + + (l2factorial(N - mj) - l2factorial(a - kmin) - l2factorial(N - mj - (a - kmin))) + - log2_denom; + double Pk = std::pow(2.0, log2P); + + for (int k = kmin; k <= kmax; ++k) { + if (Pk > 0.0) { + // contribution from inside the split + if (k > 0) { + double mi_in = std::log2(static_cast(k)) + log2N - (log2a + log2mj); + emi += (static_cast(k) * invN) * mi_in * Pk; + } + // contribution from outside the split + int kout = mj - k; + if (kout > 0) { + double mi_out = std::log2(static_cast(kout)) + log2N - (log2Na + log2mj); + emi += (static_cast(kout) * invN) * mi_out * Pk; + } + } + // Update P(k) → P(k+1) + if (k < kmax) { + double numer = static_cast((mj - k) * (a - k)); + double denom = static_cast((k + 1) * (N - mj - a + k + 1)); + Pk *= numer / denom; + } + } + } + + return emi; +} + + #endif + diff --git a/src/spr.cpp b/src/spr.cpp index 2401afbe..7eee66d6 100644 --- a/src/spr.cpp +++ b/src/spr.cpp @@ -8,61 +8,35 @@ using namespace Rcpp; -// [[Rcpp::export]] -IntegerVector mismatch_size (const RawMatrix x, const RawMatrix y) { - if (double(x.rows()) > double(std::numeric_limits::max())) { - Rcpp::stop("This many splits are not (yet) supported."); - } - const int16 n_split = int16(x.rows()); - if (n_split != y.rows()) { - throw std::invalid_argument("`x` and `y` differ in number of splits."); - } - if (!x.hasAttribute("nTip")) { - Rcpp::stop("`x` lacks nTip attribute"); - } - if (!y.hasAttribute("nTip")) { - Rcpp::stop("`y` lacks nTip attribute"); - } - const int16 n_tip = x.attr("nTip"); - if (n_tip != int16(y.attr("nTip"))) { - Rcpp::stop("`x` and `y` differ in `nTip`"); - } +IntegerVector calc_mismatch_size(const RawMatrix& x, const RawMatrix& y) { const TreeTools::SplitList a(x), b(y); - const int16 - half_tip = n_tip / 2, - last_bin = a.n_bins - 1, - unset_tips = (n_tip % SL_BIN_SIZE) ? SL_BIN_SIZE - n_tip % SL_BIN_SIZE : 0 - ; + + const int32 n_split = int32(x.rows()); + const int32 n_tip = x.attr("nTip"); + const int32 half_tip = n_tip / 2; + const int32 last_bin = a.n_bins - 1; + const int32 unset_tips = (n_tip % SL_BIN_SIZE) + ? SL_BIN_SIZE - n_tip % SL_BIN_SIZE : 0; + constexpr splitbit all_ones = ~(splitbit(0U)); const splitbit unset_mask = all_ones >> unset_tips; - + IntegerVector ret(n_split * n_split); int *ret_ptr = ret.end(); - for (int16 bi = b.n_splits; bi--; ) { - // Rcout << "a = " << ai << ".\n"; - for (int16 ai = a.n_splits; ai--; ) { - // Rcout << " - b = " << bi << ".\n"; + for (int32 bi = b.n_splits; bi--; ) { + for (int32 ai = a.n_splits; ai--; ) { --ret_ptr; - // Rcout << " - last_bin: " << ((a.state[ai][last_bin] ^ b.state[bi][last_bin]) & unset_mask) - // << " = " << TreeTools::count_bits( - // (a.state[ai][last_bin] ^ b.state[bi][last_bin]) & unset_mask - // ) << "\n"; *ret_ptr = TreeTools::count_bits( (a.state[ai][last_bin] ^ b.state[bi][last_bin]) & unset_mask - ); - for (int16 bin = last_bin; bin--; ) { - // Rcout << " - bin = " << bin << ".\n"; - // Rcout << " " << (a.state[ai][bin]); - // Rcout << " ^ " << (b.state[bi][bin]); - // Rcout << " = " << (a.state[ai][bin] ^ b.state[bi][bin]) << std::endl; + ); + + for (int32 bin = last_bin; bin--; ) { *ret_ptr += TreeTools::count_bits(a.state[ai][bin] ^ b.state[bi][bin]); - // Rcout << " ret[" << ret_ptr << "] = " - // << TreeTools::count_bits(a.state[ai][bin] ^ b.state[bi][bin]) - // <<".\n"; } + if (*ret_ptr > half_tip) { *ret_ptr = n_tip - *ret_ptr; } @@ -72,13 +46,9 @@ IntegerVector mismatch_size (const RawMatrix x, const RawMatrix y) { } // [[Rcpp::export]] -IntegerVector confusion (const RawMatrix x, const RawMatrix y) { - if (double(x.rows()) > double(std::numeric_limits::max())) { - Rcpp::stop("This many splits are not (yet) supported."); - } - const int16 n_split = int16(x.rows()); - if (n_split != y.rows()) { - throw std::invalid_argument("Input splits contain same number of splits."); +IntegerVector mismatch_size (const RawMatrix& x, const RawMatrix& y) { + if (x.rows() != y.rows()) { + throw std::invalid_argument("`x` and `y` differ in number of splits."); } if (!x.hasAttribute("nTip")) { Rcpp::stop("`x` lacks nTip attribute"); @@ -86,38 +56,40 @@ IntegerVector confusion (const RawMatrix x, const RawMatrix y) { if (!y.hasAttribute("nTip")) { Rcpp::stop("`y` lacks nTip attribute"); } - const int16 n_tip = x.attr("nTip"); - if (n_tip != int16(y.attr("nTip"))) { + if (static_cast(x.attr("nTip")) != static_cast(y.attr("nTip"))) { Rcpp::stop("`x` and `y` differ in `nTip`"); } + return calc_mismatch_size(x, y); +} + +IntegerVector calc_confusion(const RawMatrix &x, const RawMatrix &y) { const TreeTools::SplitList a(x), b(y); - const int16 - n_bin = a.n_bins, - confusion_size = 4 - ; + + const int32 n_split = static_cast(x.rows()); + const int32 n_tip = x.attr("nTip"); + const int32 n_bin = a.n_bins; + const int32 confusion_size = 4; + IntegerVector ret(n_split * n_split * confusion_size); int *ret_ptr = ret.end(); - for (int16 bi = n_split; bi--; ) { - const int16 - nb = b.in_split[bi], - nB = n_tip - nb - ; + for (int32 bi = n_split; bi--; ) { + const int32 nb = b.in_split[bi]; + const int32 nB = n_tip - nb; - for (int16 ai = n_split; ai--; ) { + for (int32 ai = n_split; ai--; ) { // x divides tips into a|A; y divides tips into b|B - int16 a_and_b = 0; - for (int16 bin = n_bin; bin--; ) { + int32 a_and_b = 0; + for (int32 bin = n_bin; bin--; ) { a_and_b += TreeTools::count_bits(a.state[ai][bin] & b.state[bi][bin]); } - const int16 - na = a.in_split[ai], - a_and_B = na - a_and_b, - A_and_b = nb - a_and_b, - A_and_B = nB - a_and_B - ; + const int32 na = a.in_split[ai]; + const int32 a_and_B = na - a_and_b; + const int32 A_and_b = nb - a_and_b; + const int32 A_and_B = nB - a_and_B; + *(--ret_ptr) = A_and_B; *(--ret_ptr) = A_and_b; *(--ret_ptr) = a_and_B; @@ -128,6 +100,23 @@ IntegerVector confusion (const RawMatrix x, const RawMatrix y) { return ret; } +// [[Rcpp::export]] +IntegerVector confusion(const RawMatrix& x, const RawMatrix& y) { + if (x.rows() != y.rows()) { + throw std::invalid_argument("Input splits contain same number of splits."); + } + if (!x.hasAttribute("nTip")) { + Rcpp::stop("`x` lacks nTip attribute"); + } + if (!y.hasAttribute("nTip")) { + Rcpp::stop("`y` lacks nTip attribute"); + } + if (static_cast(x.attr("nTip")) != static_cast(y.attr("nTip"))) { + Rcpp::stop("`x` and `y` differ in `nTip`"); + } + return calc_confusion(x, y); +} + IntegerMatrix reverse (const IntegerMatrix x) { if (double(x.nrow()) > double(std::numeric_limits::max())) { Rcpp::stop("This many edges are not (yet) supported."); @@ -148,33 +137,31 @@ IntegerMatrix reverse (const IntegerMatrix x) { // tree1 and tree2 are binary trees in postorder with identical tip.labels // [[Rcpp::export]] -List keep_and_reroot(const List tree1, - const List tree2, - const LogicalVector keep) { - IntegerMatrix - postorder1 = tree1["edge"], - postorder2 = tree2["edge"] - ; +List keep_and_reroot(const List& tree1, + const List& tree2, + const LogicalVector& keep) { + IntegerMatrix postorder1 = tree1["edge"]; ASSERT(postorder1.nrow() % 2 == 0); // Tree is binary + IntegerMatrix pre1 = reverse(postorder1); + + IntegerMatrix postorder2 = tree2["edge"]; ASSERT(postorder2.nrow() % 2 == 0); // Tree is binary + IntegerMatrix pre2 = reverse(postorder2); - IntegerMatrix - pre1 = reverse(postorder1), - pre2 = reverse(postorder2) - ; + ASSERT((postorder1.nrow() / 2 + 1) == keep.length()); - ASSERT(postorder1.nrow() / 2 + 1 == keep.length()); - // Rcout << "\n \n === Keep & Reroot ===\n"; - // Rcout << " Keeping: "; - // for (int i = 0; i != keep.size(); i++) Rcout << (keep[i] ? "*" : "."); - IntegerMatrix ret_edge1 = TreeTools::keep_tip(pre1, keep); - IntegerMatrix ret_edge2 = TreeTools::keep_tip(pre2, keep); + bool any_kept = false; + for (auto i : keep) { + if (i) { + any_kept = true; + break; + } + } - const intx n_node = ret_edge1.nrow() / 2; - if (!n_node) { - List nullTree = List::create(Named("edge") = ret_edge1, - _["Nnode"] = n_node, + if (!any_kept) { + List nullTree = List::create(Named("edge") = IntegerMatrix(0, 2), + _["Nnode"] = 0, _["tip.label"] = CharacterVector(0)); nullTree.attr("class") = "phylo"; @@ -182,20 +169,37 @@ List keep_and_reroot(const List tree1, return List::create(nullTree, nullTree); } - const intx n_tip = n_node + 1; + IntegerMatrix ret_edge1 = TreeTools::keep_tip(pre1, keep); + IntegerMatrix ret_edge2 = TreeTools::keep_tip(pre2, keep); + + const int32 n_edge = ret_edge1.nrow(); + const int32 n_node = n_edge / 2; + + if (n_node == 0) { + const CharacterVector all_labels = tree1["tip.label"]; + const CharacterVector kept_labels = all_labels[keep]; + ASSERT(kept_labels.length() == 1); + IntegerVector e_val = IntegerVector::create(2, 1); + e_val.attr("dim") = Dimension(1, 2); + List oneTipTree = List::create( + Named("edge") = as(e_val), + _["tip.label"] = kept_labels, + _["Nnode"] = 1 + ); + + oneTipTree.attr("class") = "phylo"; + oneTipTree.attr("order") = "preorder"; + return List::create(oneTipTree, oneTipTree); + } + + const int32 n_tip = n_node + 1; CharacterVector old_label = tree1["tip.label"], new_labels(n_tip) ; - // Rcout << ret_edge1.nrow() << " rows; Kept " << n_tip << " tips and " - // << n_node << " nodes.\n"; - - if (old_label.size() > std::numeric_limits::max()) { - Rcpp::stop("This many leaves are not (yet) supported."); - } - intx next_tip = n_tip; - for (intx i = intx(old_label.size()); i--; ) { + int32 next_tip = n_tip; + for (int32 i = int32(old_label.size()); i--; ) { if (keep[i]) { --next_tip; new_labels[next_tip] = old_label[i]; @@ -219,10 +223,13 @@ List keep_and_reroot(const List tree1, } // [[Rcpp::export]] -List keep_and_reduce(const List tree1, - const List tree2, - const LogicalVector keep) { +List keep_and_reduce(const List& tree1, + const List& tree2, + const LogicalVector& keep) { List rerooted = keep_and_reroot(tree1, tree2, keep); + if (rerooted.size() == 1) { + return Rcpp::List::create(R_NilValue); + } List rerooted1 = rerooted[0]; List rerooted2 = rerooted[1]; diff --git a/src/spr/lookup_table_7.h b/src/spr/lookup_table_7.h new file mode 100644 index 00000000..a1b3abaf --- /dev/null +++ b/src/spr/lookup_table_7.h @@ -0,0 +1,1433 @@ +// Generated from data-raw/spr-exact.R +#include +#include +#include + +struct SPRScore { uint32_t key; int score; }; + +static constexpr std::array PEC_LOOKUP7 = {{ + {14u, 2}, + {15u, 2}, + {519u, 2}, + {534u, 2}, + {582u, 2}, + {599u, 2}, + {1550u, 2}, + {1558u, 3}, + {1614u, 2}, + {1623u, 3}, + {2134u, 2}, + {2135u, 2}, + {16398u, 2}, + {16399u, 2}, + {16714u, 2}, + {16723u, 2}, + {17155u, 2}, + {17178u, 2}, + {17742u, 2}, + {17747u, 3}, + {18190u, 2}, + {18202u, 3}, + {18515u, 2}, + {18522u, 2}, + {49671u, 2}, + {49686u, 2}, + {49923u, 2}, + {49946u, 2}, + {50507u, 2}, + {50518u, 2}, + {50763u, 2}, + {50778u, 2}, + {51478u, 2}, + {51482u, 2}, + {53767u, 2}, + {53782u, 2}, + {54082u, 2}, + {54107u, 2}, + {54538u, 3}, + {54550u, 3}, + {54858u, 3}, + {54875u, 3}, + {55638u, 3}, + {55643u, 3}, + {70403u, 2}, + {70426u, 2}, + {70466u, 2}, + {70491u, 2}, + {70918u, 3}, + {70938u, 3}, + {70982u, 3}, + {71003u, 3}, + {72282u, 3}, + {72283u, 3}, + {116238u, 2}, + {116246u, 3}, + {116494u, 2}, + {116506u, 3}, + {117014u, 2}, + {117018u, 2}, + {120334u, 2}, + {120342u, 3}, + {120654u, 2}, + {120667u, 2}, + {121174u, 3}, + {121179u, 3}, + {136974u, 2}, + {136986u, 3}, + {137038u, 2}, + {137051u, 2}, + {137818u, 3}, + {137819u, 3}, + {170262u, 2}, + {170266u, 2}, + {170326u, 2}, + {170331u, 2}, + {170586u, 2}, + {170587u, 2}, + {524302u, 3}, + {524303u, 3}, + {524807u, 2}, + {524822u, 2}, + {524870u, 3}, + {524887u, 3}, + {525838u, 3}, + {525846u, 3}, + {525902u, 3}, + {525911u, 3}, + {526422u, 3}, + {526423u, 3}, + {536590u, 2}, + {536591u, 2}, + {536969u, 2}, + {536980u, 2}, + {537284u, 2}, + {537305u, 2}, + {537998u, 2}, + {538004u, 3}, + {538318u, 2}, + {538329u, 3}, + {538708u, 2}, + {538713u, 2}, + {548878u, 2}, + {548879u, 2}, + {549068u, 2}, + {549073u, 2}, + {549761u, 2}, + {549788u, 2}, + {550094u, 2}, + {550097u, 3}, + {550798u, 2}, + {550812u, 3}, + {550993u, 2}, + {551004u, 2}, + {569863u, 2}, + {569878u, 2}, + {570052u, 3}, + {570073u, 3}, + {570764u, 3}, + {570774u, 3}, + {570956u, 3}, + {570969u, 3}, + {571606u, 3}, + {571609u, 3}, + {582151u, 2}, + {582166u, 2}, + {582529u, 2}, + {582556u, 2}, + {582857u, 2}, + {582870u, 2}, + {583241u, 2}, + {583260u, 2}, + {584086u, 2}, + {584092u, 2}, + {594628u, 3}, + {594649u, 3}, + {594817u, 2}, + {594844u, 2}, + {595142u, 3}, + {595161u, 3}, + {595334u, 3}, + {595356u, 3}, + {596569u, 3}, + {596572u, 3}, + {636430u, 3}, + {636438u, 3}, + {636622u, 2}, + {636633u, 3}, + {637142u, 3}, + {637145u, 3}, + {648718u, 3}, + {648726u, 3}, + {649102u, 2}, + {649116u, 3}, + {649622u, 3}, + {649628u, 3}, + {661198u, 2}, + {661209u, 3}, + {661390u, 2}, + {661404u, 3}, + {662105u, 3}, + {662108u, 3}, + {694486u, 2}, + {694489u, 2}, + {694678u, 2}, + {694684u, 2}, + {694873u, 3}, + {694876u, 3}, + {786446u, 2}, + {786447u, 2}, + {786951u, 1}, + {786966u, 1}, + {787014u, 2}, + {787031u, 2}, + {787982u, 2}, + {787990u, 2}, + {788046u, 2}, + {788055u, 2}, + {788566u, 2}, + {788567u, 2}, + {794638u, 2}, + {794639u, 2}, + {795080u, 2}, + {795093u, 2}, + {795269u, 2}, + {795288u, 2}, + {796110u, 2}, + {796117u, 3}, + {796302u, 2}, + {796312u, 3}, + {796757u, 2}, + {796760u, 2}, + {815118u, 2}, + {815119u, 2}, + {815245u, 2}, + {815248u, 2}, + {816064u, 2}, + {816093u, 2}, + {816270u, 2}, + {816272u, 3}, + {817102u, 2}, + {817117u, 3}, + {817232u, 2}, + {817245u, 2}, + {827911u, 2}, + {827926u, 2}, + {828037u, 2}, + {828056u, 2}, + {828877u, 2}, + {828886u, 2}, + {829005u, 2}, + {829016u, 2}, + {829590u, 2}, + {829592u, 2}, + {848391u, 2}, + {848406u, 2}, + {848832u, 2}, + {848861u, 2}, + {849032u, 2}, + {849046u, 2}, + {849480u, 2}, + {849501u, 2}, + {850390u, 2}, + {850397u, 2}, + {856709u, 2}, + {856728u, 2}, + {857024u, 2}, + {857053u, 2}, + {857222u, 2}, + {857240u, 3}, + {857542u, 2}, + {857565u, 3}, + {858712u, 2}, + {858717u, 2}, + {894478u, 2}, + {894486u, 2}, + {894606u, 2}, + {894616u, 3}, + {895126u, 3}, + {895128u, 3}, + {914958u, 2}, + {914966u, 2}, + {915406u, 2}, + {915421u, 3}, + {915926u, 3}, + {915933u, 3}, + {923278u, 2}, + {923288u, 3}, + {923598u, 2}, + {923613u, 3}, + {924248u, 2}, + {924253u, 2}, + {956566u, 2}, + {956568u, 2}, + {956886u, 2}, + {956893u, 2}, + {957016u, 2}, + {957021u, 2}, + {1589262u, 3}, + {1589263u, 3}, + {1589578u, 3}, + {1589587u, 3}, + {1590019u, 2}, + {1590042u, 2}, + {1590606u, 3}, + {1590611u, 3}, + {1591054u, 3}, + {1591066u, 3}, + {1591379u, 3}, + {1591386u, 3}, + {1597454u, 2}, + {1597455u, 2}, + {1597644u, 2}, + {1597649u, 2}, + {1598337u, 2}, + {1598364u, 2}, + {1598670u, 2}, + {1598673u, 3}, + {1599374u, 2}, + {1599388u, 3}, + {1599569u, 2}, + {1599580u, 2}, + {1618376u, 3}, + {1618389u, 3}, + {1618691u, 2}, + {1618714u, 2}, + {1619276u, 3}, + {1619285u, 3}, + {1619596u, 3}, + {1619610u, 3}, + {1620181u, 3}, + {1620186u, 3}, + {1626568u, 3}, + {1626581u, 3}, + {1627009u, 2}, + {1627036u, 2}, + {1627338u, 3}, + {1627349u, 3}, + {1627786u, 3}, + {1627804u, 3}, + {1628501u, 3}, + {1628508u, 3}, + {1647363u, 2}, + {1647386u, 2}, + {1647489u, 2}, + {1647516u, 2}, + {1647813u, 2}, + {1647834u, 2}, + {1647941u, 2}, + {1647964u, 2}, + {1649306u, 2}, + {1649308u, 2}, + {1684942u, 2}, + {1684949u, 3}, + {1685262u, 3}, + {1685274u, 3}, + {1685717u, 3}, + {1685722u, 3}, + {1693134u, 2}, + {1693141u, 3}, + {1693582u, 2}, + {1693596u, 3}, + {1694037u, 3}, + {1694044u, 3}, + {1713934u, 3}, + {1713946u, 3}, + {1714062u, 2}, + {1714076u, 3}, + {1714842u, 3}, + {1714844u, 3}, + {1743061u, 2}, + {1743066u, 2}, + {1743189u, 3}, + {1743196u, 3}, + {1743514u, 2}, + {1743516u, 2}, + {1851406u, 2}, + {1851407u, 2}, + {1851722u, 2}, + {1851731u, 2}, + {1852163u, 1}, + {1852186u, 1}, + {1852750u, 2}, + {1852755u, 2}, + {1853198u, 2}, + {1853210u, 2}, + {1853523u, 2}, + {1853530u, 2}, + {1863694u, 2}, + {1863695u, 2}, + {1863821u, 2}, + {1863824u, 2}, + {1864640u, 2}, + {1864669u, 2}, + {1864846u, 2}, + {1864848u, 3}, + {1865678u, 2}, + {1865693u, 3}, + {1865808u, 2}, + {1865821u, 2}, + {1876361u, 2}, + {1876372u, 2}, + {1876739u, 2}, + {1876762u, 2}, + {1877325u, 2}, + {1877332u, 2}, + {1877709u, 2}, + {1877722u, 2}, + {1878164u, 2}, + {1878170u, 2}, + {1888649u, 2}, + {1888660u, 2}, + {1889216u, 2}, + {1889245u, 2}, + {1889418u, 2}, + {1889428u, 3}, + {1889994u, 2}, + {1890013u, 3}, + {1890644u, 2}, + {1890653u, 2}, + {1913603u, 2}, + {1913626u, 2}, + {1913792u, 2}, + {1913821u, 2}, + {1913988u, 2}, + {1914010u, 2}, + {1914180u, 2}, + {1914205u, 2}, + {1915610u, 2}, + {1915613u, 2}, + {1942926u, 2}, + {1942932u, 3}, + {1943310u, 2}, + {1943322u, 2}, + {1943700u, 3}, + {1943706u, 3}, + {1955214u, 2}, + {1955220u, 3}, + {1955790u, 2}, + {1955805u, 3}, + {1956180u, 2}, + {1956189u, 2}, + {1980174u, 2}, + {1980186u, 2}, + {1980366u, 2}, + {1980381u, 3}, + {1981146u, 3}, + {1981149u, 3}, + {2005140u, 2}, + {2005146u, 2}, + {2005332u, 2}, + {2005341u, 2}, + {2005722u, 2}, + {2005725u, 2}, + {3719687u, 2}, + {3719702u, 2}, + {3719939u, 2}, + {3719962u, 2}, + {3720523u, 2}, + {3720534u, 2}, + {3720779u, 2}, + {3720794u, 2}, + {3721494u, 2}, + {3721498u, 2}, + {3727879u, 2}, + {3727894u, 2}, + {3728257u, 2}, + {3728284u, 2}, + {3728585u, 3}, + {3728598u, 3}, + {3728969u, 3}, + {3728988u, 3}, + {3729814u, 3}, + {3729820u, 3}, + {3744515u, 2}, + {3744538u, 2}, + {3744641u, 2}, + {3744668u, 2}, + {3744965u, 3}, + {3744986u, 3}, + {3745093u, 3}, + {3745116u, 3}, + {3746458u, 3}, + {3746460u, 3}, + {3782093u, 2}, + {3782102u, 3}, + {3782349u, 2}, + {3782362u, 3}, + {3782934u, 2}, + {3782938u, 2}, + {3790285u, 3}, + {3790294u, 3}, + {3790669u, 2}, + {3790684u, 2}, + {3791254u, 3}, + {3791260u, 3}, + {3806925u, 3}, + {3806938u, 3}, + {3807053u, 2}, + {3807068u, 2}, + {3807898u, 3}, + {3807900u, 3}, + {3844374u, 2}, + {3844378u, 2}, + {3844502u, 3}, + {3844508u, 2}, + {3844762u, 3}, + {3844764u, 2}, + {3981831u, 3}, + {3981846u, 3}, + {3982083u, 3}, + {3982106u, 3}, + {3982667u, 3}, + {3982678u, 3}, + {3982923u, 3}, + {3982938u, 3}, + {3983638u, 3}, + {3983642u, 3}, + {3994119u, 2}, + {3994134u, 2}, + {3994560u, 2}, + {3994589u, 2}, + {3994760u, 3}, + {3994774u, 3}, + {3995208u, 3}, + {3995229u, 3}, + {3996118u, 3}, + {3996125u, 3}, + {4010755u, 2}, + {4010778u, 2}, + {4010944u, 2}, + {4010973u, 2}, + {4011140u, 3}, + {4011162u, 3}, + {4011332u, 3}, + {4011357u, 3}, + {4012762u, 3}, + {4012765u, 3}, + {4040076u, 3}, + {4040086u, 3}, + {4040332u, 3}, + {4040346u, 3}, + {4040982u, 2}, + {4040986u, 2}, + {4052364u, 3}, + {4052374u, 3}, + {4052812u, 2}, + {4052829u, 2}, + {4053462u, 3}, + {4053469u, 3}, + {4069004u, 3}, + {4069018u, 3}, + {4069196u, 2}, + {4069213u, 2}, + {4070106u, 3}, + {4070109u, 3}, + {4110614u, 2}, + {4110618u, 2}, + {4110806u, 3}, + {4110813u, 3}, + {4111066u, 3}, + {4111069u, 3}, + {4514311u, 3}, + {4514326u, 3}, + {4514689u, 3}, + {4514716u, 3}, + {4515017u, 3}, + {4515030u, 3}, + {4515401u, 3}, + {4515420u, 3}, + {4516246u, 3}, + {4516252u, 3}, + {4518407u, 2}, + {4518422u, 2}, + {4518848u, 3}, + {4518877u, 3}, + {4519048u, 3}, + {4519062u, 3}, + {4519496u, 3}, + {4519517u, 3}, + {4520406u, 3}, + {4520413u, 3}, + {4543361u, 2}, + {4543388u, 2}, + {4543424u, 2}, + {4543453u, 2}, + {4543618u, 2}, + {4543644u, 2}, + {4543682u, 3}, + {4543709u, 2}, + {4545372u, 3}, + {4545373u, 2}, + {4564234u, 2}, + {4564246u, 2}, + {4564618u, 3}, + {4564636u, 3}, + {4565398u, 3}, + {4565404u, 3}, + {4568330u, 3}, + {4568342u, 3}, + {4568778u, 2}, + {4568797u, 3}, + {4569558u, 3}, + {4569565u, 3}, + {4593290u, 3}, + {4593308u, 3}, + {4593354u, 2}, + {4593373u, 3}, + {4594524u, 3}, + {4594525u, 2}, + {4643222u, 2}, + {4643228u, 2}, + {4643286u, 3}, + {4643293u, 3}, + {4643676u, 3}, + {4643677u, 2}, + {5579523u, 3}, + {5579546u, 3}, + {5579649u, 3}, + {5579676u, 3}, + {5579973u, 3}, + {5579994u, 3}, + {5580101u, 3}, + {5580124u, 3}, + {5581466u, 3}, + {5581468u, 3}, + {5583619u, 2}, + {5583642u, 2}, + {5583808u, 3}, + {5583837u, 3}, + {5584004u, 3}, + {5584026u, 3}, + {5584196u, 3}, + {5584221u, 3}, + {5585626u, 3}, + {5585629u, 3}, + {5591937u, 2}, + {5591964u, 2}, + {5592000u, 2}, + {5592029u, 2}, + {5592194u, 2}, + {5592220u, 2}, + {5592258u, 3}, + {5592285u, 2}, + {5593948u, 3}, + {5593949u, 2}, + {5612806u, 2}, + {5612826u, 2}, + {5612934u, 3}, + {5612956u, 3}, + {5614234u, 3}, + {5614236u, 3}, + {5616902u, 3}, + {5616922u, 3}, + {5617094u, 2}, + {5617117u, 3}, + {5618394u, 3}, + {5618397u, 3}, + {5625222u, 3}, + {5625244u, 3}, + {5625286u, 2}, + {5625309u, 3}, + {5626716u, 3}, + {5626717u, 2}, + {5708442u, 2}, + {5708444u, 2}, + {5708506u, 3}, + {5708509u, 3}, + {5708636u, 3}, + {5708637u, 2}, + {7980558u, 2}, + {7980566u, 2}, + {7980814u, 2}, + {7980826u, 2}, + {7981334u, 2}, + {7981338u, 2}, + {7988750u, 3}, + {7988758u, 3}, + {7989134u, 2}, + {7989148u, 3}, + {7989654u, 3}, + {7989660u, 3}, + {8005390u, 3}, + {8005402u, 3}, + {8005518u, 2}, + {8005532u, 3}, + {8006298u, 3}, + {8006300u, 3}, + {8038678u, 2}, + {8038682u, 2}, + {8038806u, 3}, + {8038812u, 3}, + {8039066u, 3}, + {8039068u, 3}, + {8242702u, 2}, + {8242710u, 3}, + {8242958u, 2}, + {8242970u, 3}, + {8243478u, 3}, + {8243482u, 3}, + {8254990u, 2}, + {8254998u, 2}, + {8255438u, 2}, + {8255453u, 3}, + {8255958u, 3}, + {8255965u, 3}, + {8271630u, 2}, + {8271642u, 2}, + {8271822u, 2}, + {8271837u, 3}, + {8272602u, 3}, + {8272605u, 3}, + {8304918u, 2}, + {8304922u, 2}, + {8305110u, 3}, + {8305117u, 3}, + {8305370u, 3}, + {8305373u, 3}, + {8775182u, 3}, + {8775190u, 3}, + {8775566u, 2}, + {8775580u, 3}, + {8776086u, 3}, + {8776092u, 3}, + {8779278u, 2}, + {8779286u, 2}, + {8779726u, 2}, + {8779741u, 3}, + {8780246u, 3}, + {8780253u, 3}, + {8804238u, 2}, + {8804252u, 2}, + {8804302u, 1}, + {8804317u, 2}, + {8805212u, 3}, + {8805213u, 2}, + {8837526u, 2}, + {8837532u, 2}, + {8837590u, 3}, + {8837597u, 3}, + {8837980u, 3}, + {8837981u, 2}, + {9840398u, 3}, + {9840410u, 3}, + {9840526u, 2}, + {9840540u, 3}, + {9841306u, 3}, + {9841308u, 3}, + {9844494u, 2}, + {9844506u, 2}, + {9844686u, 2}, + {9844701u, 3}, + {9845466u, 3}, + {9845469u, 3}, + {9852814u, 2}, + {9852828u, 2}, + {9852878u, 1}, + {9852893u, 2}, + {9853788u, 3}, + {9853789u, 2}, + {9902746u, 2}, + {9902748u, 2}, + {9902810u, 3}, + {9902813u, 3}, + {9902940u, 3}, + {9902941u, 2}, + {11970838u, 1}, + {11970842u, 1}, + {11970966u, 2}, + {11970972u, 2}, + {11971226u, 2}, + {11971228u, 2}, + {11974934u, 2}, + {11974938u, 2}, + {11975126u, 2}, + {11975133u, 2}, + {11975386u, 2}, + {11975389u, 2}, + {11983254u, 2}, + {11983260u, 2}, + {11983318u, 3}, + {11983325u, 3}, + {11983708u, 2}, + {11983709u, 2}, + {11999898u, 2}, + {11999900u, 2}, + {11999962u, 3}, + {11999965u, 3}, + {12000092u, 2}, + {12000093u, 2} +}}; +static constexpr std::array BAL_LOOKUP7 = {{ + {14u, 2}, + {15u, 2}, + {519u, 2}, + {534u, 2}, + {1550u, 2}, + {1558u, 2}, + {1614u, 3}, + {1623u, 2}, + {2134u, 3}, + {2135u, 2}, + {16398u, 2}, + {16399u, 2}, + {16714u, 2}, + {16723u, 2}, + {17155u, 2}, + {17178u, 2}, + {17742u, 2}, + {17747u, 2}, + {18190u, 2}, + {18202u, 2}, + {18515u, 2}, + {18522u, 2}, + {20494u, 2}, + {20495u, 2}, + {20747u, 2}, + {20754u, 2}, + {21314u, 2}, + {21339u, 2}, + {21774u, 2}, + {21778u, 2}, + {22350u, 3}, + {22363u, 2}, + {22610u, 3}, + {22619u, 2}, + {49671u, 2}, + {49686u, 2}, + {49923u, 2}, + {49946u, 2}, + {50507u, 2}, + {50518u, 2}, + {50763u, 2}, + {50778u, 2}, + {51478u, 2}, + {51482u, 2}, + {53767u, 2}, + {53782u, 2}, + {54082u, 2}, + {54107u, 2}, + {54538u, 2}, + {54550u, 2}, + {54858u, 3}, + {54875u, 2}, + {55638u, 3}, + {55643u, 2}, + {116238u, 2}, + {116246u, 2}, + {116494u, 2}, + {116506u, 2}, + {117014u, 2}, + {117018u, 2}, + {120334u, 2}, + {120342u, 2}, + {120654u, 3}, + {120667u, 2}, + {121174u, 3}, + {121179u, 2}, + {136974u, 3}, + {136986u, 3}, + {137038u, 3}, + {137051u, 2}, + {137818u, 2}, + {137819u, 2}, + {170262u, 3}, + {170266u, 3}, + {170326u, 3}, + {170331u, 2}, + {170586u, 2}, + {170587u, 2}, + {524302u, 2}, + {524303u, 2}, + {524807u, 2}, + {524822u, 2}, + {525838u, 2}, + {525846u, 2}, + {525902u, 3}, + {525911u, 2}, + {526422u, 3}, + {526423u, 2}, + {536590u, 2}, + {536591u, 2}, + {536969u, 2}, + {536980u, 2}, + {537284u, 2}, + {537305u, 2}, + {537998u, 2}, + {538004u, 2}, + {538318u, 3}, + {538329u, 2}, + {538708u, 3}, + {538713u, 2}, + {548878u, 2}, + {548879u, 2}, + {549068u, 2}, + {549073u, 2}, + {549761u, 2}, + {549788u, 2}, + {550094u, 2}, + {550097u, 2}, + {550798u, 2}, + {550812u, 2}, + {550993u, 2}, + {551004u, 2}, + {569863u, 2}, + {569878u, 2}, + {570052u, 2}, + {570073u, 2}, + {570764u, 2}, + {570774u, 2}, + {570956u, 3}, + {570969u, 2}, + {571606u, 3}, + {571609u, 2}, + {582151u, 2}, + {582166u, 2}, + {582529u, 2}, + {582556u, 2}, + {582857u, 2}, + {582870u, 2}, + {583241u, 2}, + {583260u, 2}, + {584086u, 2}, + {584092u, 2}, + {636430u, 2}, + {636438u, 2}, + {636622u, 3}, + {636633u, 2}, + {637142u, 3}, + {637145u, 2}, + {648718u, 2}, + {648726u, 2}, + {649102u, 2}, + {649116u, 2}, + {649622u, 2}, + {649628u, 2}, + {661198u, 3}, + {661209u, 2}, + {661390u, 3}, + {661404u, 3}, + {662105u, 2}, + {662108u, 2}, + {694486u, 3}, + {694489u, 2}, + {694678u, 3}, + {694684u, 3}, + {694873u, 2}, + {694876u, 2}, + {1589262u, 2}, + {1589263u, 2}, + {1589578u, 3}, + {1589587u, 3}, + {1590019u, 2}, + {1590042u, 2}, + {1590606u, 3}, + {1590611u, 3}, + {1591054u, 3}, + {1591066u, 3}, + {1591379u, 3}, + {1591386u, 3}, + {1597454u, 2}, + {1597455u, 2}, + {1597644u, 3}, + {1597649u, 3}, + {1598337u, 2}, + {1598364u, 2}, + {1598670u, 3}, + {1598673u, 3}, + {1599374u, 3}, + {1599388u, 3}, + {1599569u, 3}, + {1599580u, 3}, + {1618376u, 2}, + {1618389u, 2}, + {1618691u, 3}, + {1618714u, 3}, + {1619276u, 3}, + {1619285u, 3}, + {1619596u, 3}, + {1619610u, 3}, + {1620181u, 3}, + {1620186u, 3}, + {1626568u, 2}, + {1626581u, 2}, + {1627009u, 3}, + {1627036u, 3}, + {1627338u, 3}, + {1627349u, 3}, + {1627786u, 3}, + {1627804u, 3}, + {1628501u, 3}, + {1628508u, 3}, + {1647363u, 2}, + {1647386u, 2}, + {1647489u, 2}, + {1647516u, 2}, + {1647813u, 2}, + {1647834u, 3}, + {1647941u, 2}, + {1647964u, 3}, + {1649306u, 2}, + {1649308u, 2}, + {1684942u, 2}, + {1684949u, 3}, + {1685262u, 3}, + {1685274u, 3}, + {1685717u, 3}, + {1685722u, 3}, + {1693134u, 2}, + {1693141u, 3}, + {1693582u, 3}, + {1693596u, 3}, + {1694037u, 3}, + {1694044u, 3}, + {1713934u, 3}, + {1713946u, 3}, + {1714062u, 3}, + {1714076u, 3}, + {1714842u, 3}, + {1714844u, 3}, + {1743061u, 3}, + {1743066u, 3}, + {1743189u, 3}, + {1743196u, 3}, + {1743514u, 2}, + {1743516u, 2}, + {1851406u, 3}, + {1851407u, 3}, + {1851722u, 3}, + {1851731u, 3}, + {1852163u, 2}, + {1852186u, 2}, + {1852750u, 3}, + {1852755u, 3}, + {1853198u, 3}, + {1853210u, 3}, + {1853523u, 3}, + {1853530u, 3}, + {1863694u, 2}, + {1863695u, 2}, + {1863821u, 2}, + {1863824u, 2}, + {1864640u, 2}, + {1864669u, 2}, + {1864846u, 2}, + {1864848u, 3}, + {1865678u, 2}, + {1865693u, 3}, + {1865808u, 2}, + {1865821u, 2}, + {1876361u, 3}, + {1876372u, 3}, + {1876739u, 3}, + {1876762u, 3}, + {1877325u, 3}, + {1877332u, 3}, + {1877709u, 3}, + {1877722u, 3}, + {1878164u, 3}, + {1878170u, 3}, + {1888649u, 3}, + {1888660u, 3}, + {1889216u, 3}, + {1889245u, 3}, + {1889418u, 3}, + {1889428u, 3}, + {1889994u, 3}, + {1890013u, 3}, + {1890644u, 3}, + {1890653u, 3}, + {1913603u, 2}, + {1913626u, 2}, + {1913792u, 2}, + {1913821u, 2}, + {1913988u, 2}, + {1914010u, 3}, + {1914180u, 2}, + {1914205u, 3}, + {1915610u, 2}, + {1915613u, 2}, + {1942926u, 3}, + {1942932u, 3}, + {1943310u, 3}, + {1943322u, 3}, + {1943700u, 3}, + {1943706u, 3}, + {1955214u, 3}, + {1955220u, 3}, + {1955790u, 2}, + {1955805u, 3}, + {1956180u, 3}, + {1956189u, 3}, + {1980174u, 3}, + {1980186u, 3}, + {1980366u, 2}, + {1980381u, 3}, + {1981146u, 3}, + {1981149u, 3}, + {2005140u, 2}, + {2005146u, 2}, + {2005332u, 3}, + {2005341u, 3}, + {2005722u, 3}, + {2005725u, 3}, + {2383886u, 3}, + {2383887u, 3}, + {2384076u, 3}, + {2384081u, 3}, + {2384769u, 2}, + {2384796u, 2}, + {2385102u, 3}, + {2385105u, 3}, + {2385806u, 3}, + {2385820u, 3}, + {2386001u, 3}, + {2386012u, 3}, + {2387982u, 2}, + {2387983u, 2}, + {2388109u, 2}, + {2388112u, 2}, + {2388928u, 2}, + {2388957u, 2}, + {2389134u, 2}, + {2389136u, 3}, + {2389966u, 2}, + {2389981u, 3}, + {2390096u, 2}, + {2390109u, 2}, + {2400523u, 3}, + {2400530u, 3}, + {2401153u, 3}, + {2401180u, 3}, + {2401485u, 3}, + {2401490u, 3}, + {2402125u, 3}, + {2402140u, 3}, + {2402450u, 3}, + {2402460u, 3}, + {2404619u, 3}, + {2404626u, 3}, + {2405312u, 3}, + {2405341u, 3}, + {2405516u, 3}, + {2405522u, 3}, + {2406220u, 3}, + {2406237u, 3}, + {2406610u, 3}, + {2406621u, 3}, + {2446209u, 2}, + {2446236u, 2}, + {2446272u, 2}, + {2446301u, 2}, + {2446466u, 2}, + {2446492u, 3}, + {2446530u, 2}, + {2446557u, 3}, + {2448220u, 2}, + {2448221u, 2}, + {2467086u, 3}, + {2467090u, 3}, + {2467726u, 3}, + {2467740u, 3}, + {2467986u, 3}, + {2467996u, 3}, + {2471182u, 3}, + {2471186u, 3}, + {2471886u, 2}, + {2471901u, 3}, + {2472146u, 3}, + {2472157u, 3}, + {2512782u, 3}, + {2512796u, 3}, + {2512846u, 2}, + {2512861u, 3}, + {2513756u, 3}, + {2513757u, 3}, + {2529426u, 2}, + {2529436u, 2}, + {2529490u, 3}, + {2529501u, 3}, + {2530140u, 3}, + {2530141u, 3}, + {3719687u, 2}, + {3719702u, 2}, + {3719939u, 2}, + {3719962u, 2}, + {3720523u, 3}, + {3720534u, 3}, + {3720779u, 3}, + {3720794u, 3}, + {3721494u, 3}, + {3721498u, 3}, + {3727879u, 2}, + {3727894u, 2}, + {3728257u, 2}, + {3728284u, 2}, + {3728585u, 3}, + {3728598u, 3}, + {3728969u, 3}, + {3728988u, 3}, + {3729814u, 3}, + {3729820u, 3}, + {3744515u, 2}, + {3744538u, 2}, + {3744641u, 2}, + {3744668u, 2}, + {3744965u, 2}, + {3744986u, 3}, + {3745093u, 2}, + {3745116u, 3}, + {3746458u, 2}, + {3746460u, 2}, + {3782093u, 3}, + {3782102u, 2}, + {3782349u, 3}, + {3782362u, 3}, + {3782934u, 3}, + {3782938u, 3}, + {3790285u, 3}, + {3790294u, 2}, + {3790669u, 3}, + {3790684u, 3}, + {3791254u, 3}, + {3791260u, 3}, + {3806925u, 3}, + {3806938u, 3}, + {3807053u, 3}, + {3807068u, 3}, + {3807898u, 2}, + {3807900u, 2}, + {3844374u, 3}, + {3844378u, 3}, + {3844502u, 3}, + {3844508u, 3}, + {3844762u, 3}, + {3844764u, 3}, + {3981831u, 3}, + {3981846u, 3}, + {3982083u, 2}, + {3982106u, 2}, + {3982667u, 3}, + {3982678u, 3}, + {3982923u, 3}, + {3982938u, 3}, + {3983638u, 3}, + {3983642u, 3}, + {3994119u, 2}, + {3994134u, 2}, + {3994560u, 2}, + {3994589u, 2}, + {3994760u, 3}, + {3994774u, 2}, + {3995208u, 2}, + {3995229u, 2}, + {3996118u, 2}, + {3996125u, 3}, + {4010755u, 2}, + {4010778u, 2}, + {4010944u, 2}, + {4010973u, 2}, + {4011140u, 2}, + {4011162u, 3}, + {4011332u, 2}, + {4011357u, 3}, + {4012762u, 2}, + {4012765u, 2}, + {4040076u, 3}, + {4040086u, 3}, + {4040332u, 3}, + {4040346u, 3}, + {4040982u, 3}, + {4040986u, 3}, + {4052364u, 3}, + {4052374u, 3}, + {4052812u, 3}, + {4052829u, 3}, + {4053462u, 2}, + {4053469u, 3}, + {4069004u, 2}, + {4069018u, 2}, + {4069196u, 3}, + {4069213u, 3}, + {4070106u, 3}, + {4070109u, 3}, + {4110614u, 3}, + {4110618u, 3}, + {4110806u, 2}, + {4110813u, 3}, + {4111066u, 3}, + {4111069u, 3}, + {4514311u, 3}, + {4514326u, 3}, + {4514689u, 2}, + {4514716u, 2}, + {4515017u, 3}, + {4515030u, 3}, + {4515401u, 3}, + {4515420u, 3}, + {4516246u, 3}, + {4516252u, 3}, + {4518407u, 2}, + {4518422u, 2}, + {4518848u, 2}, + {4518877u, 2}, + {4519048u, 3}, + {4519062u, 2}, + {4519496u, 2}, + {4519517u, 2}, + {4520406u, 2}, + {4520413u, 3}, + {4543361u, 2}, + {4543388u, 2}, + {4543424u, 2}, + {4543453u, 2}, + {4543618u, 2}, + {4543644u, 3}, + {4543682u, 2}, + {4543709u, 3}, + {4545372u, 2}, + {4545373u, 2}, + {4564234u, 3}, + {4564246u, 3}, + {4564618u, 3}, + {4564636u, 3}, + {4565398u, 3}, + {4565404u, 3}, + {4568330u, 3}, + {4568342u, 3}, + {4568778u, 3}, + {4568797u, 3}, + {4569558u, 2}, + {4569565u, 3}, + {4593290u, 2}, + {4593308u, 2}, + {4593354u, 3}, + {4593373u, 3}, + {4594524u, 3}, + {4594525u, 3}, + {4643222u, 3}, + {4643228u, 3}, + {4643286u, 2}, + {4643293u, 3}, + {4643676u, 3}, + {4643677u, 3}, + {7980558u, 2}, + {7980566u, 2}, + {7980814u, 3}, + {7980826u, 3}, + {7981334u, 3}, + {7981338u, 3}, + {7988750u, 2}, + {7988758u, 2}, + {7989134u, 3}, + {7989148u, 3}, + {7989654u, 3}, + {7989660u, 3}, + {8005390u, 3}, + {8005402u, 3}, + {8005518u, 3}, + {8005532u, 3}, + {8006298u, 2}, + {8006300u, 2}, + {8038678u, 3}, + {8038682u, 3}, + {8038806u, 3}, + {8038812u, 3}, + {8039066u, 2}, + {8039068u, 2}, + {8242702u, 3}, + {8242710u, 3}, + {8242958u, 3}, + {8242970u, 3}, + {8243478u, 3}, + {8243482u, 3}, + {8254990u, 2}, + {8254998u, 2}, + {8255438u, 2}, + {8255453u, 2}, + {8255958u, 2}, + {8255965u, 2}, + {8271630u, 3}, + {8271642u, 3}, + {8271822u, 2}, + {8271837u, 3}, + {8272602u, 3}, + {8272605u, 3}, + {8304918u, 3}, + {8304922u, 3}, + {8305110u, 2}, + {8305117u, 3}, + {8305370u, 3}, + {8305373u, 3}, + {8775182u, 3}, + {8775190u, 3}, + {8775566u, 3}, + {8775580u, 3}, + {8776086u, 3}, + {8776092u, 3}, + {8779278u, 2}, + {8779286u, 2}, + {8779726u, 2}, + {8779741u, 2}, + {8780246u, 2}, + {8780253u, 2}, + {8804238u, 3}, + {8804252u, 3}, + {8804302u, 2}, + {8804317u, 3}, + {8805212u, 3}, + {8805213u, 3}, + {8837526u, 3}, + {8837532u, 3}, + {8837590u, 2}, + {8837597u, 3}, + {8837980u, 3}, + {8837981u, 3}, + {9840398u, 2}, + {9840410u, 2}, + {9840526u, 2}, + {9840540u, 2}, + {9841306u, 2}, + {9841308u, 2}, + {9844494u, 3}, + {9844506u, 3}, + {9844686u, 2}, + {9844701u, 3}, + {9845466u, 3}, + {9845469u, 3}, + {9852814u, 3}, + {9852828u, 3}, + {9852878u, 2}, + {9852893u, 3}, + {9853788u, 3}, + {9853789u, 3}, + {9902746u, 2}, + {9902748u, 2}, + {9902810u, 3}, + {9902813u, 2}, + {9902940u, 3}, + {9902941u, 2}, + {11970838u, 2}, + {11970842u, 2}, + {11970966u, 2}, + {11970972u, 2}, + {11971226u, 2}, + {11971228u, 2}, + {11974934u, 3}, + {11974938u, 3}, + {11975126u, 2}, + {11975133u, 3}, + {11975386u, 3}, + {11975389u, 3}, + {11983254u, 3}, + {11983260u, 3}, + {11983318u, 2}, + {11983325u, 3}, + {11983708u, 3}, + {11983709u, 3}, + {11999898u, 2}, + {11999900u, 2}, + {11999962u, 3}, + {11999965u, 2}, + {12000092u, 3}, + {12000093u, 2} +}}; diff --git a/src/spr_lookup.cpp b/src/spr_lookup.cpp new file mode 100644 index 00000000..0e440bdf --- /dev/null +++ b/src/spr_lookup.cpp @@ -0,0 +1,240 @@ +#include +#include +#include +#include +#include "spr/lookup_table_7.h" +#include "TreeTools/assert.h" + +using Split7 = uint8_t; // 7 bits used +using SplitSet7 = std::array; +using Perm7 = std::array; + +inline int popcount7(uint8_t x) { + return __builtin_popcount(x & 0x7F); +} + +inline int tips_in_smallest7(uint8_t x) { + const int count = __builtin_popcount(x & 0x7F); + return count < 4 ? count : 7 - count; +} + +inline Split7 xor_split7(Split7 a, Split7 b) { + return (a ^ b) & 0x7F; +} + +inline Split7 smaller_split7(Split7 s) { + if (popcount7(s) > 3) { + s ^= 0x7F; + } + return s; +} + +enum class Shape7 { Pectinate, Balanced }; + +struct CanonicalInfo7 { + Shape7 shape; + Perm7 perm; +}; + +inline Shape7 detect_shape7(const SplitSet7& sp) { + int trio_count = 0; + for (auto s : sp) { + int t = popcount7(s); + if (t == 3) ++trio_count; + } + return (trio_count == 2) ? Shape7::Pectinate : Shape7::Balanced; +} + +CanonicalInfo7 canonical_pectinate(const SplitSet7& sp) { + std::array tiss; // Tips in split (smallest) + for (int i = 0; i < 4; ++i) { + tiss[i] = popcount7(sp[i]); + } + + int trio1 = -1, trio2 = -1; + int p = 0; + for (int i = 0; i < 4; ++i) { + if (tiss[i] == 3) { + (p++ == 0 ? trio1 : trio2) = i; + } + } + int pair1 = -1, pair2 = -1; + for (int i = 0; i < 4; ++i) { + if (i != trio1 && i != trio2) { + (pair1 == -1 ? pair1 : pair2) = i; + } + } + + const Split7 mid = xor_split7(sp[trio1], sp[trio2]) ^ 0x7F; + + if (tips_in_smallest7(xor_split7(sp[pair1], sp[trio1])) != 1) { + std::swap(pair1, pair2); + } + Split7 trio1Tip = smaller_split7(xor_split7(sp[trio1], sp[pair1])); + Split7 trio2Tip = smaller_split7(xor_split7(sp[trio2], sp[pair2])); + Split7 duo1 = smaller_split7(sp[pair1]); + Split7 duo2 = smaller_split7(sp[pair2]); + + Perm7 perm{}; + int k = 0; + + auto emit = [&](Split7 s) { + for (int i = 0; i < 7; ++i) { + if (s & (1 << i)) { + perm[k++] = i; + } + } + }; + + emit(mid); + emit(trio1Tip); + emit(duo1); + emit(trio2Tip); + emit(duo2); + + return { Shape7::Pectinate, perm }; +} + +CanonicalInfo7 canonical_balanced(const SplitSet7& sp) { + std::array tiss; // Tips in smallest split + for (int i = 0; i < 4; ++i) { + tiss[i] = popcount7(sp[i]); + } + + int firstTrio = std::max_element(tiss.begin(), tiss.end()) - tiss.begin(); + Split7 firstSp = sp[firstTrio]; + + int trioPair = -1; + Split7 solo{}; + for (int i = 0; i < 4; ++i) { + if (i == firstTrio) continue; + Split7 s = xor_split7(sp[i], firstSp); + const int s_count = popcount7(s); + if (s_count == 1) { + trioPair = i; + solo = s; + break; + } else if (s_count == 6) { + trioPair = i; + solo = s ^ 0x7F; + break; + } + } + ASSERT(trioPair > -1); + + int other1 = -1, other2 = -1; + for (int i = 0; i < 4; ++i) { + if (i != firstTrio && i != trioPair) + (other1 == -1 ? other1 : other2) = i; + } + ASSERT(other1 > -1); + ASSERT(other2 > -1); + + Split7 singleton = solo; + Split7 trio = smaller_split7(sp[trioPair]); + Split7 o1 = smaller_split7(sp[other1]); + Split7 o2 = smaller_split7(sp[other2]); + + Perm7 perm{}; + int k = 0; + + auto emit = [&](Split7 s) { + for (int i = 0; i < 7; ++i) { + if (s & (1 << i)) { + perm[k++] = i; + } + } + }; + + emit(singleton); + emit(trio); + emit(o1); + emit(o2); + + return { Shape7::Balanced, perm }; +} + +Split7 permute_split(Split7 s, const Perm7& p) { + Split7 out = 0; + for (int i = 0; i < 7; ++i) { + if (s & (1 << p[i])) { + out |= (1 << i); + } + } + return out; +} + +inline Split7 polarize7(Split7 s) { + if (s & (1 << 6)) s ^= 0x7F; + return s; +} + +inline uint32_t BitPack7(const std::array& v) { + return ((v[0] - 3) << 18) | + ((v[1] - 7) << 12) | + ((v[2] - 15) << 6) | + ( v[3] - 33); +} + +template +int lookup(uint32_t key, const std::array& table) { + auto it = std::lower_bound( + table.begin(), table.end(), key, + [](const SPRScore& a, uint32_t k) { return a.key < k; } + ); + return (it != table.end() && it->key == key) ? it->score : -1; +} + +int lookup_7(const SplitSet7& sp1, const SplitSet7& sp2) { + Shape7 shape = detect_shape7(sp1); + + CanonicalInfo7 canon = + (shape == Shape7::Pectinate) + ? canonical_pectinate(sp1) + : canonical_balanced(sp1); + + std::array packed{}; + for (int i = 0; i < 4; ++i) { + Split7 s = permute_split(sp2[i], canon.perm); + s = polarize7(s); + packed[i] = s; + } + + std::sort(packed.begin(), packed.end()); + + uint32_t key = BitPack7(packed); + return (shape == Shape7::Pectinate) + ? lookup(key, PEC_LOOKUP7) + : lookup(key, BAL_LOOKUP7); +} + +inline SplitSet7 read_splits(const Rcpp::RawVector& r) { + if (r.size() != 4) + Rcpp::stop("Expected a length-4 raw vector of splits"); + + SplitSet7 sp{}; + for (int i = 0; i < 4; ++i) { + sp[i] = static_cast(r[i]); + } + + return sp; +} + +inline SplitSet7 smallest_splits(SplitSet7 sp) { + for (auto& s : sp) { + if (popcount7(s) > 3) { + s ^= 0x7F; + } + } + return sp; +} + +// [[Rcpp::export]] +int spr_table_7(const Rcpp::RawVector& sp1, const Rcpp::RawVector& sp2) { + SplitSet7 s1_raw = read_splits(sp1); + SplitSet7 s1 = smallest_splits(s1_raw); + + SplitSet7 s2 = read_splits(sp2); + + return lookup_7(s1, s2); +} diff --git a/tests/testthat/test-binary_entropy_counts.R b/tests/testthat/test-binary_entropy_counts.R new file mode 100644 index 00000000..7990e4cc --- /dev/null +++ b/tests/testthat/test-binary_entropy_counts.R @@ -0,0 +1,6 @@ +test_that("binary_entropy_counts() fails gracefully", { + expect_equal(binary_entropy_counts(integer(0), 0), numeric(0)) + expect_equal(binary_entropy_counts(integer(3), 0), rep(NA_real_, 3)) + expect_equal(binary_entropy_counts(integer(3), 2), rep(0, 3)) + expect_equal(binary_entropy_counts(c(1, NA, 2), 2), c(1, NA, 0)) +}) diff --git a/tests/testthat/test-tree_distance_spr.R b/tests/testthat/test-tree_distance_spr.R index edd485a4..d97c0263 100644 --- a/tests/testthat/test-tree_distance_spr.R +++ b/tests/testthat/test-tree_distance_spr.R @@ -1,19 +1,286 @@ -library("TreeTools", quietly = TRUE) - test_that("SPR: keep_and_reroot()", { + library("TreeTools", quietly = TRUE) + tree1 <- Postorder(BalancedTree(12)) tree2 <- Postorder(PectinateTree(12)) keep <- as.logical(tabulate(8:12, 12)) - + result <- keep_and_reroot(tree1, tree2, keep) expect_equal(result[[1]], RootTree(KeepTip(tree1, keep), 1)) expect_equal(result[[2]], RootTree(KeepTip(tree2, keep), 1)) + result1 <- keep_and_reroot(tree1, tree2, 1:12 %in% 4) + expect_equal(result1[[1]], Preorder(SingleTaxonTree("t4"))) + expect_equal(result1[[2]], Preorder(SingleTaxonTree("t4"))) + + result0 <- keep_and_reduce(tree1, tree2, logical(12)) + expect_equal(result0[[1]], Preorder(ZeroTaxonTree())) + + reduced <- keep_and_reduce(tree1, tree2, keep) + expect_equal(Preorder(reduced[[1]]), Preorder(DropTip(result[[1]], "t9"))) + expect_equal(Preorder(reduced[[2]]), Preorder(DropTip(result[[2]], "t9"))) + + result0 <- keep_and_reduce(tree1, tree2, 1:12 %in% 4) + expect_null(result0[[1]]) +}) + +test_that("SPRDist() handles different inputs", { + library("TreeTools", quietly = TRUE) + + expect_error( + mismatch_size(as.Splits(c(T, T, F)), as.Splits(c(T, T, T, T))), + "differ in `nTip" + ) + expect_error( + mismatch_size(matrix(as.raw(3), 1, 1), as.Splits(c(T, T, T, T))), + "nTip attribute" + ) + expect_error( + mismatch_size(as.Splits(c(T, T, T, T)), matrix(as.raw(3), 1, 1)), + "nTip attribute" + ) + expect_error( + mismatch_size(as.Splits(matrix(T, 2, 4)), as.Splits(c(T, T, T, T))), + "number of splits" + ) + splits <- as.Splits(rbind(c(T, T, T, F, F), c(T, F, F, F, T))) + Test <- function(s1, s2) { + expect_equal(length(s1), length(s2)) + nSplits <- length(s1) + i <- rep(seq_len(nSplits), nSplits) + j <- rep(seq_len(nSplits), each = nSplits) + expect_equal( + mismatch_size(s1, s2), + TipsInSplits(xor(s1[[i]], s2[[j]]), smallest = TRUE) + ) + } + Test(as.Splits(c(T, T, T, F, F)), as.Splits(c(T, F, F, F, T))) + + set.seed(0) + splits <- as.Splits(t(replicate(10, sample(c(T, F), 99, replace = TRUE)))) + Test(splits[[1]], splits[[2]]) + Test(splits[[1:2]], splits[[2:3]]) + Test(splits, rev(splits)) +}) + +test_that("confusion()", { + library("TreeTools", quietly = TRUE) + + TestConfusion <- function(a, b) { + i <- rep(seq_along(a), each = length(b)) + j <- rep(seq_along(b), length(a)) + expect_equal( + confusion(a, b), + aperm(array( + c(TipsInSplits(a[[i]] & b[[j]]), + TipsInSplits(a[[i]] & !b[[j]]), + TipsInSplits(!a[[i]] & b[[j]]), + TipsInSplits(!a[[i]] & !b[[j]]) + ), + c(length(a), length(b), 4)), + c(3, 2, 1) + ) + ) + } + + TestConfusion(as.Splits(c(T, T, T, F, F)), as.Splits(c(T, F, F, F, T))) + + set.seed(0) + splits <- as.Splits(t(replicate(10, sample(c(T, F), 99, replace = TRUE)))) + TestConfusion(splits[[1]], splits[[2]]) + TestConfusion(splits[[1:2]], splits[[2:3]]) + TestConfusion(splits, rev(splits)) +}) + +test_that("SPR shortcuts okay - exhaustive", { + skip_if_not(getOption("slowMode", FALSE)) + skip_if_not_installed("TBRDist") + library("TreeTools", quietly = TRUE) + nTip <- 7 + allTrees <- as.phylo(seq_len(NUnrooted(nTip)), nTip) + + sum(apply(combn(length(allTrees), 2), 2, function(ij) { + reduced <- ReduceTrees(allTrees[[ij[[1]]]], allTrees[[ij[[2]]]]) + r1 <- reduced[[1]] + if (is.null(r1) || NTip(r1) != nTip) return(NA) + r2 <- reduced[[2]] + exact <- TBRDist::USPRDist(r1, r2) + shortcut <- SPRDist(r1, r2, method = "Rogue") + equal <- exact == shortcut + if (!equal) message("Mismatch: ", paste(ij, collapse = ", ")) + expect_true(equal) + equal + }), na.rm = TRUE) +}) + +test_that("SPR shortcuts okay - known answer", { + skip_if_not(getOption("slowMode", FALSE)) + library("TreeTools", quietly = TRUE) + set.seed(0) + trees <- lapply(1:8, function(XX) RandomTree(9, root = TRUE)) + + exact <- apply(combn(length(trees), 2), 2, function(ij) { + TBRDist::USPRDist(trees[[ij[[1]]]], trees[[ij[[2]]]]) + }) + opt <- options("sprShortcut" = 0) + on.exit(options(opt)) + noCuts <- SPRDist(trees, method = "rogue") + + options("sprShortcut" = 4) + cuts4 <- SPRDist(trees, method = "rogue") + expect_true(all(cuts4 == noCuts)) + + options("sprShortcut" = 5) + cuts5 <- SPRDist(trees, method = "rogue") + expect_true(all(cuts5 <= noCuts)) + expect_true(all(cuts5 >= exact)) + + options("sprShortcut" = 6) + cuts6 <- SPRDist(trees, method = "rogue") + expect_true(all(cuts6 <= noCuts)) + expect_true(all(cuts6 >= exact)) + # Aspirational: + expect_true(all(cuts6 == exact)) + + options("sprShortcut" = 7) + cuts7 <- SPRDist(trees, method = "rogue") + expect_true(all(cuts7 <= noCuts)) + expect_true(all(cuts7 >= exact)) + # Aspirational: + expect_true(all(cuts7 == exact)) +}) + +test_that("SPR shortcuts okay - larger trees", { + skip_if_not(getOption("slowMode", FALSE)) + + library("TreeTools", quietly = TRUE) + set.seed(0) + trees <- lapply(1:8, function(XX) RandomTree(45, root = TRUE)) + opt <- options("sprShortcut" = 0) + on.exit(options(opt)) + noCuts <- SPRDist(trees, method = "rogue") + + options("sprShortcut" = 4) + cuts4 <- SPRDist(trees, method = "rogue") + expect_true(all(cuts4 == noCuts)) + + options("sprShortcut" = 5) + cuts5 <- SPRDist(trees, method = "rogue") + expect_true(all(cuts5 <= noCuts)) + + options("sprShortcut" = 6) + cuts6 <- SPRDist(trees, method = "rogue") + expect_true(all(cuts6 <= noCuts)) +}) + +test_that("SPR calculated correctly", { + library("TreeTools", quietly = TRUE) + + expect_exact <- function(x, y, method = "rogue") { + if (is.character(x)) x <- Tree(x) + if (is.character(y)) y <- Tree(y) + expect_equal( + SPRDist(x, y, method = method)[[1]], + TBRDist::USPRDist(x, y) + ) + } + + Tree <- function(txt) ape::read.tree(text = txt) + + expect_equal( + .SPRRogue( + ape::read.tree(text = "((a, b), (c, d));"), + ape::read.tree(text = "((a, c), (b, d));") + )[[1]], + 1L + ) + expect_equal( + .SPRRogue(PectinateTree(letters[1:26]), PectinateTree(letters[c(2:26, 1)]))[[1]], + 1L + ) + + # Looks simple, but requires ties to be broken suitably + expect_exact("(a,(d,(b,(c,X))));", "(a,((b,c),(X,d)));", "rogue") # distance = 1 + + expect_exact("((((b,c),d),e),a);", "(a,(b,((e,c),d)));", "rogue") + expect_failure( + expect_exact("((((b,c),d),e),a);", "(a,(b,((e,c),d)));", "deo") + ) + + # Passes with ami, joint, vi + # Fails with viNorm - should tiebreaker be non-normalized? + expect_exact("(((t21,(((t15,t12),t23),t24)),t18),t19);", + "((t21,t18),(((t12,(t19,t15)),t23),t24));", "rogue") + + # Example AZ33: IJK and DEF are schlepped + # Passes with joint, ami, viNorm + # Fails with vi (5, 6 with tiebreaker), conf (7) + expect_equal( + .SPRRogue( + tree1 <- PectinateTree(letters[1:26]), + tree2 <- Tree( + "(g, (h, (i, (j, (k, (l, ((m, (c, (b, a))), (n, (o, (p, (q, (r, (s, (t, (u, (v, (w, (x, (y, (z, (f, (e, d))))))))))))))))))))));" + ) + )[[1]], + 2 + ) + + # Requires "divide and conquer" step + expect_equal( + .SPRRogue( + tree1 <- PectinateTree(letters[1:26]), + tree2 <- Tree( + "(g, (h, (i, (j, (k, (l, (m, (n, (o, (p, (q, (r, (s, (t, (u, (v, (w, (x, (y, (z, (f, ((e, (c, (b, a))), d))))))))))))))))))))));" + ) + )[[1]], + 2 + ) + + lockedMid1 <- Tree("((((a1, a2), a3), ((b1, b2), b3)), + (((c1, c2), c3), ((d1, d2), d3)));") + lockedMid2 <- Tree("(((a1, (a2, a3)), (c1, (c2, c3))), + ((b1, (b2, b3)), (d1, (d2, d3))));") + expect_equal(.SPRRogue(lockedMid1, lockedMid2)[[1]], 5) +}) + +test_that("SPR.dist called safely", { + skip("#TODO") + library("TreeTools") + PhangornSPR <- function(...) unname(phangorn::SPR.dist(...)) + tree1 <- as.phylo(0, 6) + tree2 <- BalancedTree(6) + expect_equal(SPRDist(as.phylo(0, 6), BalancedTree(6)), + PhangornSPR(Postorder(as.phylo(0, 6)), + Postorder(BalancedTree(6)))) + expect_equal(SPRDist(as.phylo(0:5, 6), BalancedTree(6)), + PhangornSPR(structure(lapply(as.phylo(0:5, 6), Postorder), + class = "multiPhylo"), + Postorder(BalancedTree(6)))) + expect_equal(SPRDist(BalancedTree(6), as.phylo(0:5, 6)), + SPRDist(as.phylo(0:5, 6), BalancedTree(6))) + expect_equal(SPRDist(BalancedTree(6), PectinateTree(6)), + SPRDist(list(BalancedTree(6), PectinateTree(6)))[[1]], + ignore_attr = TRUE) + reduced <- keep_and_reduce(tree1, tree2, keep) expect_equal(Preorder(reduced[[1]]), Preorder(DropTip(result[[1]], "t9"))) expect_equal(Preorder(reduced[[2]]), Preorder(DropTip(result[[2]], "t9"))) }) + +test_that("SPR deOliveira2008 calculation looks valid", { + library("TreeTools", quietly = TRUE) + + # We do not expect to obtain identical results to phangorn::SPR.dist, + # because ties are broken in a different arbitrary manner. + # We're thus left with quite a loose test. + expect_equal(SPRDist(PectinateTree(letters[1:26]), + PectinateTree(letters[c(2:26, 1)]), + method = "deOliv"), + 1L) +}) + + test_that("SPR: Under the hood", { expect_error(mismatch_size(as.Splits(c(T, T, F)), as.Splits(c(T, T, T, T))), "differ in `nTip") diff --git a/tests/testthat/test-tree_information.R b/tests/testthat/test-tree_information.R index 2c3763cf..96b4e0c8 100644 --- a/tests/testthat/test-tree_information.R +++ b/tests/testthat/test-tree_information.R @@ -1,5 +1,13 @@ library("TreeTools") +test_that("ClusteringInfo() fails gracefully", { + bal6 <- as.Splits(BalancedTree(6)) + bad6 <- bal6 + attr(bad6, "nTip") <- NULL + expect_error(ClusteringEntropy(bad6), "nTip") + expect_error(ClusteringEntropy(bal6, p = c(3, 5)), "number of splits .3\\)") +}) + test_that("SplitwiseInfo() / ClusteringInfo() handle probabilities", { Tree <- function(txt) ape::read.tree(text = txt) tree <- Tree("((a, b)60, (c, d)60);") diff --git a/vignettes/using-distances.Rmd b/vignettes/using-distances.Rmd index dc6d742e..95f2576c 100644 --- a/vignettes/using-distances.Rmd +++ b/vignettes/using-distances.Rmd @@ -28,8 +28,9 @@ Selecting an appropriate normalizing constant may require careful consideration of the purpose to which a tree distance metric is being put. The default normalization behaviour of each function when `normalize = TRUE` is -listed in the [function reference](../reference/), or can be viewed -by typing `?FunctionName` in the R terminal. +listed in the [function reference]( +https://ms609.github.io/TreeDist/reference/index.html), or can be viewed by +typing `?FunctionName` in the R terminal. ### Nye _et al._ tree similarity