Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions R/bambu-extendAnnotations-utilityExtend.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList

mcols(exonRangesCombined)$txid <- seq_along(exonRangesCombined)
minEq <- getMinimumEqClassByTx(exonRangesCombined)$eqClassById
rowDataCombined$relSubsetCount <- rowDataCombined$readCount/unlist(lapply(minEq, function(x){return(sum(rowDataCombined$readCount[x]))}))
rowDataCombined$relSubsetCount <- rowDataCombined$readCount /
calculateEqClassSums(rowDataCombined$readCount, minEq)
#post extend annotation filters applied here (currently only subset filter)
if(min.readFractionByEqClass>0 & sum(filterSet)>0) { # filter out subset transcripts based on relative expression
filterSet <- rowDataCombined$relSubsetCount > min.readFractionByEqClass
Expand Down Expand Up @@ -289,13 +290,10 @@ createExonByReadClass <- function(transcriptsTibble, annotationSeqLevels) {
unlistData <- unlist(exonsByReadClass, use.names = FALSE)
partitioning <- PartitioningByEnd(cumsum(elementNROWS(exonsByReadClass)),
names = NULL)
exon_rank <- lapply(width((partitioning)), seq, from = 1)
# * assumes positive for exon ranking
negative_strand <- which(transcriptsTibble$strand == "-")
exon_rank[negative_strand] <- lapply(exon_rank[negative_strand], rev)
exon_endRank <- lapply(exon_rank, rev)
unlistData$exon_rank <- unlist(exon_rank)
unlistData$exon_endRank <- unlist(exon_endRank)
# Create exon rankings using shared utility function
rankings <- createExonRankings(width(partitioning), transcriptsTibble$strand)
unlistData$exon_rank <- unlist(rankings$exon_rank)
unlistData$exon_endRank <- unlist(rankings$exon_endRank)
exonsByReadClass <- relist(unlistData, partitioning)
seqlevels(exonsByReadClass) <-
unique(c(seqlevels(exonsByReadClass), annotationSeqLevels))
Expand Down Expand Up @@ -730,8 +728,8 @@ calculateRelSubsetCount <- function(extendedAnnotationRanges, minEq, min.readFra
filter <- !is.na(mcols(extendedAnnotationRanges)$readCount)
mcols(extendedAnnotationRanges)$relSubsetCount <- NA
mcols(extendedAnnotationRanges)$relSubsetCount[filter] <-
mcols(extendedAnnotationRanges)$readCount[filter]/
unlist(lapply(minEq[filter], function(x){return(sum(mcols(extendedAnnotationRanges)$readCount[x], na.rm = TRUE))}))
mcols(extendedAnnotationRanges)$readCount[filter] /
calculateEqClassSums(mcols(extendedAnnotationRanges)$readCount, minEq[filter], na.rm = TRUE)
#post extend annotation filters applied here (currently only subset filter)
if(min.readFractionByEqClass>0) { # filter out subset transcripts based on relative expression
filterSet <- is.na(mcols(extendedAnnotationRanges)$relSubsetCount) | mcols(extendedAnnotationRanges)$relSubsetCount > min.readFractionByEqClass
Expand Down
11 changes: 4 additions & 7 deletions R/bambu-processReads_utilityConstructReadClasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,10 @@ createExonsByReadClass <- function(readTable){
unlistData <- unlist(exonsByReadClass, use.names = FALSE)
partitioning <- PartitioningByEnd(cumsum(elementNROWS(exonsByReadClass)),
names = NULL)
exon_rank <- lapply(width(partitioning), seq, from = 1)
exon_rank[which(readTable$strand == "-")] <-
lapply(exon_rank[which(readTable$strand == "-")], rev)
# * assumes positive for exon ranking
exon_endRank <- lapply(exon_rank, rev)
unlistData$exon_rank <- unlist(exon_rank)
unlistData$exon_endRank <- unlist(exon_endRank)
# Create exon rankings using shared utility function
rankings <- createExonRankings(width(partitioning), readTable$strand)
unlistData$exon_rank <- unlist(rankings$exon_rank)
unlistData$exon_endRank <- unlist(rankings$exon_endRank)
exonsByReadClass <- relist(unlistData, partitioning)
return(exonsByReadClass)
}
Expand Down
3 changes: 1 addition & 2 deletions R/bambu-quantify_utilityFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ genEquiRCsBasedOnObservedReads <- function(readClass){
annotationTxId, readCount, GENEID, dist,equal, compatible, txid)]
distTable <- rcWidth[distTable, on = "readClassId"]
# filter out multiple geneIDs mapped to the same readClass using rowData(se)
compatibleData <- as.data.table(as.data.frame(rowData(readClass)),
keep.rownames = TRUE)
compatibleData <- extractRowDataAsDataTable(readClass, keep.rownames = TRUE)
setnames(compatibleData, old = c("rn", "geneId"),
new = c("readClassId", "GENEID"))
distTable <- distTable[compatibleData[ readClassId %in%
Expand Down
54 changes: 54 additions & 0 deletions R/bambu_utilityFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,57 @@ merge_wrapper <- function(x,y){
merge.data.table(x,y,by = "GENEID",all=TRUE)
}

#' Add exon rank and exon endRank to GRangesList
#' @param size_list A list of integers representing the number of exons per transcript.
#' Typically obtained from elementNROWS(grlist) or width(partitioning).
#' @param strand_info Character vector of strand information ("+", "-", or "*") with
#' length equal to length(size_list). One strand value per transcript.
#' @return A list with two elements: exon_rank and exon_endRank, both as lists of integer vectors
#' @details This function creates exon rankings based on strand information.
#' For negative strand transcripts, exon_rank is reversed. exon_endRank is
#' always the reverse of exon_rank.
#' @noRd
createExonRankings <- function(size_list, strand_info) {
exon_rank <- lapply(size_list, seq, from = 1)
negative_strand <- which(strand_info == "-")
exon_rank[negative_strand] <- lapply(exon_rank[negative_strand], rev)
exon_endRank <- lapply(exon_rank, rev)
return(list(exon_rank = exon_rank, exon_endRank = exon_endRank))
}

#' Extract rowData from SummarizedExperiment as data.table
#' @param se A SummarizedExperiment object
#' @param columns Optional character vector of column names to extract
#' @param keep.rownames Logical, whether to keep row names (default: FALSE)
#' @return A data.table object
#' @details This function provides a convenient way to extract rowData from
#' a SummarizedExperiment object and convert it to a data.table in one step.
#' If columns are specified, they will be validated against available columns.
#' @importFrom data.table data.table
#' @noRd
extractRowDataAsDataTable <- function(se, columns = NULL, keep.rownames = FALSE) {
rd <- as.data.frame(rowData(se))
if (!is.null(columns)) {
missing_cols <- setdiff(columns, colnames(rd))
if (length(missing_cols) > 0) {
stop("Columns not found in rowData: ", paste(missing_cols, collapse = ", "))
}
rd <- rd[, columns, drop = FALSE]
}
return(data.table(rd, keep.rownames = keep.rownames))
}

#' Calculate sum of values by equivalence class groups
#' @param values A numeric vector of values to sum
#' @param eq_classes A list where each element contains indices for summing
#' @param na.rm Logical, whether to remove NA values (default: FALSE)
#' @return A numeric vector of sums for each equivalence class
#' @details This function computes the sum of values for each group defined
#' by equivalence classes. It's used for calculating relative read counts.
#' @noRd
calculateEqClassSums <- function(values, eq_classes, na.rm = FALSE) {
unlist(lapply(eq_classes, function(x) {
sum(values[x], na.rm = na.rm)
}))
}

12 changes: 4 additions & 8 deletions R/prepareAnnotations_utilityFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,14 @@ prepareAnnotationsFromGTF <- function(file) {
partitioning <- PartitioningByEnd(cumsum(elementNROWS(grlist)),
names = NULL)
txIdForReorder <- togroup(PartitioningByWidth(grlist))
exon_rank <- lapply(elementNROWS(grlist), seq, from = 1)
exon_rank[which(unlist(unique(strand(grlist))) == "-")] <- lapply(
exon_rank[which(unlist(unique(strand(grlist))) == "-")], rev
) # * assumes positive for exon ranking
names(exon_rank) <- NULL
unlistedExons$exon_rank <- unlist(exon_rank)
# Create exon rankings using shared utility function
rankings <- createExonRankings(elementNROWS(grlist), unlist(unique(strand(grlist))))
unlistedExons$exon_rank <- unlist(rankings$exon_rank)
unlistedExons <- unlistedExons[order(txIdForReorder,
unlistedExons$exon_rank)]
# exonsByTx is always sorted by exon rank, not by strand,
# make sure that this is the case here
unlistedExons$exon_endRank <- unlist(lapply(elementNROWS(grlist),
seq, to = 1), use.names = FALSE)
unlistedExons$exon_endRank <- unlist(rankings$exon_endRank)
unlistedExons <- unlistedExons[order(txIdForReorder,
start(unlistedExons))]
mcols(unlistedExons) <- mcols(unlistedExons)[, c("exon_rank",
Expand Down
4 changes: 2 additions & 2 deletions R/readWrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ writeBambuOutput <- function(se, path, prefix = "", outputExtendedAnno = TRUE,
writeCountsOutput(seGene, varname='counts', feature='gene',outdir, prefix)
#utils::write.table(paste0(colnames(se), "-1"), file = paste0(outdir, "barcodes.tsv"), quote = FALSE, row.names = FALSE, col.names = FALSE)
#R.utils::gzip(paste0(outdir, "barcodes.tsv"))
txANDGenes <- data.table(as.data.frame(rowData(se))[,c("TXNAME","GENEID")])
txANDGenes <- extractRowDataAsDataTable(se, columns = c("TXNAME", "GENEID"))
utils::write.table(txANDGenes, file = paste0(transcript_gtffn, "txANDgenes.tsv"),
sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
utils::write.table(names(seGene), file = paste0(transcript_gtffn, "genes.tsv"),
Expand Down Expand Up @@ -96,7 +96,7 @@ writeCountsOutput <- function(se, varname = "counts",
keep.rownames = TRUE)
if(feature == "transcript"){
setnames(estimates, "rn", "TXNAME")
geneIDs <- data.table(as.data.frame(rowData(se))[,c("TXNAME","GENEID")])
geneIDs <- extractRowDataAsDataTable(se, columns = c("TXNAME", "GENEID"))
estimates <- geneIDs[estimates, on = "TXNAME"]
}else{
setnames(estimates, "rn","GENEID")
Expand Down