Skip to content

Commit bc8e24d

Browse files
committed
Allow user to select seurat and/or multiseq HTO methods
1 parent 582537a commit bc8e24d

File tree

7 files changed

+38
-17
lines changed

7 files changed

+38
-17
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequenceOutputHandler.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,11 @@ public static interface TracksVCF
247247
{
248248
public File getScatterJobOutput(JobContext ctx) throws PipelineJobException;
249249

250+
/**
 * Optional hook for finalizing the output of a scatter job. This default
 * implementation does no work and returns {@code primaryOutput} unchanged;
 * implementations may override it to return a different file.
 *
 * @param ctx the job context
 * @param primaryOutput the scatter job output to finalize
 * @return the file to use as the scatter job output; {@code primaryOutput} by default
 * @throws PipelineJobException never thrown by this default; declared so overrides can report failures
 */
default File finalizeScatterJobOutput(JobContext ctx, File primaryOutput) throws PipelineJobException
251+
{
252+
return primaryOutput;
253+
}
254+
250255
public SequenceOutputFile createFinalSequenceOutput(PipelineJob job, File processed, List<SequenceOutputFile> inputFiles) throws PipelineJobException;
251256
}
252257

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,5 +126,5 @@ static public void setInstance(SequencePipelineService instance)
126126

127127
abstract public PreprocessingStep.Output simpleTrimFastqPair(File fq1, File fq2, List<String> params, File outDir, Logger log) throws PipelineJobException;
128128

129-
abstract public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles) throws PipelineJobException;
129+
abstract public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException;
130130
}

SequenceAnalysis/resources/external/scRNAseq/htoClassifier.Rmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ if (nrow(barcodeData) > 0 && ncol(barcodeData) > 0){
4141
```{r GenerateCalls, fig.width=12}
4242
4343
if (nrow(barcodeData) > 0 && ncol(barcodeData) > 0){
44-
dt <- GenerateCellHashingCalls(barcodeData = barcodeData, outFile = finalCallFile, allCallsOutFile = allCallsOutFile)
44+
dt <- GenerateCellHashingCalls(barcodeData = barcodeData, outFile = finalCallFile, allCallsOutFile = allCallsOutFile, useSeurat = useSeurat, useMultiSeq = useMultiSeq)
4545
4646
if (exists('whitelistFile') && !is.null(whitelistFile)){
4747
GenerateSummaryForExpectedBarcodes(dt, whitelistFile=whitelistFile, outputFile=metricsFile, barcodeData=barcodeData)

SequenceAnalysis/resources/external/scRNAseq/htoClassifier.sh

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@ RAW_CALLS=$4
1414
DO_HTO_FILTER=$5
1515
MIN_READS_PER_CELL=$6
1616
METRICS_FILE=$7
17+
USE_SEURAT=$8
18+
USE_MULTISEQ=$9
19+
1720
WHITELIST="whitelistFile<-NULL;"
18-
if [ $# -ge 8 ];then
19-
WHITELIST="whitelistFile<-'"${8}"';"
21+
if [ $# -ge 10 ];then
22+
WHITELIST="whitelistFile<-'"${10}"';"
2023
fi
2124

2225
ENV_OPTS=""
@@ -33,4 +36,4 @@ fi
3336

3437
sudo $DOCKER pull bimberlab/oosap
3538

36-
sudo $DOCKER run --rm=true $RAM_OPTS $ENV_OPTS -v "${WD}:/work" -v "${HOME}:/homeDir" -u $UID -e USERID=$UID -w /work -e HOME=/homeDir bimberlab/oosap Rscript -e "barcodeDir <- '"${CITESEQ_COUNT_DIR}"';finalCallFile <- '"${FINAL_CALLS}"';doHtoFilter <- "${DO_HTO_FILTER}";maxValueForColSumFilter <- "${MIN_READS_PER_CELL}";allCallsOutFile <- '"${RAW_CALLS}"';metricsFile <- '"${METRICS_FILE}"';"${WHITELIST}"rmarkdown::render('htoClassifier.Rmd', output_file = '"${HTML_FILE}"')"
39+
sudo $DOCKER run --rm=true $RAM_OPTS $ENV_OPTS -v "${WD}:/work" -v "${HOME}:/homeDir" -u $UID -e USERID=$UID -w /work -e HOME=/homeDir bimberlab/oosap Rscript -e "barcodeDir <- '"${CITESEQ_COUNT_DIR}"';finalCallFile <- '"${FINAL_CALLS}"';doHtoFilter <- "${DO_HTO_FILTER}";maxValueForColSumFilter <- "${MIN_READS_PER_CELL}";allCallsOutFile <- '"${RAW_CALLS}"';metricsFile <- '"${METRICS_FILE}"';useSeurat <- ${USE_SEURAT};useMultiSeq <- ${USE_MULTISEQ};"${WHITELIST}"rmarkdown::render('htoClassifier.Rmd', output_file = '"${HTML_FILE}"')"

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -527,10 +527,10 @@ else if (!SequenceUtil.hasLineCount(f))
527527
}
528528

529529
@Override
530-
public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles) throws PipelineJobException
530+
public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
531531
{
532532
CellHashingHandler handler = new CellHashingHandler();
533533

534-
return handler.runCiteSeqCount(output, outputCategory, htoReadset, htoList, cellBarcodeList, outputDir, basename, log, extraArgs, doHtoFiltering, minCountPerCell, localPipelineDir, editDistance, scanEditDistances, parentReadset, genomeId, generateHtoCalls ? CellHashingHandler.BARCODE_TYPE.hashing : CellHashingHandler.BARCODE_TYPE.citeseq, createOutputFiles);
534+
return handler.runCiteSeqCount(output, outputCategory, htoReadset, htoList, cellBarcodeList, outputDir, basename, log, extraArgs, doHtoFiltering, minCountPerCell, localPipelineDir, editDistance, scanEditDistances, parentReadset, genomeId, generateHtoCalls ? CellHashingHandler.BARCODE_TYPE.hashing : CellHashingHandler.BARCODE_TYPE.citeseq, createOutputFiles, useSeurat, useMultiSeq);
535535
}
536536
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/CellHashingHandler.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ else if (line.startsWith("Percentage unmapped"))
443443
if (type.doGenerateCalls())
444444
{
445445
ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Generating HTO calls for edit distance: " + editDistance);
446-
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), ctx.getOutputDir(), outputBasename, ctx.getLogger(), null, true, minCountPerCell, ctx.getSourceDirectory());
446+
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), ctx.getOutputDir(), outputBasename, ctx.getLogger(), null, true, minCountPerCell, ctx.getSourceDirectory(), true, true);
447447
File html = new File(htoCalls.getParentFile(), outputBasename + ".html");
448448

449449
if (!html.exists())
@@ -664,7 +664,7 @@ private File ensureLocalCopy(File input, File outputDir, Logger log, Set<File> t
664664
return input;
665665
}
666666

667-
public File generateFinalCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, @Nullable File cellBarcodeWhitelist, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir) throws PipelineJobException
667+
private File generateFinalCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, @Nullable File cellBarcodeWhitelist, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
668668
{
669669
log.debug("generating final calls from folder: " + citeSeqCountOutDir.getPath());
670670

@@ -694,6 +694,9 @@ public File generateFinalCalls(File citeSeqCountOutDir, File outputDir, String b
694694
File rawCallsFile = new File(outputDir, basename + ".raw.txt");
695695
File metricsFile = getMetricsFile(callsFile);
696696
List<String> args = new ArrayList<>(Arrays.asList("/bin/bash", scriptWrapper, citeSeqCountOutDir.getName(), htmlFile.getName(), callsFile.getName(), rawCallsFile.getName(), (doHtoFiltering ? "T" : "F"), (minCountPerCell == null ? "0" : minCountPerCell.toString()), metricsFile.getName()));
697+
args.add(useSeurat ? "TRUE" : "FALSE");
698+
args.add(useMultiSeq ? "TRUE" : "FALSE");
699+
697700
if (cellBarcodeWhitelist != null)
698701
{
699702
args.add(cellBarcodeWhitelist.getName());
@@ -876,7 +879,7 @@ else if (htoReadset.getReadData().size() != 1)
876879
return ret;
877880
}
878881

879-
public File runCiteSeqCount(PipelineStepOutput output, String category, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, BARCODE_TYPE type, boolean createOutputFiles) throws PipelineJobException
882+
public File runCiteSeqCount(PipelineStepOutput output, String category, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, BARCODE_TYPE type, boolean createOutputFiles, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
880883
{
881884
HtoMergeResult htoFastqs = possiblyMergeHtoFastqs(htoReadset, outputDir, log);
882885
if (!htoFastqs.intermediateFiles.isEmpty())
@@ -963,7 +966,7 @@ public File runCiteSeqCount(PipelineStepOutput output, String category, Readset
963966

964967
File citeSeqCountOutDir = new File(outputDir, basename + ".citeSeqCounts." + ed + "." + type.name());
965968
String outputBasename = basename + "." + ed + "." + type.name();
966-
Map<String, Object> callMap = executeCiteSeqCountWithJobCtx(outputDir, outputBasename, citeSeqCountOutDir, htoFastqs.files.getLeft(), htoFastqs.files.getRight(), toolArgs, ed, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir, unknownBarcodeFile, type);
969+
Map<String, Object> callMap = executeCiteSeqCountWithJobCtx(outputDir, outputBasename, citeSeqCountOutDir, htoFastqs.files.getLeft(), htoFastqs.files.getRight(), toolArgs, ed, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir, unknownBarcodeFile, type, useSeurat, useMultiSeq);
967970
results.put(ed, callMap);
968971

969972
if (type.doGenerateCalls())
@@ -1064,7 +1067,7 @@ public File runCiteSeqCount(PipelineStepOutput output, String category, Readset
10641067
}
10651068
}
10661069

1067-
private Map<String, Object> executeCiteSeqCountWithJobCtx(File outputDir, String basename, File citeSeqCountOutDir, File fastq1, File fastq2, List<String> baseArgs, Integer ed, Logger log, File cellBarcodeList, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir, File unknownBarcodeFile, BARCODE_TYPE type) throws PipelineJobException
1070+
private Map<String, Object> executeCiteSeqCountWithJobCtx(File outputDir, String basename, File citeSeqCountOutDir, File fastq1, File fastq2, List<String> baseArgs, Integer ed, Logger log, File cellBarcodeList, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir, File unknownBarcodeFile, BARCODE_TYPE type, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
10681071
{
10691072
CellHashingHandler.CiteSeqCountWrapper wrapper = new CellHashingHandler.CiteSeqCountWrapper(log);
10701073
File doneFile = new File(citeSeqCountOutDir, "citeSeqCount." + type.name() + "." + ed + ".done");
@@ -1131,7 +1134,7 @@ else if (line.startsWith("Percentage unmapped"))
11311134

11321135
if (type.doGenerateCalls())
11331136
{
1134-
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), outputDir, basename, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir);
1137+
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), outputDir, basename, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir, useSeurat, useMultiSeq);
11351138
if (!htoCalls.exists())
11361139
{
11371140
throw new PipelineJobException("missing expected file: " + htoCalls.getPath());

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/VariantProcessingRemoteSplitTask.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,20 @@ private VariantProcessingJob getPipelineJob()
110110
File output = ((SequenceOutputHandler.TracksVCF)handler).getScatterJobOutput(ctx);
111111
try
112112
{
113-
//NOTE: the VCF was copied back to the source dir, so translate paths
114-
String path = _wd.getRelativePath(output);
115-
output = new File(ctx.getSourceDirectory(), path);
116-
getPipelineJob().getScatterJobOutputs().put(getPipelineJob().getIntervalSetName(), output);
113+
output = ((SequenceOutputHandler.TracksVCF)handler).finalizeScatterJobOutput(ctx, output);
114+
115+
// If the output is still under the work dir, translate path. Otherwise it was already copied to the source dir
116+
if (output.getPath().startsWith(_wd.getDir().getPath()))
117+
{
118+
//NOTE: the VCF will be copied back to the source dir, so translate paths
119+
String path = _wd.getRelativePath(output);
120+
output = new File(ctx.getSourceDirectory(), path);
121+
getPipelineJob().getScatterJobOutputs().put(getPipelineJob().getIntervalSetName(), output);
122+
}
123+
else
124+
{
125+
ctx.getLogger().debug("Output has already been moved from workdir: " + output.getPath());
126+
}
117127
}
118128
catch (IOException e)
119129
{

0 commit comments

Comments
 (0)