Skip to content

Commit 5579edf

Browse files
authored
Merge pull request #69 from LabKey/fb_merge_discvr-20.7
Merge discvr-20.7 to develop
2 parents ee0a737 + 797bfb0 commit 5579edf

File tree

14 files changed

+65
-41
lines changed

14 files changed

+65
-41
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AlignerIndexUtil.java

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -49,9 +49,14 @@ public static boolean copyIndexIfExists(PipelineContext ctx, AlignmentOutputImpl
4949
return verifyOrCreateCachedIndex(ctx, ctx.getWorkDir(), output, localName, webserverName, genome, forceCopyLocal);
5050
}
5151

52-
public static File getWebserverIndexDir(ReferenceGenome genome, String name)
52+
public static File getIndexDir(ReferenceGenome genome, String name)
5353
{
54-
return new File(genome.getSourceFastaFile().getParentFile(), (genome.isTemporaryGenome() ? "" : INDEX_DIR + "/") + name);
54+
return getIndexDir(genome, name, false);
55+
}
56+
57+
public static File getIndexDir(ReferenceGenome genome, String name, boolean useWebserverDir)
58+
{
59+
return new File(useWebserverDir ? genome.getSourceFastaFile().getParentFile() : genome.getWorkingFastaFile().getParentFile(), (genome.isTemporaryGenome() ? "" : INDEX_DIR + "/") + name);
5560
}
5661

5762
/**
@@ -63,7 +68,7 @@ private static boolean verifyOrCreateCachedIndex(PipelineContext ctx, @Nullable
6368
if (genome != null)
6469
{
6570
//NOTE: when we cache the indexes with the source FASTA genome, we store all aligners under the folder /alignerIndexes. When these are temporary genomes, they're top-level
66-
File webserverIndexDir = getWebserverIndexDir(genome, webserverName);
71+
File webserverIndexDir = getIndexDir(genome, webserverName, true);
6772
if (webserverIndexDir.exists())
6873
{
6974
ctx.getLogger().info("previously created index found, no need to recreate");
@@ -180,7 +185,7 @@ public static void saveCachedIndex(boolean hasCachedIndex, PipelineContext ctx,
180185

181186
lockFile.delete();
182187

183-
ReferenceGenomeManager.get().markGenomeModified(genome);
188+
ReferenceGenomeManager.get().markGenomeModified(genome, ctx.getLogger());
184189
}
185190
catch (IOException e)
186191
{

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
package org.labkey.api.sequenceanalysis.pipeline;
22

3-
import org.apache.commons.io.FileUtils;
3+
import com.google.common.io.Files;
44
import org.apache.logging.log4j.Logger;
55
import org.labkey.api.pipeline.PipelineJobException;
66
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
@@ -57,13 +57,14 @@ private boolean isUpToDate(ReferenceGenome genome)
5757
long lastUpdated = localFile.lastModified();
5858
long lastSync = remoteFile.lastModified();
5959

60-
return lastUpdated >= lastSync;
60+
return lastSync >= lastUpdated;
6161
}
6262

63-
public void markGenomeModified(ReferenceGenome genome) throws IOException
63+
public void markGenomeModified(ReferenceGenome genome, Logger log) throws IOException
6464
{
6565
File toUpdate = getLocalUpdateFile(genome);
66-
FileUtils.touch(toUpdate);
66+
log.info("Marking genome as modified: " + toUpdate.getPath());
67+
Files.touch(toUpdate);
6768
}
6869

6970
public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws PipelineJobException
@@ -100,10 +101,10 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli
100101
File lastUpdate = getLocalUpdateFile(genome);
101102
if (!lastUpdate.exists())
102103
{
103-
FileUtils.touch(lastUpdate);
104+
Files.touch(lastUpdate);
104105
}
105106

106-
FileUtils.touch(getRemoteSyncFile(genome.getGenomeId()));
107+
Files.touch(getRemoteSyncFile(genome.getGenomeId()));
107108
}
108109
catch (IOException e)
109110
{

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequenceOutputHandler.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -247,6 +247,11 @@ public static interface TracksVCF
247247
{
248248
public File getScatterJobOutput(JobContext ctx) throws PipelineJobException;
249249

250+
default File finalizeScatterJobOutput(JobContext ctx, File primaryOutput) throws PipelineJobException
251+
{
252+
return primaryOutput;
253+
}
254+
250255
public SequenceOutputFile createFinalSequenceOutput(PipelineJob job, File processed, List<SequenceOutputFile> inputFiles) throws PipelineJobException;
251256
}
252257

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,5 +127,5 @@ static public void setInstance(SequencePipelineService instance)
127127

128128
abstract public PreprocessingStep.Output simpleTrimFastqPair(File fq1, File fq2, List<String> params, File outDir, Logger log) throws PipelineJobException;
129129

130-
abstract public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles) throws PipelineJobException;
130+
abstract public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException;
131131
}

SequenceAnalysis/resources/external/scRNAseq/htoClassifier.Rmd

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ if (nrow(barcodeData) > 0 && ncol(barcodeData) > 0){
4141
```{r GenerateCalls, fig.width=12}
4242
4343
if (nrow(barcodeData) > 0 && ncol(barcodeData) > 0){
44-
dt <- GenerateCellHashingCalls(barcodeData = barcodeData, outFile = finalCallFile, allCallsOutFile = allCallsOutFile)
44+
dt <- GenerateCellHashingCalls(barcodeData = barcodeData, outFile = finalCallFile, allCallsOutFile = allCallsOutFile, useSeurat = useSeurat, useMultiSeq = useMultiSeq)
4545
4646
if (exists('whitelistFile') && !is.null(whitelistFile)){
4747
GenerateSummaryForExpectedBarcodes(dt, whitelistFile=whitelistFile, outputFile=metricsFile, barcodeData=barcodeData)

SequenceAnalysis/resources/external/scRNAseq/htoClassifier.sh

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,12 @@ RAW_CALLS=$4
1414
DO_HTO_FILTER=$5
1515
MIN_READS_PER_CELL=$6
1616
METRICS_FILE=$7
17+
USE_SEURAT=$8
18+
USE_MULTISEQ=$9
19+
1720
WHITELIST="whitelistFile<-NULL;"
18-
if [ $# -ge 8 ];then
19-
WHITELIST="whitelistFile<-'"${8}"';"
21+
if [ $# -ge 10 ];then
22+
WHITELIST="whitelistFile<-'"${10}"';"
2023
fi
2124

2225
ENV_OPTS=""
@@ -33,4 +36,4 @@ fi
3336

3437
sudo $DOCKER pull bimberlab/oosap
3538

36-
sudo $DOCKER run --rm=true $RAM_OPTS $ENV_OPTS -v "${WD}:/work" -v "${HOME}:/homeDir" -u $UID -e USERID=$UID -w /work -e HOME=/homeDir bimberlab/oosap Rscript -e "barcodeDir <- '"${CITESEQ_COUNT_DIR}"';finalCallFile <- '"${FINAL_CALLS}"';doHtoFilter <- "${DO_HTO_FILTER}";maxValueForColSumFilter <- "${MIN_READS_PER_CELL}";allCallsOutFile <- '"${RAW_CALLS}"';metricsFile <- '"${METRICS_FILE}"';"${WHITELIST}"rmarkdown::render('htoClassifier.Rmd', output_file = '"${HTML_FILE}"')"
39+
sudo $DOCKER run --rm=true $RAM_OPTS $ENV_OPTS -v "${WD}:/work" -v "${HOME}:/homeDir" -u $UID -e USERID=$UID -w /work -e HOME=/homeDir bimberlab/oosap Rscript -e "barcodeDir <- '"${CITESEQ_COUNT_DIR}"';finalCallFile <- '"${FINAL_CALLS}"';doHtoFilter <- "${DO_HTO_FILTER}";maxValueForColSumFilter <- "${MIN_READS_PER_CELL}";allCallsOutFile <- '"${RAW_CALLS}"';metricsFile <- '"${METRICS_FILE}"';useSeurat <- ${USE_SEURAT};useMultiSeq <- ${USE_MULTISEQ};"${WHITELIST}"rmarkdown::render('htoClassifier.Rmd', output_file = '"${HTML_FILE}"')"

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -528,10 +528,10 @@ else if (!SequenceUtil.hasLineCount(f))
528528
}
529529

530530
@Override
531-
public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles) throws PipelineJobException
531+
public File runCiteSeqCount(PipelineStepOutput output, @Nullable String outputCategory, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, boolean generateHtoCalls, boolean createOutputFiles, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
532532
{
533533
CellHashingHandler handler = new CellHashingHandler();
534534

535-
return handler.runCiteSeqCount(output, outputCategory, htoReadset, htoList, cellBarcodeList, outputDir, basename, log, extraArgs, doHtoFiltering, minCountPerCell, localPipelineDir, editDistance, scanEditDistances, parentReadset, genomeId, generateHtoCalls ? CellHashingHandler.BARCODE_TYPE.hashing : CellHashingHandler.BARCODE_TYPE.citeseq, createOutputFiles);
535+
return handler.runCiteSeqCount(output, outputCategory, htoReadset, htoList, cellBarcodeList, outputDir, basename, log, extraArgs, doHtoFiltering, minCountPerCell, localPipelineDir, editDistance, scanEditDistances, parentReadset, genomeId, generateHtoCalls ? CellHashingHandler.BARCODE_TYPE.hashing : CellHashingHandler.BARCODE_TYPE.citeseq, createOutputFiles, useSeurat, useMultiSeq);
536536
}
537537
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/CellHashingHandler.java

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -444,7 +444,7 @@ else if (line.startsWith("Percentage unmapped"))
444444
if (type.doGenerateCalls())
445445
{
446446
ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Generating HTO calls for edit distance: " + editDistance);
447-
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), ctx.getOutputDir(), outputBasename, ctx.getLogger(), null, true, minCountPerCell, ctx.getSourceDirectory());
447+
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), ctx.getOutputDir(), outputBasename, ctx.getLogger(), null, true, minCountPerCell, ctx.getSourceDirectory(), true, true);
448448
File html = new File(htoCalls.getParentFile(), outputBasename + ".html");
449449

450450
if (!html.exists())
@@ -665,7 +665,7 @@ private File ensureLocalCopy(File input, File outputDir, Logger log, Set<File> t
665665
return input;
666666
}
667667

668-
public File generateFinalCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, @Nullable File cellBarcodeWhitelist, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir) throws PipelineJobException
668+
private File generateFinalCalls(File citeSeqCountOutDir, File outputDir, String basename, Logger log, @Nullable File cellBarcodeWhitelist, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
669669
{
670670
log.debug("generating final calls from folder: " + citeSeqCountOutDir.getPath());
671671

@@ -695,6 +695,9 @@ public File generateFinalCalls(File citeSeqCountOutDir, File outputDir, String b
695695
File rawCallsFile = new File(outputDir, basename + ".raw.txt");
696696
File metricsFile = getMetricsFile(callsFile);
697697
List<String> args = new ArrayList<>(Arrays.asList("/bin/bash", scriptWrapper, citeSeqCountOutDir.getName(), htmlFile.getName(), callsFile.getName(), rawCallsFile.getName(), (doHtoFiltering ? "T" : "F"), (minCountPerCell == null ? "0" : minCountPerCell.toString()), metricsFile.getName()));
698+
args.add(useSeurat ? "TRUE" : "FALSE");
699+
args.add(useMultiSeq ? "TRUE" : "FALSE");
700+
698701
if (cellBarcodeWhitelist != null)
699702
{
700703
args.add(cellBarcodeWhitelist.getName());
@@ -877,7 +880,7 @@ else if (htoReadset.getReadData().size() != 1)
877880
return ret;
878881
}
879882

880-
public File runCiteSeqCount(PipelineStepOutput output, String category, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, BARCODE_TYPE type, boolean createOutputFiles) throws PipelineJobException
883+
public File runCiteSeqCount(PipelineStepOutput output, String category, Readset htoReadset, File htoList, File cellBarcodeList, File outputDir, String basename, Logger log, List<String> extraArgs, boolean doHtoFiltering, @Nullable Integer minCountPerCell, File localPipelineDir, @Nullable Integer editDistance, boolean scanEditDistances, Readset parentReadset, @Nullable Integer genomeId, BARCODE_TYPE type, boolean createOutputFiles, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
881884
{
882885
HtoMergeResult htoFastqs = possiblyMergeHtoFastqs(htoReadset, outputDir, log);
883886
if (!htoFastqs.intermediateFiles.isEmpty())
@@ -964,7 +967,7 @@ public File runCiteSeqCount(PipelineStepOutput output, String category, Readset
964967

965968
File citeSeqCountOutDir = new File(outputDir, basename + ".citeSeqCounts." + ed + "." + type.name());
966969
String outputBasename = basename + "." + ed + "." + type.name();
967-
Map<String, Object> callMap = executeCiteSeqCountWithJobCtx(outputDir, outputBasename, citeSeqCountOutDir, htoFastqs.files.getLeft(), htoFastqs.files.getRight(), toolArgs, ed, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir, unknownBarcodeFile, type);
970+
Map<String, Object> callMap = executeCiteSeqCountWithJobCtx(outputDir, outputBasename, citeSeqCountOutDir, htoFastqs.files.getLeft(), htoFastqs.files.getRight(), toolArgs, ed, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir, unknownBarcodeFile, type, useSeurat, useMultiSeq);
968971
results.put(ed, callMap);
969972

970973
if (type.doGenerateCalls())
@@ -1065,7 +1068,7 @@ public File runCiteSeqCount(PipelineStepOutput output, String category, Readset
10651068
}
10661069
}
10671070

1068-
private Map<String, Object> executeCiteSeqCountWithJobCtx(File outputDir, String basename, File citeSeqCountOutDir, File fastq1, File fastq2, List<String> baseArgs, Integer ed, Logger log, File cellBarcodeList, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir, File unknownBarcodeFile, BARCODE_TYPE type) throws PipelineJobException
1071+
private Map<String, Object> executeCiteSeqCountWithJobCtx(File outputDir, String basename, File citeSeqCountOutDir, File fastq1, File fastq2, List<String> baseArgs, Integer ed, Logger log, File cellBarcodeList, boolean doHtoFiltering, Integer minCountPerCell, File localPipelineDir, File unknownBarcodeFile, BARCODE_TYPE type, boolean useSeurat, boolean useMultiSeq) throws PipelineJobException
10691072
{
10701073
CellHashingHandler.CiteSeqCountWrapper wrapper = new CellHashingHandler.CiteSeqCountWrapper(log);
10711074
File doneFile = new File(citeSeqCountOutDir, "citeSeqCount." + type.name() + "." + ed + ".done");
@@ -1132,7 +1135,7 @@ else if (line.startsWith("Percentage unmapped"))
11321135

11331136
if (type.doGenerateCalls())
11341137
{
1135-
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), outputDir, basename, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir);
1138+
File htoCalls = generateFinalCalls(outputMatrix.getParentFile(), outputDir, basename, log, cellBarcodeList, doHtoFiltering, minCountPerCell, localPipelineDir, useSeurat, useMultiSeq);
11361139
if (!htoCalls.exists())
11371140
{
11381141
throw new PipelineJobException("missing expected file: " + htoCalls.getPath());

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CreateReferenceLibraryTask.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -384,7 +384,7 @@ public RecordedActionSet run() throws PipelineJobException
384384
getJob().getLogger().info("creation complete");
385385

386386
ReferenceGenome rg = SequenceAnalysisService.get().getReferenceGenome(rowId, getJob().getUser());
387-
ReferenceGenomeManager.get().markGenomeModified(rg);
387+
ReferenceGenomeManager.get().markGenomeModified(rg, getJob().getLogger());
388388

389389
Set<GenomeTrigger> triggers = new HashSet<>(getPipelineJob().getExtraTriggers());
390390
if (getPipelineJob().isSkipTriggers())

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ImportGenomeTrackTask.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ public RecordedActionSet run() throws PipelineJobException
143143
}
144144

145145
final int trackId = addTrackForLibrary(getPipelineJob().getTrack(), getPipelineJob().getTrackName(), getPipelineJob().getTrackDescription(), action);
146-
ReferenceGenomeManager.get().markGenomeModified(SequenceAnalysisService.get().getReferenceGenome(getPipelineJob().getLibraryId(), getJob().getUser()));
146+
ReferenceGenomeManager.get().markGenomeModified(SequenceAnalysisService.get().getReferenceGenome(getPipelineJob().getLibraryId(), getJob().getUser()), getJob().getLogger());
147147

148148
Set<GenomeTrigger> triggers = SequenceAnalysisServiceImpl.get().getGenomeTriggers();
149149
if (!triggers.isEmpty())

0 commit comments

Comments
 (0)