Skip to content

Commit 8ae8e16

Browse files
authored
Merge pull request #56 from LabKey/fb_merge_discvr-20.7
Merge discvr-20.7 to develop
2 parents 5e1a454 + 659b841 commit 8ae8e16

24 files changed

+948
-175
lines changed

SequenceAnalysis/resources/external/scRNAseq/htoClassifier.Rmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ if (cores != ''){
1919

2020
## Basic QC and Filtering on input:
2121

22-
```{r QC}
22+
```{r QC, fig.width=12}
2323
2424
barcodeData <- ProcessCiteSeqCount(bFile = barcodeDir, doRowFilter = doHtoFilter, maxValueForColSumFilter = maxValueForColSumFilter)
2525
if (nrow(barcodeData) == 0) {
@@ -38,7 +38,7 @@ if (nrow(barcodeData) > 0 && ncol(barcodeData) > 0){
3838

3939
## Generate calls
4040

41-
```{r GenerateCalls}
41+
```{r GenerateCalls, fig.width=12}
4242
4343
if (nrow(barcodeData) > 0 && ncol(barcodeData) > 0){
4444
dt <- GenerateCellHashingCalls(barcodeData = barcodeData, outFile = finalCallFile, allCallsOutFile = allCallsOutFile)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<query xmlns="http://labkey.org/data/xml/query">
2+
<metadata>
3+
<tables xmlns="http://labkey.org/data/xml">
4+
<table tableName="quality_metrics_analyses_pivoted" tableDbType="TABLE">
5+
<tableTitle>Quality Metrics By Analysis</tableTitle>
6+
<columns>
7+
<column columnName="analysis_id">
8+
<isKeyField>true</isKeyField>
9+
</column>
10+
</columns>
11+
</table>
12+
</tables>
13+
</metadata>
14+
</query>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
select
2+
analysis_id,
3+
max(readset) as readset,
4+
--container,
5+
max(category) as category,
6+
count(*) as records,
7+
metricName,
8+
avg(metricValue) as metricValue
9+
10+
from sequenceanalysis.quality_metrics q
11+
where (category is null or category not in ('FIRST_OF_PAIR', 'SECOND_OF_PAIR'))
12+
group by analysis_id, metricName
13+
pivot metricValue by metricName

SequenceAnalysis/resources/queries/sequenceanalysis/quality_metrics_pivoted.query.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<metadata>
33
<tables xmlns="http://labkey.org/data/xml">
44
<table tableName="quality_metrics_pivoted" tableDbType="TABLE">
5-
<tableTitle>Quality Metrics</tableTitle>
5+
<tableTitle>Quality Metrics By File</tableTitle>
66
<columns>
77
<column columnName="dataId">
88
<isKeyField>true</isKeyField>

SequenceAnalysis/resources/schemas/sequenceanalysis.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,15 @@
484484
<columnTitle>SRA Run</columnTitle>
485485
<url>https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=${sra_accession}</url>
486486
</column>
487+
<column columnName="metrics" wrappedColumnName="rowid">
488+
<columnTitle>Quality Metrics</columnTitle>
489+
<isUnselectable>true</isUnselectable>
490+
<fk>
491+
<fkDbSchema>sequenceanalysis</fkDbSchema>
492+
<fkTable>quality_metrics_analyses_pivoted</fkTable>
493+
<fkColumnName>analysis_id</fkColumnName>
494+
</fk>
495+
</column>
487496
</columns>
488497
<buttonBarOptions includeStandardButtons="false">
489498
<includeScript>laboratory.context</includeScript>

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
import org.labkey.sequenceanalysis.run.analysis.LofreqAnalysis;
8282
import org.labkey.sequenceanalysis.run.analysis.MergeLoFreqVcfHandler;
8383
import org.labkey.sequenceanalysis.run.analysis.PARalyzerAnalysis;
84+
import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis;
8485
import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis;
8586
import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis;
8687
import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis;
@@ -281,6 +282,7 @@ public static void registerPipelineSteps()
281282
SequencePipelineService.get().registerPipelineStep(new SubreadAnalysis.Provider());
282283
SequencePipelineService.get().registerPipelineStep(new TagPcrSummaryStep.Provider());
283284
SequencePipelineService.get().registerPipelineStep(new LofreqAnalysis.Provider());
285+
SequencePipelineService.get().registerPipelineStep(new PindelAnalysis.Provider());
284286

285287
//SequencePipelineService.get().registerPipelineStep(new BlastUnmappedReadAnalysis.Provider());
286288
SequencePipelineService.get().registerPipelineStep(new PARalyzerAnalysis.Provider());

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceProvider.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,16 @@ public List<TabbedReportItem> getTabbedReportItems(Container c, User u)
219219
TabbedReportItem analyses = new QueryTabbedReportItem(cache, this, SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_ANALYSES, "Sequence Analyses", category);
220220
analyses.setSubjectIdFieldKey(FieldKey.fromString("readset/subjectid"));
221221
analyses.setSampleDateFieldKey(FieldKey.fromString("readset/sampledate"));
222+
analyses.setAllProjectsFieldKey(FieldKey.fromString("readset/allProjectsPivot"));
223+
analyses.setOverlappingProjectsFieldKey(FieldKey.fromString("readset/overlappingProjectsPivot"));
222224
analyses.setOwnerKey(owner.getPropertyManagerKey());
223225
items.add(analyses);
224226

225227
TabbedReportItem outputs = new QueryTabbedReportItem(cache, this, SequenceAnalysisSchema.SCHEMA_NAME, SequenceAnalysisSchema.TABLE_OUTPUTFILES, "Sequence Outputs", category);
226228
outputs.setSubjectIdFieldKey(FieldKey.fromString("readset/subjectid"));
227229
outputs.setSampleDateFieldKey(FieldKey.fromString("readset/sampledate"));
230+
outputs.setAllProjectsFieldKey(FieldKey.fromString("readset/allProjectsPivot"));
231+
outputs.setOverlappingProjectsFieldKey(FieldKey.fromString("readset/overlappingProjectsPivot"));
228232
outputs.setOwnerKey(owner.getPropertyManagerKey());
229233
items.add(outputs);
230234

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/TaskFileManagerImpl.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,7 @@ public void cleanup(Collection<RecordedAction> actions, @Nullable AbstractResume
836836
{
837837
_job.getLogger().debug("performing file cleanup");
838838
_job.setStatus(PipelineJob.TaskStatus.running, "PERFORMING FILE CLEANUP");
839+
_job.setErrors(0);
839840

840841
_job.getLogger().debug("transferring " + _outputsToCreate.size() + " sequence outputs to pipeline job, existing: " + _job.getOutputsToCreate().size());
841842
for (SequenceOutputFile so : _outputsToCreate)

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/CellRangerWrapper.java

Lines changed: 9 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161

6262
public class CellRangerWrapper extends AbstractCommandWrapper
6363
{
64+
public static final String GTF_FILE = "GTF File";
65+
6466
public CellRangerWrapper(@Nullable Logger logger)
6567
{
6668
super(logger);
@@ -94,10 +96,7 @@ public Provider()
9496
put("extensions", Arrays.asList("gtf"));
9597
put("width", 400);
9698
put("allowBlank", false);
97-
}}, null),
98-
ToolParameterDescriptor.create("premrna", "Use pre-mRNA GTF", "Normally, reads are only counted if they overlap exons. If selected, the pipeline will convert the GTF to list all transcript intervals as exon, meaning reads within introns will be counted as well. This could be useful for single-nuclei sequencing (which captures pre-mRNA), or if your GTF exon annotations may be lacking.", "checkbox", new JSONObject(){{
99-
100-
}}, false)
99+
}}, null)
101100
), PageFlowUtil.set("sequenceanalysis/field/GenomeFileSelectorField.js"), "https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger", true, false, ALIGNMENT_MODE.MERGE_THEN_ALIGN);
102101
}
103102

@@ -138,7 +137,6 @@ public String getAlignmentDescription()
138137

139138
protected static String getAlignDescription(PipelineStepProvider provider, PipelineContext ctx, int stepIdx, boolean addAligner)
140139
{
141-
boolean isPreMrna = isPreMrna(provider, ctx, stepIdx);
142140
Integer gtfId = provider.getParameterByName("gtfFile").extractValue(ctx.getJob(), provider, stepIdx, Integer.class);
143141
File gtfFile = ctx.getSequenceSupport().getCachedData(gtfId);
144142
if (gtfFile == null)
@@ -161,11 +159,6 @@ protected static String getAlignDescription(PipelineStepProvider provider, Pipel
161159
lines.add("GTF: " + gtfFile.getName());
162160
}
163161

164-
if (isPreMrna)
165-
{
166-
lines.add("Converted to pre-mRNA GTF");
167-
}
168-
169162
return lines.isEmpty() ? null : StringUtils.join(lines, '\n');
170163
}
171164

@@ -178,14 +171,7 @@ public String getIndexCachedDirName(PipelineJob job)
178171
throw new IllegalArgumentException("Missing gtfFile parameter");
179172
}
180173

181-
boolean premrna = isPreMrna(getProvider(), getPipelineCtx(), getStepIdx());
182-
183-
return "cellRanger-" + gtfId + (premrna ? "-premrna" : "");
184-
}
185-
186-
private static boolean isPreMrna(PipelineStepProvider provider, PipelineContext ctx, int stepIdx)
187-
{
188-
return provider.getParameterByName("premrna").extractValue(ctx.getJob(), provider, stepIdx, Boolean.class, false);
174+
return "cellRanger-" + gtfId;
189175
}
190176

191177
@Override
@@ -227,13 +213,6 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir)
227213

228214
File gtfEdit = new File(indexDir.getParentFile(), FileUtil.getBaseName(gtfFile) + ".geneId.gtf");
229215

230-
//See: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/advanced/references
231-
boolean premrna = getProvider().getParameterByName("premrna").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, false);
232-
if (premrna)
233-
{
234-
getPipelineCtx().getLogger().debug("Creating a pre-mRNA version of the GTF");
235-
}
236-
237216
try (CSVReader reader = new CSVReader(Readers.getReader(gtfFile), '\t', CSVWriter.NO_QUOTE_CHARACTER); CSVWriter writer = new CSVWriter(PrintWriters.getPrintWriter(gtfEdit), '\t', CSVWriter.NO_QUOTE_CHARACTER, CSVWriter.NO_ESCAPE_CHARACTER))
238217
{
239218
String[] line;
@@ -252,12 +231,6 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir)
252231
continue;
253232
}
254233

255-
if (premrna && "transcript".equalsIgnoreCase(line[2]))
256-
{
257-
exonsAdded++;
258-
line[2] = "exon";
259-
}
260-
261234
writer.writeNext(line);
262235
}
263236
}
@@ -271,12 +244,7 @@ public IndexOutput createIndex(ReferenceGenome referenceGenome, File outputDir)
271244
getPipelineCtx().getLogger().info("dropped " + linesDropped + " lines lacking gene_id, transcript_id, or with an empty value for gene_id/transcript_id");
272245
}
273246

274-
if (premrna)
275-
{
276-
getPipelineCtx().getLogger().info("total transcripts converted to exon: " + exonsAdded);
277-
}
278-
279-
boolean useAlternateGtf = linesDropped > 0 || premrna;
247+
boolean useAlternateGtf = linesDropped > 0;
280248
if (useAlternateGtf)
281249
{
282250
gtfFile = gtfEdit;
@@ -365,6 +333,10 @@ public AlignmentOutput performAlignment(Readset rs, File inputFastq1, @Nullable
365333
args.add("--sample=" + StringUtils.join(sampleNames, ","));
366334
}
367335

336+
Integer gtfId = getProvider().getParameterByName("gtfFile").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
337+
File gtfFile = getPipelineCtx().getSequenceSupport().getCachedData(gtfId);
338+
output.addInput(gtfFile, GTF_FILE);
339+
368340
File indexDir = AlignerIndexUtil.getWebserverIndexDir(referenceGenome, getIndexCachedDirName(getPipelineCtx().getJob()));
369341
args.add("--transcriptome=" + indexDir.getPath());
370342

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,23 @@ public Provider()
116116
put("minValue", 0.5);
117117
put("maxValue", 1.0);
118118
put("decimalPrecision", 2);
119-
}}, 0.5)
119+
}}, 0.5),
120+
ToolParameterDescriptor.create("minFraction", "Pindel Min Fraction To Report", "Only variants representing at least this fraction of reads (based on depth at the start position) will be reported.", "ldk-numberfield", new JSONObject()
121+
{{
122+
put("minValue", 0.0);
123+
put("maxValue", 1.0);
124+
put("decimalPrecision", 2);
125+
}}, 0.1),
126+
ToolParameterDescriptor.create("minDepth", "Pindel Min Depth To Report", "Only variants representing at least this many reads (based on depth at the start position) will be reported.", "ldk-integerfield", new JSONObject()
127+
{{
128+
put("minValue", 0);
129+
}}, 10),
130+
ToolParameterDescriptor.create("minInsertSize", "Min Insert Size", "Normally this tool will use the value of Picard CollectInsertSizeMetrics as the mean insert size to pass to pindel; however, this value can be used to set a minimum.", "ldk-integerfield", new JSONObject()
131+
{{
132+
put("minValue", 0);
133+
}}, 200)
120134

121-
), null, "http://csb5.github.io/lofreq/");
135+
), PageFlowUtil.set("sequenceanalysis/field/GenomeFileSelectorField.js"), "http://csb5.github.io/lofreq/");
122136
}
123137

124138

@@ -129,22 +143,9 @@ public LofreqAnalysis create(PipelineContext ctx)
129143
}
130144
}
131145

132-
133-
@Override
134-
public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
146+
public static void runDepthOfCoverage(PipelineContext ctx, AnalysisOutputImpl output, File outputDir, ReferenceGenome referenceGenome, File inputBam, File coverageOut) throws PipelineJobException
135147
{
136-
AnalysisOutputImpl output = new AnalysisOutputImpl();
137-
138-
File outputVcf = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.vcf.gz");
139-
File outputVcfSnpEff = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.snpeff.vcf.gz");
140-
141-
//LoFreq
142-
getWrapper().execute(inputBam, outputVcf, referenceGenome.getWorkingFastaFile(), SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()));
143-
144-
//Add depth for downstream use:
145-
File coverageOut = new File(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(outputVcf.getName()) + ".coverage");
146-
147-
DepthOfCoverageWrapper wrapper = new DepthOfCoverageWrapper(getPipelineCtx().getLogger());
148+
DepthOfCoverageWrapper wrapper = new DepthOfCoverageWrapper(ctx.getLogger());
148149
List<String> extraArgs = new ArrayList<>();
149150
extraArgs.add("--include-deletions");
150151
extraArgs.add("--omit-per-sample-statistics");
@@ -159,6 +160,22 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
159160
{
160161
throw new PipelineJobException("Unable to find file: " + coverageOut.getPath());
161162
}
163+
}
164+
165+
@Override
166+
public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
167+
{
168+
AnalysisOutputImpl output = new AnalysisOutputImpl();
169+
170+
File outputVcf = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.vcf.gz");
171+
File outputVcfSnpEff = new File(outputDir, FileUtil.getBaseName(inputBam) + ".lofreq.snpeff.vcf.gz");
172+
173+
//LoFreq
174+
getWrapper().execute(inputBam, outputVcf, referenceGenome.getWorkingFastaFile(), SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()));
175+
176+
//Add depth for downstream use:
177+
File coverageOut = new File(outputDir, SequenceAnalysisService.get().getUnzippedBaseName(outputVcf.getName()) + ".coverage");
178+
runDepthOfCoverage(getPipelineCtx(), output, outputDir, referenceGenome, inputBam, coverageOut);
162179

163180
//Create a BED file with all regions of coverage below MIN_COVERAGE:
164181
int minCoverage = getProvider().getParameterByName("minCoverage").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
@@ -306,7 +323,7 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
306323

307324
//SnpEff:
308325
Integer geneFileId = getProvider().getParameterByName(SNPEffStep.GENE_PARAM).extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class);
309-
File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx(), referenceGenome, geneFileId);
326+
File snpEffBaseDir = SNPEffStep.checkOrCreateIndex(getPipelineCtx().getSequenceSupport(), getPipelineCtx().getLogger(), referenceGenome, geneFileId);
310327

311328
SnpEffWrapper snpEffWrapper = new SnpEffWrapper(getPipelineCtx().getLogger());
312329
snpEffWrapper.runSnpEff(referenceGenome.getGenomeId(), geneFileId, snpEffBaseDir, outputVcf, outputVcfSnpEff, null);
@@ -569,6 +586,12 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
569586
throw new PipelineJobException(e);
570587
}
571588

589+
Double minFraction = getProvider().getParameterByName("minFraction").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Double.class, 0.0);
590+
int minDepth = getProvider().getParameterByName("minDepth").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
591+
int minInsertSize = getProvider().getParameterByName("minInsertSize").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class, 0);
592+
593+
PindelAnalysis.runPindel(output, getPipelineCtx(), rs, outputDir, inputBam, referenceGenome.getWorkingFastaFile(), minFraction, minDepth, true, coverageOut, minInsertSize);
594+
572595
return output;
573596
}
574597

0 commit comments

Comments
 (0)