Skip to content

Commit 3d40d73

Browse files
authored
Merge pull request #278 from BimberLab/24.3_fb_merge
Merge discvr-23.11 to discvr-24.3
2 parents 1f6482b + eaf2e68 commit 3d40d73

File tree

20 files changed

+601
-179
lines changed

20 files changed

+601
-179
lines changed

SequenceAnalysis/pipeline_code/sequence_tools_install.sh

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -335,30 +335,6 @@ else
335335
fi
336336

337337

338-
#
339-
# BisSNP
340-
#
341-
echo ""
342-
echo ""
343-
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
344-
echo "Install BisSNP"
345-
echo ""
346-
cd $LKSRC_DIR
347-
348-
if [[ ! -e ${LKTOOLS_DIR}/BisSNP.jar || ! -z $FORCE_REINSTALL ]];
349-
then
350-
echo "Cleaning up previous installs"
351-
rm -Rf BisSNP*
352-
rm -Rf $LKTOOLS_DIR/BisSNP.jar
353-
354-
wget $WGET_OPTS https://downloads.sourceforge.net/project/bissnp/BisSNP-0.82.2/BisSNP-0.82.2.jar
355-
356-
install ./BisSNP-0.82.2.jar $LKTOOLS_DIR/BisSNP.jar
357-
else
358-
echo "Already installed"
359-
fi
360-
361-
362338
#
363339
#mosaik
364340
#

SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,19 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', {
104104
maxValue: 1.0,
105105
value: 0.95,
106106
fieldLabel: 'Min Percent Match',
107-
helpPopup: 'In order to lift to the target genome, the feature must have at least this percent match. Lower this value to be more permissive; however, this risks incorrect liftovers',
107+
helpPopup: 'In order to lift to the target genome, the feature must have at least this percent match. Lower this value to be more permissive; however, this risks incorrect liftovers. This is ignored if using bcftools.',
108108
itemId: 'pctField'
109109
},{
110110
xtype: 'checkbox',
111111
itemId: 'dropGenotypes',
112112
checked: false,
113113
helpPopup: 'If checked, no genotypes will be written to the output file (applies to VCFs only). This can be useful (and necessary) when lifting VCFs with extremely high sample number.',
114114
fieldLabel: 'Drop Genotypes'
115+
},{
116+
xtype: 'checkbox',
117+
itemId: 'useBcfTools',
118+
checked: false,
119+
fieldLabel: 'Use bcftools'
115120
}].concat(SequenceAnalysis.window.OutputHandlerWindow.getCfgForToolParameters(this.toolParameters)),
116121
buttons: [{
117122
text: 'Submit',

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import org.labkey.sequenceanalysis.run.alignment.BowtieWrapper;
7878
import org.labkey.sequenceanalysis.run.alignment.GSnapWrapper;
7979
import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper;
80+
import org.labkey.sequenceanalysis.run.alignment.ParagraphStep;
8081
import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper;
8182
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
8283
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
@@ -113,6 +114,7 @@
113114
import org.labkey.sequenceanalysis.run.util.FastqcRunner;
114115
import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler;
115116
import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler;
117+
import org.labkey.sequenceanalysis.run.util.SVAnnotateStep;
116118
import org.labkey.sequenceanalysis.run.variant.*;
117119
import org.labkey.sequenceanalysis.util.Barcoder;
118120
import org.labkey.sequenceanalysis.util.ChainFileValidator;
@@ -300,6 +302,7 @@ public static void registerPipelineSteps()
300302
SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider());
301303
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
302304
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());
305+
SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider());
303306

304307
//handlers
305308
SequenceAnalysisService.get().registerFileHandler(new LiftoverHandler());
@@ -334,6 +337,7 @@ public static void registerPipelineSteps()
334337
SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler());
335338
SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler());
336339
SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler());
340+
SequenceAnalysisService.get().registerFileHandler(new ParagraphStep());
337341

338342
SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler());
339343
SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler());

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@
2828
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
2929
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
3030
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
31+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
3132
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
3233
import org.labkey.api.util.FileType;
3334
import org.labkey.api.util.FileUtil;
3435
import org.labkey.api.view.ActionURL;
3536
import org.labkey.api.writer.PrintWriters;
3637
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
3738
import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler;
39+
import org.labkey.sequenceanalysis.run.util.LiftoverBcfToolsWrapper;
3840
import org.labkey.sequenceanalysis.run.util.LiftoverVcfWrapper;
3941
import org.labkey.sequenceanalysis.util.SequenceUtil;
4042

@@ -49,7 +51,7 @@
4951
/**
5052
* Created by bimber on 8/26/2014.
5153
*/
52-
public class LiftoverHandler implements SequenceOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
54+
public class LiftoverHandler implements SequenceOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>, VariantProcessingStep.SupportsScatterGather
5355
{
5456
private final FileType _bedFileType = new FileType(".bed", false);
5557
//private FileType _gffFileType = new FileType("gff", false);
@@ -60,6 +62,12 @@ public LiftoverHandler()
6062

6163
}
6264

65+
@Override
66+
public boolean doSortAfterMerge()
67+
{
68+
return true;
69+
}
70+
6371
@Override
6472
public String getName()
6573
{
@@ -167,8 +175,9 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
167175
JSONObject params = ctx.getParams();
168176

169177
boolean dropGenotypes = params.optBoolean("dropGenotypes", false);
178+
boolean useBcfTools = params.optBoolean("useBcfTools", false);
170179

171-
Integer chainFileId = params.getInt("chainFileId");
180+
int chainFileId = params.getInt("chainFileId");
172181
File chainFile = ctx.getSequenceSupport().getCachedData(chainFileId);
173182
int targetGenomeId = params.getInt("targetGenomeId");
174183

@@ -217,7 +226,7 @@ else if (_vcfFileType.isType(f.getFile()))
217226
{
218227
ReferenceGenome targetGenome = ctx.getSequenceSupport().getCachedGenome(targetGenomeId);
219228
ReferenceGenome sourceGenome = ctx.getSequenceSupport().getCachedGenome(f.getLibrary_id());
220-
liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes);
229+
liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes, useBcfTools);
221230
}
222231
}
223232
catch (Exception e)
@@ -293,7 +302,7 @@ else if (!SequenceUtil.hasLineCount(unmappedOutput))
293302
}
294303
}
295304

296-
public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceGenome sourceGenome, File chain, File input, File output, @Nullable File unmappedOutput, PipelineJob job, double pct, boolean dropGenotypes) throws IOException, PipelineJobException
305+
public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceGenome sourceGenome, File chain, File input, File output, @Nullable File unmappedOutput, PipelineJob job, double pct, boolean dropGenotypes, boolean useBcfTools) throws IOException, PipelineJobException
297306
{
298307
File currentVCF = input;
299308
if (dropGenotypes)
@@ -315,8 +324,16 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
315324
ctx.getFileManager().addIntermediateFile(new File(outputFile.getPath() + ".tbi"));
316325
}
317326

318-
LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper(job.getLogger());
319-
wrapper.doLiftover(currentVCF, chain, targetGenome.getWorkingFastaFile(), unmappedOutput, output, pct);
327+
if (useBcfTools)
328+
{
329+
LiftoverBcfToolsWrapper wrapper = new LiftoverBcfToolsWrapper(job.getLogger());
330+
wrapper.doLiftover(currentVCF, chain, sourceGenome.getWorkingFastaFile(), targetGenome.getWorkingFastaFile(), unmappedOutput, output);
331+
}
332+
else
333+
{
334+
LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper(job.getLogger());
335+
wrapper.doLiftover(currentVCF, chain, targetGenome.getWorkingFastaFile(), unmappedOutput, output, pct);
336+
}
320337

321338
Long mapped = null;
322339
if (output.exists())
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
package org.labkey.sequenceanalysis.run.alignment;
2+
3+
import org.json.JSONObject;
4+
import org.labkey.api.module.ModuleLoader;
5+
import org.labkey.api.pipeline.PipelineJob;
6+
import org.labkey.api.pipeline.PipelineJobException;
7+
import org.labkey.api.pipeline.RecordedAction;
8+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
9+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
10+
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
11+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
12+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
14+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
15+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
16+
import org.labkey.api.util.FileUtil;
17+
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
18+
import org.labkey.sequenceanalysis.run.variant.DepthOfCoverageHandler;
19+
import org.labkey.sequenceanalysis.util.SequenceUtil;
20+
21+
import java.io.File;
22+
import java.io.IOException;
23+
import java.util.ArrayList;
24+
import java.util.Arrays;
25+
import java.util.List;
26+
27+
public class ParagraphStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
28+
{
29+
public ParagraphStep()
30+
{
31+
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Paragraph SV Genotyping", "This will run paraGRAPH on one or more BAM files to genotype SVs", null, Arrays.asList(
32+
ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject()
33+
{{
34+
put("allowBlank", false);
35+
}}, null)
36+
));
37+
}
38+
39+
@Override
40+
public boolean canProcess(SequenceOutputFile o)
41+
{
42+
return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile());
43+
}
44+
45+
@Override
46+
public boolean doRunRemote()
47+
{
48+
return true;
49+
}
50+
51+
@Override
52+
public boolean doRunLocal()
53+
{
54+
return false;
55+
}
56+
57+
@Override
58+
public SequenceOutputProcessor getProcessor()
59+
{
60+
return new DepthOfCoverageHandler.Processor();
61+
}
62+
63+
public static class Processor implements SequenceOutputProcessor
64+
{
65+
@Override
66+
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
67+
{
68+
69+
}
70+
71+
@Override
72+
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
73+
{
74+
File inputVCF = ctx.getSequenceSupport().getCachedData(ctx.getParams().getInt("svVCF"));
75+
if (!inputVCF.exists())
76+
{
77+
throw new PipelineJobException("Unable to find file: " + inputVCF.getPath());
78+
}
79+
80+
for (SequenceOutputFile so : inputFiles)
81+
{
82+
List<String> depthArgs = new ArrayList<>();
83+
depthArgs.add("idxdepth");
84+
depthArgs.add("-d");
85+
depthArgs.add(so.getFile().getPath());
86+
87+
File coverageFile = new File(ctx.getWorkingDirectory(), "coverage.txt");
88+
depthArgs.add("-o");
89+
depthArgs.add(coverageFile.getPath());
90+
91+
depthArgs.add("-r");
92+
depthArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath());
93+
94+
new SimpleScriptWrapper(ctx.getLogger()).execute(depthArgs);
95+
96+
if (!coverageFile.exists())
97+
{
98+
throw new PipelineJobException("Missing file: " + coverageFile.getPath());
99+
}
100+
101+
// Should produce a simple text file:
102+
// id path depth read length
103+
// TNPRC-IB18 ../IB18.cram 29.77 150
104+
105+
List<String> paragraphArgs = new ArrayList<>();
106+
paragraphArgs.add("multigrmpy.py");
107+
paragraphArgs.add("--verbose");
108+
109+
File paragraphOut = new File(ctx.getWorkingDirectory(), FileUtil.getBaseName(so.getFile()) + ".paragraph.txt");
110+
paragraphArgs.add("-o");
111+
paragraphArgs.add(paragraphOut.getPath());
112+
113+
int svVcfId = ctx.getParams().optInt("svVCF");
114+
if (svVcfId == 0)
115+
{
116+
throw new PipelineJobException("Missing svVCF ID");
117+
}
118+
119+
File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId);
120+
if (svVcf == null)
121+
{
122+
throw new PipelineJobException("File not found for ID: " + svVcfId);
123+
}
124+
else if (!svVcf.exists())
125+
{
126+
throw new PipelineJobException("Missing file: " + svVcf.getPath());
127+
}
128+
129+
paragraphArgs.add("-i");
130+
paragraphArgs.add(svVcf.getPath());
131+
132+
paragraphArgs.add("-m");
133+
paragraphArgs.add(coverageFile.getPath());
134+
135+
paragraphArgs.add("-r");
136+
paragraphArgs.add(ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()).getWorkingFastaFile().getPath());
137+
138+
paragraphArgs.add("--scratch-dir");
139+
paragraphArgs.add(SequencePipelineService.get().getJavaTempDir());
140+
141+
Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
142+
if (threads != null)
143+
{
144+
paragraphArgs.add("--threads");
145+
paragraphArgs.add(threads.toString());
146+
}
147+
148+
paragraphArgs.add("--logfile");
149+
paragraphArgs.add(new File(ctx.getWorkingDirectory(), "paragraph.log").getPath());
150+
151+
new SimpleScriptWrapper(ctx.getLogger()).execute(paragraphArgs);
152+
153+
File genotypes = new File(ctx.getWorkingDirectory(), "genotypes.vcf.gz");
154+
if (!genotypes.exists())
155+
{
156+
throw new PipelineJobException("Missing file: " + genotypes.getPath());
157+
}
158+
159+
try
160+
{
161+
SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger());
162+
}
163+
catch (IOException e)
164+
{
165+
throw new PipelineJobException(e);
166+
}
167+
168+
ctx.getFileManager().addSequenceOutput(genotypes, "paraGRAPH Genotypes: " + so.getName(), "paraGRAPH Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")");
169+
}
170+
}
171+
}
172+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/StarWrapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public StarWrapper(@Nullable Logger logger)
5353

5454
public static class StarAlignmentStep extends AbstractAlignmentPipelineStep<StarWrapper> implements AlignmentStep
5555
{
56-
public StarAlignmentStep(AlignmentStepProvider provider, PipelineContext ctx)
56+
public StarAlignmentStep(AlignmentStepProvider<?> provider, PipelineContext ctx)
5757
{
5858
super(provider, ctx, new StarWrapper(ctx.getLogger()));
5959
}

0 commit comments

Comments
 (0)