
Commit d26f5cf

Merge pull request #104 from LabKey/fb_merge_discvr-21.3
Merge discvr-21.3 to develop
2 parents 9bef94f + c50b226 commit d26f5cf

File tree

10 files changed: +250 -39 lines changed


SequenceAnalysis/build.gradle

Lines changed: 17 additions & 0 deletions
@@ -67,3 +67,20 @@ if (project.findProject(BuildUtils.getTestProjectPath(project.gradle)) != null &
         dependsOn(createPipelineConfigTask)
     }
 }
+
+project.task("copyJars",
+        type: Copy,
+        group: "Build",
+        description: "Copy commons-math3 JAR to module's lib directory",
+        { CopySpec copy ->
+            copy.setDuplicatesStrategy(DuplicatesStrategy.EXCLUDE)
+            copy.from(project.configurations.external)
+            copy.into new File("${project.labkey.explodedModuleLibDir}")
+            copy.include {
+                "**commons-math3-**.jar"
+            }
+        }
+)
+
+project.tasks.module.dependsOn(project.tasks.copyJars)
+project.tasks.copyJars.mustRunAfter(project.tasks.populateExplodedLib)

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 3 additions & 1 deletion
@@ -131,6 +131,7 @@
 import org.labkey.sequenceanalysis.run.reference.SavedReferenceLibraryStep;
 import org.labkey.sequenceanalysis.run.reference.VirusReferenceLibraryStep;
 import org.labkey.sequenceanalysis.run.util.CombineGVCFsHandler;
+import org.labkey.sequenceanalysis.run.util.FastqcRunner;
 import org.labkey.sequenceanalysis.run.util.GenomicsDBAppendHandler;
 import org.labkey.sequenceanalysis.run.util.GenomicsDBImportHandler;
 import org.labkey.sequenceanalysis.run.variant.CombineVariantsHandler;
@@ -476,7 +477,8 @@ public Set<Class> getUnitTests()
             ProcessVariantsHandler.TestCase.class,
             VariantProcessingJob.TestCase.class,
             ScatterGatherUtils.TestCase.class,
-            ChainFileValidator.TestCase.class
+            ChainFileValidator.TestCase.class,
+            FastqcRunner.TestCase.class
         );
     }

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/LofreqAnalysis.java

Lines changed: 50 additions & 3 deletions
@@ -613,6 +613,11 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
             File pindelOutput = PindelAnalysis.runPindel(output, getPipelineCtx(), rs, outputDir, inputBam, referenceGenome.getWorkingFastaFile(), minFraction, minDepth, true, coverageOut, minInsertSize);
             try (CSVReader reader = new CSVReader(Readers.getReader(pindelOutput), '\t'))
             {
+                final int MAX_DEL_EVENT_COVERAGE = 20;
+                final double MIN_AF = 0.25;
+                final int MIN_LENGTH_TO_CONSIDER = 10;
+                final int MAX_DELETION_LENGTH = 5000;
+
                 String[] line;
                 while ((line = reader.readNext()) != null)
                 {
@@ -621,10 +626,50 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
                         continue;
                     }

-                    if (Double.parseDouble(line[6]) >= 0.35)
+                    int start = Integer.parseInt(line[2]); //1-based, coordinate prior, like VCF
+                    int end = Integer.parseInt(line[3]); //1-based, actual coordinate, like VCF
+                    String refAllele = line[11];
+                    String altAllele = line[12];
+                    int refLength = end - start;
+                    int altLength = altAllele.length();
+
+                    // Assume LoFreq calls these well enough:
+                    if (refLength < MIN_LENGTH_TO_CONSIDER && altLength < MIN_LENGTH_TO_CONSIDER)
+                    {
+                        continue;
+                    }
+
+                    if ("D".equals(line[0]) && refLength > MAX_DELETION_LENGTH)
+                    {
+                        continue;
+                    }
+
+                    if (Double.parseDouble(line[6]) < MIN_AF)
                     {
-                        indelMap.put(line[0], indelMap.getOrDefault(line[0], 0) + 1);
+                        continue;
+                    }
+
+                    double eventCoverage = 0.0;
+                    if (StringUtils.trimToNull(line[11]) != null)
+                    {
+                        eventCoverage = Double.parseDouble(line[11]);
+                    }
+
+                    if ("D".equals(line[0]) && eventCoverage > MAX_DEL_EVENT_COVERAGE)
+                    {
+                        continue;
                     }
+
+                    indelMap.put(line[0], indelMap.getOrDefault(line[0], 0) + 1);
+
+                    VariantContextBuilder vcb = new VariantContextBuilder();
+                    vcb.start(start);
+                    vcb.stop(end);
+                    vcb.chr(line[1]);
+                    vcb.alleles(Arrays.asList(Allele.create(refAllele, true), Allele.create(altAllele)));
+                    vcb.attribute("AF", Double.parseDouble(line[6]));
+                    int dp = "I".equals(line[0]) ? Integer.parseInt(line[4]) : (int)Double.parseDouble(line[10]);
+                    vcb.attribute("DP", dp);
                 }
             }
             catch (IOException e)
@@ -670,7 +715,9 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
             if (pangolinData != null)
            {
                 writer.writeNext(new String[]{"Pangolin", "PangolinLineage", pangolinData[1]});
-                writer.writeNext(new String[]{"Pangolin", "PangolinLineageConfidence", pangolinData[2]});
+                writer.writeNext(new String[]{"Pangolin", "PangolinConflicts", pangolinData[2]});
+                writer.writeNext(new String[]{"Pangolin", "PangolinVersions", pangolinData[3]});
+                writer.writeNext(new String[]{"Pangolin", "PangolinVersions", pangolinData[4]});
             }
             else
             {
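
For orientation only, here is a minimal standalone sketch of the filtering the new pindel-parsing block applies to each record, assuming the column layout implied by the diff (event type in column 0, start/end in columns 2-3, allele frequency in column 6, event coverage in column 11, alt allele in column 12). The helper name and shape are hypothetical and not part of this commit:

    // Hypothetical helper; mirrors the thresholds introduced in the diff above.
    private static boolean passesIndelFilters(String[] line)
    {
        final int MAX_DEL_EVENT_COVERAGE = 20;
        final double MIN_AF = 0.25;
        final int MIN_LENGTH_TO_CONSIDER = 10;
        final int MAX_DELETION_LENGTH = 5000;

        int refLength = Integer.parseInt(line[3]) - Integer.parseInt(line[2]);
        int altLength = line[12].length();

        // Short events are left to LoFreq itself:
        if (refLength < MIN_LENGTH_TO_CONSIDER && altLength < MIN_LENGTH_TO_CONSIDER)
            return false;

        // Drop implausibly long deletions:
        if ("D".equals(line[0]) && refLength > MAX_DELETION_LENGTH)
            return false;

        // Drop low-frequency events:
        if (Double.parseDouble(line[6]) < MIN_AF)
            return false;

        // Drop deletions whose per-event coverage exceeds the cap:
        double eventCoverage = line[11] == null || line[11].trim().isEmpty() ? 0.0 : Double.parseDouble(line[11]);
        return !("D".equals(line[0]) && eventCoverage > MAX_DEL_EVENT_COVERAGE);
    }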

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PangolinHandler.java

Lines changed: 66 additions & 8 deletions
@@ -3,6 +3,7 @@
 import au.com.bytecode.opencsv.CSVReader;
 import au.com.bytecode.opencsv.CSVWriter;
 import htsjdk.samtools.util.IOUtil;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.Logger;
 import org.json.JSONObject;
@@ -30,6 +31,7 @@
 import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
 import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
 import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
+import org.labkey.api.util.FileUtil;
 import org.labkey.api.util.PageFlowUtil;
 import org.labkey.sequenceanalysis.SequenceAnalysisModule;
 import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
@@ -102,6 +104,10 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
                     row1.put("metricName", "PangolinLineage");
                     row1.put("qualvalue", line[1]);
                     row1.put("container", so.getContainer());
+                    if (StringUtils.trimToNull(line[3]) != null)
+                    {
+                        row1.put("comment", line[3]);
+                    }
                     toInsert.add(row1);

                     if (StringUtils.trimToNull(line[2]) != null)
@@ -111,11 +117,32 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
                         row2.put("readset", so.getReadset());
                         row2.put("analysis_id", so.getAnalysis_id());
                         row2.put("category", "Pangolin");
-                        row2.put("metricName", "PangolinLineageConfidence");
+                        row2.put("metricName", "PangolinConflicts");
                         row2.put("value", Double.parseDouble(line[2]));
                         row2.put("container", so.getContainer());
+                        if (StringUtils.trimToNull(line[3]) != null)
+                        {
+                            row2.put("comment", line[3]);
+                        }
                         toInsert.add(row2);
                     }
+
+                    if (StringUtils.trimToNull(line[4]) != null)
+                    {
+                        Map<String, Object> row = new CaseInsensitiveHashMap<>();
+                        row.put("dataid", so.getDataId());
+                        row.put("readset", so.getReadset());
+                        row.put("analysis_id", so.getAnalysis_id());
+                        row.put("category", "Pangolin");
+                        row.put("metricName", "PangolinSummary");
+                        row.put("qualvalue", line[4]);
+                        row.put("container", so.getContainer());
+                        if (StringUtils.trimToNull(line[3]) != null)
+                        {
+                            row.put("comment", line[3]);
+                        }
+                        toInsert.add(row);
+                    }
                 }
             }
             catch (IOException e)
@@ -167,7 +194,29 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
             for (SequenceOutputFile so : inputFiles)
             {
                 String[] pangolinData = runPangolin(so.getFile(), ctx.getLogger(), ctx.getFileManager());
-                writer.writeNext(new String[]{String.valueOf(so.getRowid()), (pangolinData == null ? "QC Fail" : pangolinData[1]), (pangolinData == null ? "" : pangolinData[2])});
+
+                List<String> versions = new ArrayList<>();
+                if (pangolinData != null)
+                {
+                    if (StringUtils.trimToNull(pangolinData[3]) != null)
+                    {
+                        versions.add("Pangolin version: " + pangolinData[3]);
+                    }
+
+                    if (StringUtils.trimToNull(pangolinData[4]) != null)
+                    {
+                        versions.add("pangoLEARN version: " + pangolinData[4]);
+                    }
+
+                    if (StringUtils.trimToNull(pangolinData[5]) != null)
+                    {
+                        versions.add("pango version: " + pangolinData[5]);
+                    }
+                }
+
+                String comment = StringUtils.join(versions, ",");
+
+                writer.writeNext(new String[]{String.valueOf(so.getRowid()), (pangolinData == null ? "QC Fail" : pangolinData[1]), (pangolinData == null ? "" : pangolinData[2]), comment, (pangolinData == null ? "" : pangolinData[7])});
             }
         }
         catch (IOException e)
@@ -192,6 +241,11 @@ public static void updatePangolinRefs(Logger log) throws PipelineJobException
         wrapper.execute(Arrays.asList("/bin/bash", pangolin.getPath()));
     }

+    public static File getRenamedPangolinOutput(File consensusFasta)
+    {
+        return new File(consensusFasta.getParentFile(), FileUtil.getBaseName(consensusFasta) + ".pangolin.csv");
+    }
+
     public static String[] runPangolin(File consensusFasta, Logger log, PipelineOutputTracker tracker) throws PipelineJobException
     {
         SimpleScriptWrapper wrapper = new SimpleScriptWrapper(log);
@@ -211,17 +265,21 @@ public static String[] runPangolin(File consensusFasta, Logger log, PipelineOutp
             throw new PipelineJobException("Pangolin output not found: " + output.getPath());
         }

-        tracker.addIntermediateFile(output);
-        try (CSVReader reader = new CSVReader(Readers.getReader(output)))
+        try
         {
-            reader.readNext(); //header
-            String[] line = reader.readNext();
+            File outputMoved = getRenamedPangolinOutput(consensusFasta);
+            FileUtils.moveFile(output, outputMoved);
+            try (CSVReader reader = new CSVReader(Readers.getReader(outputMoved)))
+            {
+                reader.readNext(); //header
+                String[] line = reader.readNext();

-            return line;
+                return line;
+            }
         }
         catch (IOException e)
         {
-            throw new PipelineJobException();
+            throw new PipelineJobException(e);
         }
     }
 }
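
As a usage sketch (not part of this commit), getRenamedPangolinOutput simply derives a stable, per-sample CSV name next to the input consensus FASTA. The paths below are hypothetical and assume FileUtil.getBaseName() strips only the final extension:

    // Hypothetical example of the path produced by the new helper.
    File consensus = new File("/work/sample1.fasta");
    File pangolinCsv = PangolinHandler.getRenamedPangolinOutput(consensus);
    // pangolinCsv -> /work/sample1.pangolin.csv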

0 commit comments
