|
1 | 1 | package org.labkey.sequenceanalysis.run.analysis; |
2 | 2 |
|
| 3 | +import au.com.bytecode.opencsv.CSVReader; |
| 4 | +import au.com.bytecode.opencsv.CSVWriter; |
3 | 5 | import htsjdk.samtools.util.CloseableIterator; |
| 6 | +import htsjdk.samtools.util.IOUtil; |
| 7 | +import htsjdk.samtools.util.Interval; |
4 | 8 | import htsjdk.variant.variantcontext.VariantContext; |
5 | 9 | import htsjdk.variant.vcf.VCFFileReader; |
6 | 10 | import org.apache.log4j.Logger; |
7 | 11 | import org.jetbrains.annotations.Nullable; |
8 | 12 | import org.json.JSONObject; |
9 | 13 | import org.labkey.api.pipeline.PipelineJobException; |
| 14 | +import org.labkey.api.reader.Readers; |
10 | 15 | import org.labkey.api.sequenceanalysis.SequenceAnalysisService; |
11 | 16 | import org.labkey.api.sequenceanalysis.model.AnalysisModel; |
12 | 17 | import org.labkey.api.sequenceanalysis.model.Readset; |
@@ -53,7 +58,11 @@ public Provider() |
53 | 58 | put("extensions", Arrays.asList("gtf", "gff")); |
54 | 59 | put("width", 400); |
55 | 60 | put("allowBlank", false); |
56 | | - }}, null) |
| 61 | + }}, null), |
| 62 | + ToolParameterDescriptor.create("minCoverage", "Min Coverage For Consensus", "If provided, a consensus will only be called over regions with at least this depth", "ldk-integerfield", new JSONObject(){{ |
| 63 | + put("minValue", 0); |
| 64 | + }}, 25) |
| 65 | + |
57 | 66 | ), null, "http://csb5.github.io/lofreq/"); |
58 | 67 | } |
59 | 68 |
|
@@ -127,22 +136,85 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc |
127 | 136 | } |
128 | 137 |
|
129 | 138 | //generate consensus |
130 | | -// File script = new File(SequenceAnalysisService.get().getScriptPath(SequenceAnalysisModule.NAME, "external/viral_consensus.sh")); |
131 | | -// if (!script.exists()) |
132 | | -// { |
133 | | -// throw new PipelineJobException("Unable to find script: " + script.getPath()); |
134 | | -// } |
135 | | -// |
136 | | -// SimpleScriptWrapper consensusWrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger()); |
137 | | -// consensusWrapper.setWorkingDir(getPipelineCtx().getWorkingDirectory()); |
138 | | -// |
139 | | -// Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); |
140 | | -// if (maxThreads != null) |
141 | | -// { |
142 | | -// consensusWrapper.addToEnvironment("SEQUENCEANALYSIS_MAX_THREADS", maxThreads.toString()); |
143 | | -// } |
144 | | -// |
145 | | -// consensusWrapper.execute(Arrays.asList("/bin/bash", script.getName(), inputBam.getPath(), referenceGenome.getWorkingFastaFile().getPath())); |
| 139 | + File script = new File(SequenceAnalysisService.get().getScriptPath(SequenceAnalysisModule.NAME, "external/viral_consensus.sh")); |
| 140 | + if (!script.exists()) |
| 141 | + { |
| 142 | + throw new PipelineJobException("Unable to find script: " + script.getPath()); |
| 143 | + } |
| 144 | + |
| 145 | + SimpleScriptWrapper consensusWrapper = new SimpleScriptWrapper(getPipelineCtx().getLogger()); |
| 146 | + consensusWrapper.setWorkingDir(getPipelineCtx().getWorkingDirectory()); |
| 147 | + |
| 148 | + Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()); |
| 149 | + if (maxThreads != null) |
| 150 | + { |
| 151 | + consensusWrapper.addToEnvironment("SEQUENCEANALYSIS_MAX_THREADS", maxThreads.toString()); |
| 152 | + } |
| 153 | + |
| 154 | + //Create a BED file with all regions of coverage below MIN_COVERAGE: |
| 155 | + int minCoverage = getProvider().getParameterByName("minCoverage").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Integer.class); |
| 156 | + int positionsSkipped = 0; |
| 157 | + int gapIntervals = 0; |
| 158 | + |
| 159 | + File mask = new File(outputDir, "mask.bed"); |
| 160 | + try (CSVReader reader = new CSVReader(Readers.getReader(coverageOut), '\t');CSVWriter writer = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(mask), '\t', CSVWriter.NO_QUOTE_CHARACTER)) |
| 161 | + { |
| 162 | + String[] line; |
| 163 | + |
| 164 | + Interval intervalOfCurrentGap = null; |
| 165 | + |
| 166 | + while ((line = reader.readNext()) != null) |
| 167 | + { |
| 168 | + String[] tokens = line[0].split(":"); |
| 169 | + int depth = Integer.parseInt(line[1]); |
| 170 | + |
| 171 | + if (depth < minCoverage) |
| 172 | + { |
| 173 | + positionsSkipped++; |
| 174 | + |
| 175 | + if (intervalOfCurrentGap != null) |
| 176 | + { |
| 177 | + if (intervalOfCurrentGap.getContig().equals(tokens[0])) |
| 178 | + { |
| 179 | + //extend |
| 180 | + intervalOfCurrentGap = new Interval(intervalOfCurrentGap.getContig(), intervalOfCurrentGap.getStart(), Integer.parseInt(tokens[1])); |
| 181 | + } |
| 182 | + else |
| 183 | + { |
| 184 | + //switched contigs, write and make new: |
| 185 | + writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())}); |
| 186 | + gapIntervals++; |
| 187 | + intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1])); |
| 188 | + } |
| 189 | + } |
| 190 | + else |
| 191 | + { |
| 192 | + //No existing gap, just start one: |
| 193 | + intervalOfCurrentGap = new Interval(tokens[0], Integer.parseInt(tokens[1]), Integer.parseInt(tokens[1])); |
| 194 | + } |
| 195 | + } |
| 196 | + else |
| 197 | + { |
| 198 | + //We just exited a gap, so write: |
| 199 | + if (intervalOfCurrentGap != null) |
| 200 | + { |
| 201 | + writer.writeNext(new String[]{intervalOfCurrentGap.getContig(), String.valueOf(intervalOfCurrentGap.getStart()-1), String.valueOf(intervalOfCurrentGap.getEnd())}); |
| 202 | + gapIntervals++; |
| 203 | + } |
| 204 | + |
| 205 | + intervalOfCurrentGap = null; |
| 206 | + } |
| 207 | + } |
| 208 | + } |
| 209 | + catch (IOException e) |
| 210 | + { |
| 211 | + throw new PipelineJobException(e); |
| 212 | + } |
| 213 | + |
| 214 | + getPipelineCtx().getLogger().info("Total positions with coverage below threshold (" + minCoverage + "): " + positionsSkipped); |
| 215 | + getPipelineCtx().getLogger().info("Total intervals of these gaps: " + gapIntervals); |
| 216 | + |
| 217 | + consensusWrapper.execute(Arrays.asList("/bin/bash", script.getName(), inputBam.getPath(), referenceGenome.getWorkingFastaFile().getPath(), mask.getPath())); |
146 | 218 |
|
147 | 219 | String description = String.format("Total Variants: %s\nTotal GT 1 PCT: %s\nTotal Indel GT 1 PCT: %s", totalVariants, totalGT1, totalIndelGT1); |
148 | 220 | output.addSequenceOutput(outputVcfSnpEff, "LoFreq: " + rs.getName(), CATEGORY, rs.getReadsetId(), null, referenceGenome.getGenomeId(), description); |
|
0 commit comments