Skip to content

Commit ed2c6d7

Browse files
committed
Bugfix to sorting of indel tsv
1 parent c5e9409 commit ed2c6d7

File tree

2 files changed

+27
-12
lines changed

2 files changed

+27
-12
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/MergeLoFreqVcfHandler.java

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
3030
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
3131
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
32+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
3233
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
3334
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
3435
import org.labkey.api.util.PageFlowUtil;
@@ -64,6 +65,7 @@
6465
public class MergeLoFreqVcfHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
6566
{
6667
private static final String MIN_AF = "minAfThreshold";
68+
private static final String MIN_AF_INDEL = "minIndelAfThreshold";
6769
private static final String MIN_COVERAGE = "minCoverage";
6870

6971
public MergeLoFreqVcfHandler()
@@ -77,6 +79,11 @@ public MergeLoFreqVcfHandler()
7779
put("maxValue", 1);
7880
put("decimalPrecision", 2);
7981
}}, 0.10),
82+
ToolParameterDescriptor.create(MIN_AF, "Min AF to Include (Indels)", "An indel will be included in the indel output if there is a passing variant above this AF in at least one sample.", "ldk-numberfield", new JSONObject(){{
83+
put("minValue", 0);
84+
put("maxValue", 1);
85+
put("decimalPrecision", 2);
86+
}}, 0.05),
8087
ToolParameterDescriptor.create(MIN_COVERAGE, "Min Coverage To Include", "A site will be reported as ND, unless coverage is above this threshold", "ldk-integerfield", new JSONObject(){{
8188
put("minValue", 0);
8289
}}, 25),
@@ -221,6 +228,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
221228
basename = basename + (basename.endsWith(".") ? "" : ".");
222229

223230
final double minAfThreshold = ctx.getParams().optDouble(MIN_AF, 0.0);
231+
final double minIndelAfThreshold = ctx.getParams().optDouble(MIN_AF_INDEL, 0.05);
224232
final int minDepth = ctx.getParams().optInt(MIN_COVERAGE, 0);
225233

226234
Set<String> errors = new HashSet<>();
@@ -279,16 +287,6 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
279287
double af = vc.getAttributeAsDouble("AF", 0.0);
280288
if (af >= minAfThreshold)
281289
{
282-
if (vc.isIndel())
283-
{
284-
uniqueIndels.add(vc.getContig() + "<>" + vc.getStart() + "<>" + vc.getReference().getBaseString() + "<>" + vc.getAlternateAlleles().get(0).getBaseString());
285-
List<Integer> depths = vc.getAttributeAsIntList("DP4", 0);
286-
int alleleDepth = depths.get(2) + depths.get(3);
287-
288-
int length = vc.getLengthOnReference() - 1; //NOTE: indels are left-padded including one ref base
289-
writer.writeNext(new String[]{ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName(), String.valueOf(so.getRowid()), String.valueOf(so.getReadset()), "LoFreq", vc.getContig(), String.valueOf(vc.getStart()), String.valueOf(vc.getEnd()), String.valueOf(length), vc.getReference().getBaseString(), vc.getAlternateAlleles().get(0).getBaseString(), vc.getAttributeAsString("GATK_DP", "ND"), vc.getAttributeAsString("DP", "ND"), String.valueOf(alleleDepth), vc.getAttributeAsString("AF", "ND")});
290-
}
291-
292290
for (int i = 0; i < vc.getLengthOnReference(); i++)
293291
{
294292
int effectiveStart = vc.getStart() + i;
@@ -302,6 +300,16 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
302300
siteToAllele.put(key, site);
303301
}
304302
}
303+
304+
if (af > minIndelAfThreshold && vc.isIndel())
305+
{
306+
uniqueIndels.add(vc.getContig() + "<>" + vc.getStart() + "<>" + vc.getReference().getBaseString() + "<>" + vc.getAlternateAlleles().get(0).getBaseString());
307+
List<Integer> depths = vc.getAttributeAsIntList("DP4", 0);
308+
int alleleDepth = depths.get(2) + depths.get(3);
309+
310+
int length = vc.getLengthOnReference() - 1; //NOTE: indels are left-padded including one ref base
311+
writer.writeNext(new String[]{ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName(), String.valueOf(so.getRowid()), String.valueOf(so.getReadset()), "LoFreq", vc.getContig(), String.valueOf(vc.getStart()), String.valueOf(vc.getEnd()), String.valueOf(length), vc.getReference().getBaseString(), vc.getAlternateAlleles().get(0).getBaseString(), vc.getAttributeAsString("GATK_DP", "ND"), vc.getAttributeAsString("DP", "ND"), String.valueOf(alleleDepth), vc.getAttributeAsString("AF", "ND")});
312+
}
305313
}
306314
}
307315

@@ -711,7 +719,7 @@ private void sortTsvFile(JobContext ctx, File input) throws PipelineJobException
711719
writer.println("set -e");
712720
writer.println("{");
713721
writer.println("zcat " + input.getPath() +" | head -n 1;");
714-
writer.println("zcat " + input.getPath() +" | tail +2 | sort -k5,5 -k6,6n -k7,7n;");
722+
writer.println("zcat " + input.getPath() +" | tail -n +2 | sort -k5,5 -k6,6n -k7,7n;");
715723
writer.println("} | gzip -c > " + output.getPath() + "\n");
716724
}
717725
catch (IOException e)
@@ -723,6 +731,13 @@ private void sortTsvFile(JobContext ctx, File input) throws PipelineJobException
723731
wrapper.setWorkingDir(script.getParentFile());
724732
wrapper.execute(Arrays.asList("/bin/bash", script.getPath()));
725733

734+
long lineCountIn = SequencePipelineService.get().getLineCount(input);
735+
long lineCountOut = SequencePipelineService.get().getLineCount(output);
736+
if (lineCountIn != lineCountOut)
737+
{
738+
throw new PipelineJobException("Error sorting file");
739+
}
740+
726741
input.delete();
727742
script.delete();
728743

SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ public String getPrimaryExtension()
113113
public static long getLineCount(File f) throws PipelineJobException
114114
{
115115
FileType gz = new FileType(".gz");
116-
try (InputStream is = gz.isType(f) ? new GZIPInputStream(new FileInputStream(f)) : new FileInputStream(f);BufferedReader reader = new BufferedReader(new InputStreamReader(is));)
116+
try (InputStream is = gz.isType(f) ? new GZIPInputStream(new FileInputStream(f)) : new FileInputStream(f);BufferedReader reader = new BufferedReader(new InputStreamReader(is, StringUtilsLabKey.DEFAULT_CHARSET));)
117117
{
118118
long i = 0;
119119
while (reader.readLine() != null)

0 commit comments

Comments
 (0)