Skip to content

Commit 6550d7c

Browse files
authored
Merge discvr-26.3 to develop (#428)
2 parents 31a70e6 + cc2ec93 commit 6550d7c

15 files changed

Lines changed: 876 additions & 674 deletions

File tree

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
steps:
1717
- name: "Find default branch"
18-
uses: octokit/request-action@v2.x
18+
uses: octokit/request-action@v3.0.0
1919
id: get_default_branch
2020
with:
2121
route: GET /repos/${{ github.repository }}

.github/workflows/build_latest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: "Find default branch"
14-
uses: octokit/request-action@v2.x
14+
uses: octokit/request-action@v3.0.0
1515
id: get_default_branch
1616
with:
1717
route: GET /repos/${{ github.repository }}

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamSorter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public File execute(File input, @Nullable File output, SAMFileHeader.SortOrder s
4040
boolean replaceOriginal = output == null;
4141
if (output == null)
4242
{
43-
output = new File(getOutputDir(input), FileUtil.getBaseName(input) + ".sorted" + "." + FileUtil.getExtension(input));
43+
output = FileUtil.appendName(getOutputDir(input), FileUtil.getBaseName(input) + ".sorted" + "." + FileUtil.getExtension(input));
4444
}
4545

4646
List<String> params = new ArrayList<>();

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@
183183
import org.labkey.sequenceanalysis.run.variant.VariantFiltrationStep;
184184
import org.labkey.sequenceanalysis.run.variant.VariantQCStep;
185185
import org.labkey.sequenceanalysis.run.variant.VariantsToTableStep;
186+
import org.labkey.sequenceanalysis.run.variant.WhatsHapStep;
186187
import org.labkey.sequenceanalysis.util.Barcoder;
187188
import org.labkey.sequenceanalysis.util.ChainFileValidator;
188189
import org.labkey.sequenceanalysis.util.ScatterGatherUtils;
@@ -374,6 +375,7 @@ public static void registerPipelineSteps()
374375
SequencePipelineService.get().registerPipelineStep(new BcftoolsFixploidyStep.Provider());
375376
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillFromFastaStep.Provider());
376377
SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider());
378+
SequencePipelineService.get().registerPipelineStep(new WhatsHapStep.Provider());
377379

378380
//handlers
379381
SequenceAnalysisService.get().registerFileHandler(new LiftoverHandler());

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceJobSupportImpl.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import org.junit.Assert;
77
import org.junit.Test;
88
import org.labkey.api.collections.IntHashMap;
9+
import org.labkey.api.collections.LongArrayList;
910
import org.labkey.api.collections.LongHashMap;
1011
import org.labkey.api.exp.api.ExpData;
1112
import org.labkey.api.exp.api.ExperimentService;
@@ -28,6 +29,7 @@
2829
import java.io.StringReader;
2930
import java.io.StringWriter;
3031
import java.util.ArrayList;
32+
import java.util.Arrays;
3133
import java.util.Collection;
3234
import java.util.Collections;
3335
import java.util.HashMap;
@@ -319,6 +321,7 @@ public void testSerializeWithMap() throws Exception
319321
js1._cachedObjects.put("cachedInt", 1);
320322
js1._cachedObjects.put("cachedString", "foo");
321323
js1._cachedObjects.put("cachedLong", 2L);
324+
js1._cachedObjects.put("cachedListLong", new LongArrayList(Arrays.asList(1L, 2L)));
322325

323326
LongHashMap<Long> longMap = new LongHashMap<>();
324327
longMap.put(1L, 2L);
@@ -349,10 +352,15 @@ public void testSerializeWithMap() throws Exception
349352
assertEquals("Object not serialized with correct key type", Integer.class, serializedMap.keySet().iterator().next().getClass());
350353
assertNotNull("Map keys not serialized properly", serializedMap.get(1));
351354

352-
LongHashMap<Long> serializedLongMap = (LongHashMap<Long>)deserialized.getCachedObject("cachedLongMap", LongHashMap.class);
355+
LongHashMap<Long> serializedLongMap = deserialized.getCachedObject("cachedLongMap", LongHashMap.class);
353356
assertEquals("LongMap not serialized properly", 1, serializedLongMap.size());
354357
assertEquals("Object not serialized with correct key type", Long.class, serializedLongMap.keySet().iterator().next().getClass());
355358
assertNotNull("LongMap keys not serialized properly", serializedLongMap.get(1L));
359+
360+
List<Long> serializedListLong = deserialized.getCachedObject("cachedListLong", mapper.getTypeFactory().constructType(LongArrayList.class));
361+
assertEquals("List<Long> not serialized properly", 2, serializedListLong.size());
362+
assertEquals("List<Long> values not serialized properly", 1L, (long)serializedListLong.get(0));
363+
assertEquals("List<Long> values not serialized properly", 2L, (long)serializedListLong.get(1));
356364
}
357365

358366
@Test
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
package org.labkey.sequenceanalysis.run.variant;
2+
3+
import htsjdk.samtools.util.Interval;
4+
import htsjdk.variant.vcf.VCFFileReader;
5+
import htsjdk.variant.vcf.VCFHeader;
6+
import org.apache.logging.log4j.Logger;
7+
import org.jetbrains.annotations.Nullable;
8+
import org.labkey.api.collections.LongArrayList;
9+
import org.labkey.api.data.Container;
10+
import org.labkey.api.data.SimpleFilter;
11+
import org.labkey.api.data.TableInfo;
12+
import org.labkey.api.data.TableSelector;
13+
import org.labkey.api.pipeline.PipelineJob;
14+
import org.labkey.api.pipeline.PipelineJobException;
15+
import org.labkey.api.query.FieldKey;
16+
import org.labkey.api.query.QueryService;
17+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
18+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
19+
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
20+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
21+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
22+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
23+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
24+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
25+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
26+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
27+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
28+
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
29+
import org.labkey.api.util.FileUtil;
30+
import org.labkey.api.util.PageFlowUtil;
31+
import org.labkey.sequenceanalysis.SequenceAnalysisSchema;
32+
33+
import java.io.File;
34+
import java.io.IOException;
35+
import java.util.ArrayList;
36+
import java.util.Collections;
37+
import java.util.List;
38+
39+
public class WhatsHapStep extends AbstractCommandPipelineStep<WhatsHapStep.WhatsHapWrapper> implements VariantProcessingStep
40+
{
41+
public WhatsHapStep(PipelineStepProvider<?> provider, PipelineContext ctx)
42+
{
43+
super(provider, ctx, new WhatsHapStep.WhatsHapWrapper(ctx.getLogger()));
44+
}
45+
46+
public static class Provider extends AbstractVariantProcessingStepProvider<WhatsHapStep>
47+
{
48+
public Provider()
49+
{
50+
super("WhatsHap", "WhatsHap", "", "This will run WhatsHap to phase the VCF using BAM/CRAM data", List.of(
51+
52+
), null, "https://whatshap.readthedocs.io/en/latest/");
53+
}
54+
55+
@Override
56+
public WhatsHapStep create(PipelineContext ctx)
57+
{
58+
return new WhatsHapStep(this, ctx);
59+
}
60+
}
61+
62+
@Override
63+
public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles) throws PipelineJobException
64+
{
65+
if (inputFiles.size() != 1)
66+
{
67+
throw new PipelineJobException("This step expects a single VCF as input");
68+
}
69+
70+
// look up BAM/CRAMs:
71+
for (SequenceOutputFile so : inputFiles)
72+
{
73+
List<String> samples;
74+
try (VCFFileReader reader = new VCFFileReader(so.getFile()))
75+
{
76+
VCFHeader header = reader.getFileHeader();
77+
samples = header.getSampleNamesInOrder();
78+
}
79+
80+
if (samples.isEmpty())
81+
{
82+
throw new IllegalStateException("No samples found in VCF file");
83+
}
84+
85+
ArrayList<Long> toCache = new LongArrayList();
86+
Container targetContainer = getPipelineCtx().getJob().getContainer().isWorkbookOrTab() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer();
87+
TableInfo outputFiles = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_OUTPUTFILES);
88+
for (String sample : samples)
89+
{
90+
// Find readsets for this genome:
91+
SimpleFilter filter1 = new SimpleFilter(FieldKey.fromString("readset/name"), sample).
92+
addCondition(FieldKey.fromString("library_id"), so.getLibrary_id()).
93+
addCondition(FieldKey.fromString("category"), "Alignment");
94+
95+
List<Integer> alignments = new TableSelector(outputFiles, PageFlowUtil.set("rowid"), filter1, null).getArrayList(Integer.class);
96+
if (alignments.isEmpty())
97+
{
98+
throw new PipelineJobException("Unable to find alignment for: " + sample);
99+
}
100+
101+
SequenceOutputFile alignmentFile = SequenceOutputFile.getForId(Collections.max(alignments));
102+
toCache.add(alignmentFile.getDataId());
103+
support.cacheExpData(alignmentFile.getExpData());
104+
}
105+
106+
support.cacheObject(CACHE_KEY, toCache);
107+
}
108+
}
109+
110+
private final String CACHE_KEY = "~cached_readsets~";
111+
112+
private List<File> getCachedBams() throws PipelineJobException
113+
{
114+
List<Long> cachedFiles = getPipelineCtx().getSequenceSupport().getCachedObject(CACHE_KEY, PipelineJob.createObjectMapper().getTypeFactory().constructType(LongArrayList.class));
115+
116+
return cachedFiles.stream().map(x -> getPipelineCtx().getSequenceSupport().getCachedData(x)).toList();
117+
}
118+
119+
@Override
120+
public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
121+
{
122+
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
123+
124+
output.addInput(inputVCF, "Input VCF");
125+
output.addInput(genome.getWorkingFastaFile(), "Reference Genome");
126+
127+
File vcfOut = FileUtil.appendName(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".phased.vcf.gz");
128+
129+
List<String> args = new ArrayList<>();
130+
args.add(getWrapper().getExe().getPath());
131+
args.add("phase");
132+
args.add("-o");
133+
args.add(vcfOut.getPath());
134+
args.add("--reference");
135+
args.add(genome.getWorkingFastaFile().getPath());
136+
args.add(inputVCF.getPath());
137+
for (File f : getCachedBams())
138+
{
139+
args.add(f.getPath());
140+
}
141+
142+
getWrapper().execute(args);
143+
144+
if (!vcfOut.exists())
145+
{
146+
throw new PipelineJobException("Missing file: " + vcfOut.getPath());
147+
}
148+
149+
try
150+
{
151+
SequenceAnalysisService.get().ensureVcfIndex(vcfOut, getPipelineCtx().getLogger());
152+
}
153+
catch (IOException e)
154+
{
155+
throw new PipelineJobException(e);
156+
}
157+
158+
output.addSequenceOutput(vcfOut, "Phased VCF: " + inputVCF.getName(), "Phased VCF", null, null, genome.getGenomeId(), null);
159+
160+
return output;
161+
}
162+
163+
public static class WhatsHapWrapper extends AbstractCommandWrapper
164+
{
165+
public WhatsHapWrapper(@Nullable Logger logger)
166+
{
167+
super(logger);
168+
}
169+
170+
public File getExe()
171+
{
172+
return SequencePipelineService.get().getExeForPackage("WHATSHAPPATH", "whatshap");
173+
}
174+
}
175+
}

0 commit comments

Comments
 (0)