Skip to content

Commit 72fe667

Browse files
committed
Support vireo
1 parent 3307d44 commit 72fe667

File tree

2 files changed

+249
-0
lines changed

2 files changed

+249
-0
lines changed

singlecell/src/org/labkey/singlecell/SingleCellModule.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ public static void registerPipelineSteps()
172172
SequencePipelineService.get().registerPipelineStep(new ClrNormalizeByGroup.Provider());
173173
SequenceAnalysisService.get().registerFileHandler(new CellRangerVLoupeRepairHandler());
174174
SequencePipelineService.get().registerPipelineStep(new PrepareRawCounts.Provider());
175+
SequenceAnalysisService.get().registerFileHandler(new VireoHandler());
175176

176177
SequencePipelineService.get().registerPipelineStep(new RemoveCellCycle.Provider());
177178
SequencePipelineService.get().registerPipelineStep(new RunCellHashing.Provider());
Lines changed: 248 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
package org.labkey.singlecell.pipeline.singlecell;
2+
3+
import htsjdk.samtools.util.IOUtil;
4+
import org.apache.commons.io.FileUtils;
5+
import org.apache.commons.lang3.StringUtils;
6+
import org.json.JSONObject;
7+
import org.labkey.api.module.ModuleLoader;
8+
import org.labkey.api.pipeline.PipelineJob;
9+
import org.labkey.api.pipeline.PipelineJobException;
10+
import org.labkey.api.pipeline.RecordedAction;
11+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
12+
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
13+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
14+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
15+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
16+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
17+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
18+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
19+
import org.labkey.api.writer.PrintWriters;
20+
import org.labkey.singlecell.SingleCellModule;
21+
import org.labkey.singlecell.run.CellRangerGexCountStep;
22+
23+
import java.io.BufferedReader;
24+
import java.io.File;
25+
import java.io.IOException;
26+
import java.io.PrintWriter;
27+
import java.util.ArrayList;
28+
import java.util.Arrays;
29+
import java.util.List;
30+
31+
public class VireoHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
32+
{
33+
public VireoHandler()
34+
{
35+
super(ModuleLoader.getInstance().getModule(SingleCellModule.class), "Run Vireo", "This will run cellsnp-lite and vireo to infer cell-to-sample based on genotype.", null, Arrays.asList(
36+
ToolParameterDescriptor.create("nDonors", "# Donors", "The number of donors to demultiplex", "ldk-integerfield", new JSONObject(){{
37+
put("allowBlank", false);
38+
}}, null),
39+
ToolParameterDescriptor.create("contigs", "Allowable Contigs", "A comma-separated list of contig names to use", "textfield", new JSONObject(){{
40+
41+
}}, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20")
42+
));
43+
}
44+
45+
@Override
46+
public boolean doSplitJobs()
47+
{
48+
return true;
49+
}
50+
51+
@Override
52+
public boolean canProcess(SequenceOutputFile o)
53+
{
54+
return CellRangerGexCountStep.LOUPE_CATEGORY.equals(o.getCategory()) & o.getFile().getName().endsWith("cloupe.cloupe");
55+
}
56+
57+
@Override
58+
public boolean doRunRemote()
59+
{
60+
return true;
61+
}
62+
63+
@Override
64+
public boolean doRunLocal()
65+
{
66+
return false;
67+
}
68+
69+
@Override
70+
public SequenceOutputProcessor getProcessor()
71+
{
72+
return new Processor();
73+
}
74+
75+
public class Processor implements SequenceOutputProcessor
76+
{
77+
@Override
78+
public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
79+
{
80+
if (inputFiles.size() > 1)
81+
{
82+
throw new PipelineJobException("Expected a single input");
83+
}
84+
85+
File bc = getBarcodesFile(inputFiles.get(0).getFile());
86+
if (!bc.exists())
87+
{
88+
throw new PipelineJobException("Unable to find file: " + bc.getPath());
89+
}
90+
91+
File bam = getBamFile(inputFiles.get(0).getFile());
92+
if (!bam.exists())
93+
{
94+
throw new PipelineJobException("Unable to find file: " + bam.getPath());
95+
}
96+
}
97+
98+
private File getBarcodesFile(File loupe)
99+
{
100+
return new File(loupe.getParentFile(), "filtered_feature_bc_matrix/barcodes.tsv.gz");
101+
}
102+
103+
private File getBamFile(File loupe)
104+
{
105+
File[] files = loupe.getParentFile().getParentFile().listFiles(f -> f.getName().endsWith(".bam"));
106+
if (files == null || files.length == 0)
107+
{
108+
throw new IllegalArgumentException("Unable to find BAM file for Loupe file");
109+
}
110+
else if (files.length > 1)
111+
{
112+
throw new IllegalArgumentException("More than one possible BAM file found");
113+
}
114+
115+
return files[0];
116+
}
117+
118+
@Override
119+
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
120+
{
121+
122+
}
123+
124+
@Override
125+
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
126+
{
127+
File barcodesGz = getBarcodesFile(inputFiles.get(0).getFile());
128+
File bam = getBamFile(inputFiles.get(0).getFile());
129+
130+
File barcodes = new File(ctx.getWorkingDirectory(), "barcodes.csv");
131+
try (BufferedReader reader = IOUtil.openFileForBufferedUtf8Reading(barcodesGz); PrintWriter writer = PrintWriters.getPrintWriter(barcodes))
132+
{
133+
String line;
134+
while ((line = reader.readLine()) != null)
135+
{
136+
writer.println(line);
137+
}
138+
}
139+
catch (IOException e)
140+
{
141+
throw new PipelineJobException(e);
142+
}
143+
ctx.getFileManager().addIntermediateFile(barcodes);
144+
145+
List<String> cellsnp = new ArrayList<>();
146+
cellsnp.add("cellsnp-lite");
147+
cellsnp.add("-s");
148+
cellsnp.add(bam.getPath());
149+
cellsnp.add("-b");
150+
cellsnp.add(barcodes.getPath());
151+
152+
File cellsnpDir = new File(ctx.getWorkingDirectory(), "cellsnp");
153+
if (cellsnpDir.exists())
154+
{
155+
try
156+
{
157+
FileUtils.deleteDirectory(cellsnpDir);
158+
}
159+
catch (IOException e)
160+
{
161+
throw new PipelineJobException(e);
162+
}
163+
}
164+
165+
cellsnp.add("-O");
166+
cellsnp.add(cellsnpDir.getPath());
167+
168+
Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
169+
if (maxThreads != null)
170+
{
171+
cellsnp.add("-p");
172+
cellsnp.add(maxThreads.toString());
173+
}
174+
175+
cellsnp.add("--minMAF");
176+
cellsnp.add("0.1");
177+
178+
cellsnp.add("--minCOUNT");
179+
cellsnp.add("100");
180+
181+
cellsnp.add("--gzip");
182+
183+
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(inputFiles.get(0).getLibrary_id());
184+
185+
cellsnp.add("--refseq");
186+
cellsnp.add(genome.getWorkingFastaFile().getPath());
187+
188+
String contigs = ctx.getParams().optString("contigs", "");
189+
if (!StringUtils.isEmpty(contigs))
190+
{
191+
cellsnp.add("--chrom");
192+
cellsnp.add(contigs);
193+
}
194+
195+
new SimpleScriptWrapper(ctx.getLogger()).execute(cellsnp);
196+
197+
List<String> vireo = new ArrayList<>();
198+
vireo.add("vireo");
199+
vireo.add("-c");
200+
vireo.add(cellsnpDir.getPath());
201+
202+
if (maxThreads != null)
203+
{
204+
vireo.add("-p");
205+
vireo.add(maxThreads.toString());
206+
}
207+
208+
vireo.add("-o");
209+
vireo.add(ctx.getWorkingDirectory().getPath());
210+
211+
int nDonors = ctx.getParams().optInt("nDonors", 0);
212+
if (nDonors == 0)
213+
{
214+
throw new PipelineJobException("Must provide nDonors");
215+
}
216+
217+
vireo.add("-N");
218+
vireo.add(String.valueOf(nDonors));
219+
220+
new SimpleScriptWrapper(ctx.getLogger()).execute(vireo);
221+
222+
File[] outFiles = ctx.getWorkingDirectory().listFiles(f -> f.getName().endsWith("_donor_ids.tsv"));
223+
if (outFiles == null || outFiles.length == 0)
224+
{
225+
throw new PipelineJobException("Missing vireo output file");
226+
}
227+
else if (outFiles.length > 1)
228+
{
229+
throw new PipelineJobException("More than one possible vireo output file found");
230+
}
231+
232+
SequenceOutputFile so = new SequenceOutputFile();
233+
so.setReadset(inputFiles.get(0).getReadset());
234+
so.setLibrary_id(inputFiles.get(0).getLibrary_id());
235+
so.setFile(outFiles[0]);
236+
if (so.getReadset() != null)
237+
{
238+
so.setName(ctx.getSequenceSupport().getCachedReadset(so.getReadset()).getName() + ": Vireo Demultiplexing");
239+
}
240+
else
241+
{
242+
so.setName(inputFiles.get(0).getName() + ": Vireo Demultiplexing");
243+
}
244+
so.setCategory("Vireo Demultiplexing");
245+
ctx.addSequenceOutput(so);
246+
}
247+
}
248+
}

0 commit comments

Comments
 (0)