Skip to content

Commit 7ce24ff

Browse files
authored
Merge pull request #303 from LabKey/fb_merge_24.7_to_develop
Merge discvr-24.7 to develop
2 parents: c0fb39d + c39c6b8 · commit 7ce24ff

File tree

8 files changed

+62
-42
lines changed

8 files changed

+62
-42
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli
103103
}
104104

105105
File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
106+
if (localCacheDir == null)
107+
{
108+
throw new PipelineJobException("RemoteGenomeCacheDirectory was not set");
109+
}
110+
106111
if (isUpToDate(genome))
107112
{
108113
log.debug("Genome up-to-date, will not repeat rsync: " + genome.getGenomeId());

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,6 @@ static public void setInstance(SequencePipelineService instance)
9898
*/
9999
abstract public String getDockerCommand();
100100

101-
/**
102-
* This allows instances to supply a user that will be passed to 'docker login'. This is rarely needed. It can be set using DOCKER_USER in pipelineConfig.xml
103-
*/
104-
abstract public String getDockerUser();
105-
106101
abstract public List<File> getSequenceJobInputFiles(PipelineJob job);
107102

108103
/**

SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
269269
return;
270270
}
271271

272-
// If auto-loading, assume we want to read the URL
273-
thePanel.down('#readUrlParams').setValue(true);
274-
275272
var recIdx = store.find('name', LABKEY.ActionURL.getParameter('template'));
276273
if (recIdx > -1) {
277274
thePanel.down('labkey-combo').setValue(store.getAt(recIdx));
@@ -300,12 +297,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
300297
helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
301298
fieldLabel: 'Submit Jobs to Same Folder/Workbook as Readset',
302299
labelWidth: 200
303-
},{
304-
xtype: 'checkbox',
305-
itemId: 'readUrlParams',
306-
helpPopup: 'If true, any parameters provided on the URL with the same name as a parameter in the JSON will be read and override the template.',
307-
fieldLabel: 'Read Parameters From URL',
308-
labelWidth: 200
309300
}]
310301
}],
311302
buttons: [{
@@ -362,8 +353,7 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
362353
delete json.submitJobToReadsetContainer;
363354
}
364355

365-
var readUrlParams = win.down('#readUrlParams').getValue();
366-
win.sequencePanel.applySavedValues(json, readUrlParams);
356+
win.sequencePanel.applySavedValues(json, true);
367357

368358
var submitJobToReadsetContainer = win.sequencePanel.down('[name="submitJobToReadsetContainer"]');
369359
if (submitJobToReadsetContainer) {

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -457,18 +457,6 @@ public String getDockerCommand()
457457
return "docker";
458458
}
459459

460-
@Override
461-
public String getDockerUser()
462-
{
463-
String val = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("DOCKER_USER");
464-
if (StringUtils.trimToNull(val) != null)
465-
{
466-
return val;
467-
}
468-
469-
return null;
470-
}
471-
472460
@Override
473461
public List<File> getSequenceJobInputFiles(PipelineJob job)
474462
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
import java.util.ArrayList;
4040
import java.util.Arrays;
4141
import java.util.List;
42+
import java.util.Set;
43+
import java.util.stream.Collectors;
4244

4345
public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
4446
{
@@ -119,17 +121,25 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n
119121
try (SamReader reader = samReaderFactory.open(so.getFile()))
120122
{
121123
SAMFileHeader header = reader.getFileHeader().clone();
122-
int nSamples = reader.getFileHeader().getReadGroups().size();
123-
if (nSamples != 1)
124+
List<SAMReadGroupRecord> rgs = header.getReadGroups();
125+
Set<String> distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet());
126+
if (distinctLibraries.size() > 1)
124127
{
125-
throw new PipelineJobException("File has more than one read group, found: " + nSamples);
128+
throw new PipelineJobException("File has more than one library in read group(s), found: " + distinctLibraries.stream().collect(Collectors.joining(", ")));
126129
}
127130

128-
List<SAMReadGroupRecord> rgs = header.getReadGroups();
129-
String existingSample = rgs.get(0).getSample();
130-
if (existingSample.equals(newRsName))
131+
Set<String> distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
132+
if (distinctSamples.size() > 1)
131133
{
132-
throw new PipelineJobException("Sample names match, aborting");
134+
throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", ")));
135+
}
136+
137+
if (
138+
distinctLibraries.stream().filter(x -> !x.equals(newRsName)).count() == 0L &&
139+
distinctSamples.stream().filter(x -> !x.equals(newRsName)).count() == 0L
140+
)
141+
{
142+
throw new PipelineJobException("Sample and library names match in read group(s), aborting");
133143
}
134144

135145
return header;
@@ -252,13 +262,23 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
252262

253263
List<SAMReadGroupRecord> rgs = header.getReadGroups();
254264
String existingSample = rgs.get(0).getSample();
255-
rgs.get(0).setSample(newRsName);
265+
String existingLibrary = rgs.get(0).getLibrary();
266+
rgs.forEach(rg -> {
267+
rg.setSample(newRsName);
268+
rg.setLibrary(newRsName);
269+
});
256270

257271
File headerBam = new File(ctx.getWorkingDirectory(), "header.bam");
258272
try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam))
259273
{
260274

261275
}
276+
277+
if (!headerBam.exists())
278+
{
279+
throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
280+
}
281+
262282
ctx.getFileManager().addIntermediateFile(headerBam);
263283
ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));
264284

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.labkey.sequenceanalysis.run.alignment;
22

33
import htsjdk.samtools.SAMFileHeader;
4+
import htsjdk.samtools.SAMReadGroupRecord;
45
import htsjdk.samtools.SamReader;
56
import htsjdk.samtools.SamReaderFactory;
67
import org.apache.commons.io.FileUtils;
@@ -30,6 +31,8 @@
3031
import java.util.ArrayList;
3132
import java.util.Arrays;
3233
import java.util.List;
34+
import java.util.Set;
35+
import java.util.stream.Collectors;
3336

3437
public class ParagraphStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
3538
{
@@ -141,16 +144,23 @@ else if (!svVcf.exists())
141144
{
142145
throw new PipelineJobException("No read groups found in input BAM");
143146
}
144-
else if (header.getReadGroups().size() > 1)
147+
148+
Set<String> uniqueSamples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
149+
if (uniqueSamples.size() > 1)
145150
{
146-
throw new PipelineJobException("More than one read group found in BAM");
151+
throw new PipelineJobException("Readgroups contained more than one unique sample");
147152
}
148153

149-
rgId = header.getReadGroups().get(0).getSample();
154+
rgId = uniqueSamples.iterator().next();
150155

151156
JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
152157
writer.println("id\tpath\tdepth\tread length");
153158
double depth = json.getJSONObject("autosome").getDouble("depth");
159+
if (depth <= 0)
160+
{
161+
throw new PipelineJobException("Depth was zero for file: " + so.getFile().getPath());
162+
}
163+
154164
double readLength = json.getInt("read_length");
155165
writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength);
156166
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
*/
2323
public class AddOrReplaceReadGroupsStep extends AbstractCommandPipelineStep<AddOrReplaceReadGroupsWrapper> implements BamProcessingStep
2424
{
25-
public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx)
25+
public AddOrReplaceReadGroupsStep(PipelineStepProvider<?> provider, PipelineContext ctx)
2626
{
2727
super(provider, ctx, new AddOrReplaceReadGroupsWrapper(ctx.getLogger()));
2828
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
1111
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
1212
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1314
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
1415
import org.labkey.api.util.PageFlowUtil;
1516
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
@@ -31,7 +32,8 @@ public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHan
3132
public MergeVcfsAndGenotypesHandler()
3233
{
3334
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Merge Vcfs And Genotypes", "Combine multiple VCF files", null, List.of(
34-
ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, "")
35+
ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, ""),
36+
ToolParameterDescriptor.create("doSort", "Sort Inputs", "If checked, the input VCFs will be sorted prior to merge. This is usually not necessary", "checkbox", null, false)
3537
));
3638
}
3739

@@ -78,6 +80,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
7880
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
7981
{
8082
File outputVcf = new File(ctx.getOutputDir(), ctx.getParams().getString("basename") + ".combined.vcf.gz");
83+
boolean doSort = ctx.getParams().optBoolean("doSort", false);
8184

8285
RecordedAction action = new RecordedAction(getName());
8386

@@ -90,7 +93,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
9093

9194
List<File> inputVCFs = new ArrayList<>();
9295
inputFiles.forEach(x -> inputVCFs.add(x.getFile()));
93-
inputFiles.forEach(x -> action.addInput(x.getFile(), "Combined VCF"));
96+
inputFiles.forEach(x -> action.addInput(x.getFile(), "Input VCF"));
9497

9598
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeIds.iterator().next());
9699
new MergeVcfsAndGenotypesWrapper(ctx.getLogger()).execute(genome.getWorkingFastaFile(), inputVCFs, outputVcf, null);
@@ -99,6 +102,15 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
99102
throw new PipelineJobException("unable to find output: " + outputVcf.getPath());
100103
}
101104

105+
if (doSort)
106+
{
107+
ctx.getLogger().info("Sorting VCFs");
108+
for (File f : inputVCFs)
109+
{
110+
SequencePipelineService.get().sortVcf(f, null, genome.getSequenceDictionary(), ctx.getLogger());
111+
}
112+
}
113+
102114
action.addOutput(outputVcf, "Combined VCF", false);
103115
SequenceOutputFile so = new SequenceOutputFile();
104116
so.setName(outputVcf.getName());

0 commit comments

Comments (0)