Skip to content

Commit 7ce24ff

Browse files
authored
Merge pull request #303 from LabKey/fb_merge_24.7_to_develop
Merge discvr-24.7 to develop
2 parents: c0fb39d + c39c6b8 · commit 7ce24ff

File tree

8 files changed

+62
-42
lines changed

8 files changed

+62
-42
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli
103103
}
104104

105105
File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
106+
if (localCacheDir == null)
107+
{
108+
throw new PipelineJobException("RemoteGenomeCacheDirectory was not set");
109+
}
110+
106111
if (isUpToDate(genome))
107112
{
108113
log.debug("Genome up-to-date, will not repeat rsync: " + genome.getGenomeId());

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,6 @@ static public void setInstance(SequencePipelineService instance)
9898
*/
9999
abstract public String getDockerCommand();
100100

101-
/**
102-
* This allows instances to supply a user that will be passed to 'docker login'. This is rarely needed. It can be set using DOCKER_USER in pipelineConfig.xml
103-
*/
104-
abstract public String getDockerUser();
105-
106101
abstract public List<File> getSequenceJobInputFiles(PipelineJob job);
107102

108103
/**

SequenceAnalysis/resources/web/SequenceAnalysis/panel/BaseSequencePanel.js

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
269269
return;
270270
}
271271

272-
// If auto-loading, assume we want to read the URL
273-
thePanel.down('#readUrlParams').setValue(true);
274-
275272
var recIdx = store.find('name', LABKEY.ActionURL.getParameter('template'));
276273
if (recIdx > -1) {
277274
thePanel.down('labkey-combo').setValue(store.getAt(recIdx));
@@ -300,12 +297,6 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
300297
helpPopup: 'By default, the pipelines jobs and their outputs will be created in the workbook you selected. However, in certain cases, such as bulk submission of many jobs, it might be preferable to submit each job to the source folder/workbook for each input. Checking this box will enable this.',
301298
fieldLabel: 'Submit Jobs to Same Folder/Workbook as Readset',
302299
labelWidth: 200
303-
},{
304-
xtype: 'checkbox',
305-
itemId: 'readUrlParams',
306-
helpPopup: 'If true, any parameters provided on the URL with the same name as a parameter in the JSON will be read and override the template.',
307-
fieldLabel: 'Read Parameters From URL',
308-
labelWidth: 200
309300
}]
310301
}],
311302
buttons: [{
@@ -362,8 +353,7 @@ Ext4.define('SequenceAnalysis.panel.BaseSequencePanel', {
362353
delete json.submitJobToReadsetContainer;
363354
}
364355

365-
var readUrlParams = win.down('#readUrlParams').getValue();
366-
win.sequencePanel.applySavedValues(json, readUrlParams);
356+
win.sequencePanel.applySavedValues(json, true);
367357

368358
var submitJobToReadsetContainer = win.sequencePanel.down('[name="submitJobToReadsetContainer"]');
369359
if (submitJobToReadsetContainer) {

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -457,18 +457,6 @@ public String getDockerCommand()
457457
return "docker";
458458
}
459459

460-
@Override
461-
public String getDockerUser()
462-
{
463-
String val = PipelineJobService.get().getConfigProperties().getSoftwarePackagePath("DOCKER_USER");
464-
if (StringUtils.trimToNull(val) != null)
465-
{
466-
return val;
467-
}
468-
469-
return null;
470-
}
471-
472460
@Override
473461
public List<File> getSequenceJobInputFiles(PipelineJob job)
474462
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/UpdateReadsetFilesHandler.java

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
import java.util.ArrayList;
4040
import java.util.Arrays;
4141
import java.util.List;
42+
import java.util.Set;
43+
import java.util.stream.Collectors;
4244

4345
public class UpdateReadsetFilesHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
4446
{
@@ -119,17 +121,25 @@ private SAMFileHeader getAndValidateHeaderForBam(SequenceOutputFile so, String n
119121
try (SamReader reader = samReaderFactory.open(so.getFile()))
120122
{
121123
SAMFileHeader header = reader.getFileHeader().clone();
122-
int nSamples = reader.getFileHeader().getReadGroups().size();
123-
if (nSamples != 1)
124+
List<SAMReadGroupRecord> rgs = header.getReadGroups();
125+
Set<String> distinctLibraries = rgs.stream().map(SAMReadGroupRecord::getLibrary).collect(Collectors.toSet());
126+
if (distinctLibraries.size() > 1)
124127
{
125-
throw new PipelineJobException("File has more than one read group, found: " + nSamples);
128+
throw new PipelineJobException("File has more than one library in read group(s), found: " + distinctLibraries.stream().collect(Collectors.joining(", ")));
126129
}
127130

128-
List<SAMReadGroupRecord> rgs = header.getReadGroups();
129-
String existingSample = rgs.get(0).getSample();
130-
if (existingSample.equals(newRsName))
131+
Set<String> distinctSamples = rgs.stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
132+
if (distinctSamples.size() > 1)
131133
{
132-
throw new PipelineJobException("Sample names match, aborting");
134+
throw new PipelineJobException("File has more than one sample in read group(s), found: " + distinctSamples.stream().collect(Collectors.joining(", ")));
135+
}
136+
137+
if (
138+
distinctLibraries.stream().filter(x -> !x.equals(newRsName)).count() == 0L &&
139+
distinctSamples.stream().filter(x -> !x.equals(newRsName)).count() == 0L
140+
)
141+
{
142+
throw new PipelineJobException("Sample and library names match in read group(s), aborting");
133143
}
134144

135145
return header;
@@ -252,13 +262,23 @@ private void reheaderBamOrCram(SequenceOutputFile so, JobContext ctx, String new
252262

253263
List<SAMReadGroupRecord> rgs = header.getReadGroups();
254264
String existingSample = rgs.get(0).getSample();
255-
rgs.get(0).setSample(newRsName);
265+
String existingLibrary = rgs.get(0).getLibrary();
266+
rgs.forEach(rg -> {
267+
rg.setSample(newRsName);
268+
rg.setLibrary(newRsName);
269+
});
256270

257271
File headerBam = new File(ctx.getWorkingDirectory(), "header.bam");
258272
try (SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, false, headerBam))
259273
{
260274

261275
}
276+
277+
if (!headerBam.exists())
278+
{
279+
throw new PipelineJobException("Expected header was not created: " + headerBam.getPath());
280+
}
281+
262282
ctx.getFileManager().addIntermediateFile(headerBam);
263283
ctx.getFileManager().addIntermediateFile(SequencePipelineService.get().getExpectedIndex(headerBam));
264284

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/ParagraphStep.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.labkey.sequenceanalysis.run.alignment;
22

33
import htsjdk.samtools.SAMFileHeader;
4+
import htsjdk.samtools.SAMReadGroupRecord;
45
import htsjdk.samtools.SamReader;
56
import htsjdk.samtools.SamReaderFactory;
67
import org.apache.commons.io.FileUtils;
@@ -30,6 +31,8 @@
3031
import java.util.ArrayList;
3132
import java.util.Arrays;
3233
import java.util.List;
34+
import java.util.Set;
35+
import java.util.stream.Collectors;
3336

3437
public class ParagraphStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
3538
{
@@ -141,16 +144,23 @@ else if (!svVcf.exists())
141144
{
142145
throw new PipelineJobException("No read groups found in input BAM");
143146
}
144-
else if (header.getReadGroups().size() > 1)
147+
148+
Set<String> uniqueSamples = header.getReadGroups().stream().map(SAMReadGroupRecord::getSample).collect(Collectors.toSet());
149+
if (uniqueSamples.size() > 1)
145150
{
146-
throw new PipelineJobException("More than one read group found in BAM");
151+
throw new PipelineJobException("Readgroups contained more than one unique sample");
147152
}
148153

149-
rgId = header.getReadGroups().get(0).getSample();
154+
rgId = uniqueSamples.iterator().next();
150155

151156
JSONObject json = new JSONObject(FileUtils.readFileToString(coverageJson, Charset.defaultCharset()));
152157
writer.println("id\tpath\tdepth\tread length");
153158
double depth = json.getJSONObject("autosome").getDouble("depth");
159+
if (depth <= 0)
160+
{
161+
throw new PipelineJobException("Depth was zero for file: " + so.getFile().getPath());
162+
}
163+
154164
double readLength = json.getInt("read_length");
155165
writer.println(rgId + "\t" + "/work/" + so.getFile().getName() + "\t" + depth + "\t" + readLength);
156166
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/bampostprocessing/AddOrReplaceReadGroupsStep.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
*/
2323
public class AddOrReplaceReadGroupsStep extends AbstractCommandPipelineStep<AddOrReplaceReadGroupsWrapper> implements BamProcessingStep
2424
{
25-
public AddOrReplaceReadGroupsStep(PipelineStepProvider provider, PipelineContext ctx)
25+
public AddOrReplaceReadGroupsStep(PipelineStepProvider<?> provider, PipelineContext ctx)
2626
{
2727
super(provider, ctx, new AddOrReplaceReadGroupsWrapper(ctx.getLogger()));
2828
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/MergeVcfsAndGenotypesHandler.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
1111
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
1212
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
1314
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
1415
import org.labkey.api.util.PageFlowUtil;
1516
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
@@ -31,7 +32,8 @@ public class MergeVcfsAndGenotypesHandler extends AbstractParameterizedOutputHan
3132
public MergeVcfsAndGenotypesHandler()
3233
{
3334
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Merge Vcfs And Genotypes", "Combine multiple VCF files", null, List.of(
34-
ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, "")
35+
ToolParameterDescriptor.create("basename", "Output File Name", "This will be used as the name for the output VCF.", "textfield", null, ""),
36+
ToolParameterDescriptor.create("doSort", "Sort Inputs", "If checked, the input VCFs will be sorted prior to merge. This is usually not necessary", "checkbox", null, false)
3537
));
3638
}
3739

@@ -78,6 +80,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
7880
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
7981
{
8082
File outputVcf = new File(ctx.getOutputDir(), ctx.getParams().getString("basename") + ".combined.vcf.gz");
83+
boolean doSort = ctx.getParams().optBoolean("doSort", false);
8184

8285
RecordedAction action = new RecordedAction(getName());
8386

@@ -90,7 +93,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
9093

9194
List<File> inputVCFs = new ArrayList<>();
9295
inputFiles.forEach(x -> inputVCFs.add(x.getFile()));
93-
inputFiles.forEach(x -> action.addInput(x.getFile(), "Combined VCF"));
96+
inputFiles.forEach(x -> action.addInput(x.getFile(), "Input VCF"));
9497

9598
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenome(genomeIds.iterator().next());
9699
new MergeVcfsAndGenotypesWrapper(ctx.getLogger()).execute(genome.getWorkingFastaFile(), inputVCFs, outputVcf, null);
@@ -99,6 +102,15 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
99102
throw new PipelineJobException("unable to find output: " + outputVcf.getPath());
100103
}
101104

105+
if (doSort)
106+
{
107+
ctx.getLogger().info("Sorting VCFs");
108+
for (File f : inputVCFs)
109+
{
110+
SequencePipelineService.get().sortVcf(f, null, genome.getSequenceDictionary(), ctx.getLogger());
111+
}
112+
}
113+
102114
action.addOutput(outputVcf, "Combined VCF", false);
103115
SequenceOutputFile so = new SequenceOutputFile();
104116
so.setName(outputVcf.getName());

0 commit comments

Comments (0)