Skip to content

Commit fdb4a6e

Browse files
authored
Merge pull request #113 from LabKey/fb_merge_21.7_to_develop
Merge discvr-21.7 to develop
2 parents e0ee7cc + 07d0f18 commit fdb4a6e

File tree

72 files changed

+7013
-7160
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+7013
-7160
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
GENERATE_DIST: ${{ steps.default-branch.branch == steps.extract-branch.branch && '1' || '0' }}
4646

4747
- name: Publish Release
48-
if: github.ref == '/refs/heads/${{ fromJson(steps.get_default_branch.outputs.data).default_branch }}'
48+
if: github.ref == '/refs/heads/${{ fromJson(steps.get_default_branch.outputs.data).default_branch }}' && github.event_name == 'push'
4949
uses: "marvinpinto/action-automatic-releases@latest"
5050
with:
5151
repo_token: "${{ secrets.PAT }}"
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
/**
 * Combo box (xtype 'sequenceanalysis-genomefield') for selecting rows from the
 * sequenceanalysis.reference_libraries query.
 *
 * The store loads the 'name' and 'rowid' columns, sorted by name, and only includes rows
 * whose 'datedisabled' column is blank. The displayed text is 'name' and the underlying
 * value is 'rowid'. The container path comes from Laboratory.Utils.getQueryContainerPath().
 */
Ext4.define('SequenceAnalysis.field.GenomeField', {
    extend: 'LABKEY.ext4.ComboBox',
    alias: 'widget.sequenceanalysis-genomefield',

    initComponent: function () {
        Ext4.apply(this, {
            forceSelection: true,
            displayField: 'name',
            valueField: 'rowid',
            store: {
                type: 'labkey-store',
                schemaName: 'sequenceanalysis',
                queryName: 'reference_libraries',
                filterArray: [LABKEY.Filter.create('datedisabled', null, LABKEY.Filter.Types.ISBLANK)],
                columns: 'name,rowid',
                sort: 'name',
                containerPath: Laboratory.Utils.getQueryContainerPath(),
                autoLoad: true
            }
        });

        this.callParent(arguments);
    },

    /**
     * Returns the parent's submit value; when that value is an array it is
     * flattened into a single ';'-joined string.
     */
    getSubmitValue: function(){
        var val = this.callParent(arguments);
        if (Ext4.isArray(val)) {
            val = val.join(';');
        }

        return val;
    },

    /**
     * Returns the same value as getSubmitValue().
     */
    getToolParameterValue : function(){
        return this.getSubmitValue();
    },

    /**
     * Sets the field value. When multiSelect is enabled and a non-empty string is passed,
     * the string is split into an array before being handed to the parent implementation.
     *
     * The string is split on the configured delimiter when it contains it. Otherwise, if it
     * contains ';' (the separator getSubmitValue() always emits), it is split on ';' so that
     * a value produced by getSubmitValue() can be restored regardless of the configured delimiter.
     */
    setValue: function(val) {
        if (this.multiSelect && val && Ext4.isString(val)) {
            var separator = this.delimiter;
            var hasConfiguredDelimiter = Ext4.isString(separator) && val.indexOf(separator) > -1;
            if (!hasConfiguredDelimiter && val.indexOf(';') > -1) {
                // getSubmitValue() joins with ';', which may differ from this.delimiter
                separator = ';';
            }

            val = val.split(separator);
            this.callParent([val]);
        }
        else {
            this.callParent(arguments);
        }
    }
});

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisServiceImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ else if (!forceRecreate && tbi.exists())
290290

291291
public File bgzipFile(File input, Logger log) throws PipelineJobException
292292
{
293-
return new BgzipRunner(log).execute(input);
293+
return SequenceUtil.bgzip(input, log);
294294
}
295295

296296
@Override

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,10 +331,10 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
331331
Long unmapped = 0L;
332332
if (unmappedOutput != null && unmappedOutput.exists())
333333
{
334-
String unmappedStr = ProcessVariantsHandler.getVCFLineCount(output, job.getLogger(), false);
334+
String unmappedStr = ProcessVariantsHandler.getVCFLineCount(unmappedOutput, job.getLogger(), false);
335335
unmapped = StringUtils.trimToNull(unmappedStr) == null ? 0L : Long.parseLong(unmappedStr);
336336
job.getLogger().info("total unmapped variants: " + unmappedStr);
337-
job.getLogger().info("passing unmapped variants: " + ProcessVariantsHandler.getVCFLineCount(output, job.getLogger(), true));
337+
job.getLogger().info("passing unmapped variants: " + ProcessVariantsHandler.getVCFLineCount(unmappedOutput, job.getLogger(), true));
338338
SequenceAnalysisService.get().ensureVcfIndex(unmappedOutput, job.getLogger());
339339
}
340340

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/CreateReferenceLibraryTask.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package org.labkey.sequenceanalysis.pipeline;
1717

1818
import htsjdk.samtools.SAMSequenceDictionary;
19+
import htsjdk.samtools.reference.FastaSequenceIndexCreator;
1920
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
2021
import org.apache.commons.io.FileUtils;
2122
import org.jetbrains.annotations.NotNull;
@@ -56,6 +57,7 @@
5657
import org.labkey.sequenceanalysis.SequenceAnalysisServiceImpl;
5758
import org.labkey.sequenceanalysis.model.ReferenceLibraryMember;
5859
import org.labkey.sequenceanalysis.run.util.FastaIndexer;
60+
import picard.sam.CreateSequenceDictionary;
5961

6062
import java.io.File;
6163
import java.io.PrintWriter;
@@ -314,7 +316,8 @@ public RecordedActionSet run() throws PipelineJobException
314316
}
315317
catch (PipelineJobException e)
316318
{
317-
getJob().getLogger().warn("Unable to create FASTA index");
319+
getJob().getLogger().warn("Unable to create FASTA index with samtools, creating with HTSJDK");
320+
FastaSequenceIndexCreator.create(fasta.toPath(), true);
318321
}
319322

320323
try

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceOutputHandlerFinalTask.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,11 @@ public static Set<SequenceOutputFile> createOutputFiles(SequenceJob job, int run
173173
{
174174
SimpleFilter filter = new SimpleFilter(FieldKey.fromString("runId"), runId, CompareType.EQUAL);
175175
filter.addCondition(FieldKey.fromString("dataId"), o.getDataId(), CompareType.EQUAL);
176-
filter.addCondition(FieldKey.fromString("category"), o.getCategory(), CompareType.EQUAL);
176+
if (o.getCategory() != null)
177+
{
178+
filter.addCondition(FieldKey.fromString("category"), o.getCategory(), CompareType.EQUAL);
179+
}
180+
177181
filter.addCondition(FieldKey.fromString("name"), o.getName(), CompareType.EQUAL);
178182
TableSelector ts = new TableSelector(ti, filter, null);
179183
if (ts.exists())

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/GxfSorter.java

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import org.apache.commons.io.FileUtils;
44
import org.apache.logging.log4j.Logger;
5-
import org.apache.logging.log4j.LogManager;
65
import org.jetbrains.annotations.Nullable;
76
import org.labkey.api.pipeline.PipelineJobException;
87
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
@@ -42,7 +41,9 @@ else if (SequenceUtil.FILETYPE.gtf.getFileType().isType(input))
4241
public File sortGff(File input, @Nullable File output) throws PipelineJobException
4342
{
4443
File baseDir = output == null ? input.getParentFile() : output.getParentFile();
45-
File outputFile = output == null ? new File(input.getParentFile(), "temp.sorted.gff") : output;
44+
File outputFile = new File(baseDir, "temp.sorted.gtf");
45+
boolean inputIsGzip = input.getPath().toLowerCase().endsWith(".gz");
46+
boolean outputIsGzip = output == null ? inputIsGzip : output.getPath().toLowerCase().endsWith(".gz");
4647

4748
File script = new File(baseDir, "sorter.sh");
4849
try (PrintWriter writer = PrintWriters.getPrintWriter(script))
@@ -53,8 +54,15 @@ public File sortGff(File input, @Nullable File output) throws PipelineJobExcepti
5354
writer.println("GFF=" + input.getPath());
5455
writer.println("OUT_GFF=" + outputFile.getPath());
5556

56-
writer.println("awk '{ if ($1 ~ \"^#\" ) print $0; else exit; }' $GFF > $OUT_GFF");
57-
writer.println("(grep -v '#' $GFF | grep -v \"Parent=\" | sort -V -k1,1 -k4,4n -k5,5n; grep -v '#' $GFF | grep -e \"Parent=\" | sort -V -k1,1 -k4,4n -k5,5n)| sort -V -k1,1 -k4,4n -s >> $OUT_GFF");
57+
String cat = inputIsGzip ? "zcat" : "cat";
58+
writer.println(cat + " $GFF | awk '{ if ($1 ~ \"^#\" ) print $0; else exit; }' > $OUT_GFF");
59+
writer.println("(" + cat + " $GFF | grep -v '#' | grep -v \"Parent=\" | sort -V -k1,1 -k4,4n -k5,5n; " + cat + " $GFF | grep -v '#' | grep -e \"Parent=\" | sort -V -k1,1 -k4,4n -k5,5n)| sort -V -k1,1 -k4,4n -s >> $OUT_GFF");
60+
61+
if (outputIsGzip)
62+
{
63+
writer.println("bgzip -f $OUT_GFF");
64+
outputFile = new File(outputFile.getPath() + ".gz");
65+
}
5866
}
5967
catch (IOException e)
6068
{
@@ -67,7 +75,9 @@ public File sortGff(File input, @Nullable File output) throws PipelineJobExcepti
6775
public File sortGtf(File input, @Nullable File output) throws PipelineJobException
6876
{
6977
File baseDir = output == null ? input.getParentFile() : output.getParentFile();
70-
File outputFile = output == null ? new File(input.getParentFile(), "temp.sorted.gtf") : output;
78+
File outputFile = new File(baseDir, "temp.sorted.gtf");
79+
boolean inputIsGzip = input.getPath().toLowerCase().endsWith(".gz");
80+
boolean outputIsGzip = output == null ? inputIsGzip : output.getPath().toLowerCase().endsWith(".gz");
7181

7282
File script = new File(baseDir, "sorter.sh");
7383
try (PrintWriter writer = PrintWriters.getPrintWriter(script))
@@ -78,8 +88,9 @@ public File sortGtf(File input, @Nullable File output) throws PipelineJobExcepti
7888
writer.println("GTF=" + input.getPath());
7989
writer.println("OUT_GTF=" + outputFile.getPath());
8090

81-
writer.println("awk '{ if ($1 ~ \"^#\" ) print $0; else exit; }' $GTF > $OUT_GTF");
82-
writer.println("cat $GTF | grep -v '#' | awk -v OFS='\\t' ' {");
91+
String cat = inputIsGzip ? "zcat" : "cat";
92+
writer.println(cat + " $GTF | awk '{ if ($1 ~ \"^#\" ) print $0; else exit; }' > $OUT_GTF");
93+
writer.println(cat + " $GTF | grep -v '#' | awk -v OFS='\\t' ' {");
8394
writer.println("so = 3");
8495
writer.println("if (tolower($3) == \"gene\")");
8596
writer.println(" so = 1");
@@ -90,6 +101,12 @@ public File sortGtf(File input, @Nullable File output) throws PipelineJobExcepti
90101
writer.println("else if (tolower($3) == \"cds\")");
91102
writer.println(" so = 4");
92103
writer.println("print so, $0 } ' | sort -V -k2,2 -k5,5n -k1,1n | cut -d$'\\t' -f2- >> $OUT_GTF");
104+
105+
if (outputIsGzip)
106+
{
107+
writer.println("bgzip -f $OUT_GTF");
108+
outputFile = new File(outputFile.getPath() + ".gz");
109+
}
93110
}
94111
catch (IOException e)
95112
{
@@ -125,6 +142,22 @@ private File executeScript(File script, File input, File output, File outputFile
125142
throw new PipelineJobException(e);
126143
}
127144
}
145+
else if (!outputFile.equals(output))
146+
{
147+
try
148+
{
149+
if (output.exists())
150+
{
151+
output.delete();
152+
}
153+
154+
FileUtils.moveFile(outputFile, output);
155+
}
156+
catch (IOException e)
157+
{
158+
throw new PipelineJobException(e);
159+
}
160+
}
128161

129162
return output == null ? input : output;
130163
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/TabixRunner.java

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,22 +48,29 @@ public List<String> getParams(File input)
4848
List<String> params = new ArrayList<>();
4949
params.add(getExe().getPath());
5050
params.add("-f");
51-
params.add("-p");
52-
if (new FileType("gff").isType(input))
51+
52+
String type = null;
53+
if (new FileType(Arrays.asList("gff3", "gff", "gtf"), "gff", FileType.gzSupportLevel.SUPPORT_GZ).isType(input))
5354
{
54-
params.add("gff");
55+
type = "gff";
5556
}
56-
else if (new FileType(Arrays.asList("bed", "bedGraph"), "bed").isType(input))
57+
else if (new FileType(Arrays.asList("bed", "bedGraph"), "bed", FileType.gzSupportLevel.SUPPORT_GZ).isType(input))
5758
{
58-
params.add("bed");
59+
type = "bed";
5960
}
6061
else if (new FileType(Arrays.asList("sam", "bam"), "bam").isType(input))
6162
{
62-
params.add("sam");
63+
type = "sam";
6364
}
6465
else if (new FileType(Arrays.asList("vcf"), "vcf", FileType.gzSupportLevel.SUPPORT_GZ).isType(input))
6566
{
66-
params.add("vcf");
67+
type = "vcf";
68+
}
69+
70+
if (type != null)
71+
{
72+
params.add("-p");
73+
params.add(type);
6774
}
6875

6976
params.add(input.getPath());

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/VariantQCStep.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
8080
});
8181
}
8282

83+
Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
84+
if (maxThreads != null)
85+
{
86+
options.add("--threads");
87+
options.add(String.valueOf(maxThreads));
88+
}
89+
8390
File outputHtml = new File(outputDirectory, SequencePipelineService.get().getUnzippedBaseName(inputVCF.getName()) + ".variantQC.html");
8491
VariantQCWrapper wrapper = new VariantQCWrapper(getPipelineCtx().getLogger());
8592
wrapper.execute(inputVCF, genome.getWorkingFastaFile(), outputHtml, options);

SequenceAnalysis/src/org/labkey/sequenceanalysis/util/SequenceUtil.java

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.apache.commons.io.FileUtils;
2626
import org.apache.commons.lang3.SystemUtils;
2727
import org.apache.logging.log4j.Logger;
28-
import org.apache.logging.log4j.LogManager;
2928
import org.jetbrains.annotations.Nullable;
3029
import org.json.JSONArray;
3130
import org.json.JSONObject;
@@ -39,6 +38,7 @@
3938
import org.labkey.api.util.FileUtil;
4039
import org.labkey.api.util.StringUtilsLabKey;
4140
import org.labkey.api.writer.PrintWriters;
41+
import org.labkey.sequenceanalysis.run.util.BgzipRunner;
4242
import org.labkey.sequenceanalysis.run.util.BuildBamIndexWrapper;
4343

4444
import java.io.BufferedReader;
@@ -52,6 +52,7 @@
5252
import java.io.Writer;
5353
import java.util.ArrayList;
5454
import java.util.Arrays;
55+
import java.util.Collections;
5556
import java.util.HashSet;
5657
import java.util.List;
5758
import java.util.Set;
@@ -81,10 +82,10 @@ public static enum FILETYPE
8182
fasta(Arrays.asList(".fasta", ".fa", ".fna"), true),
8283
bam(".bam"),
8384
sff(".sff"),
84-
gtf(".gtf"),
85-
gff(Arrays.asList(".gff", ".gff3"), false),
85+
gtf(Collections.singletonList(".gtf"), true),
86+
gff(Arrays.asList(".gff", ".gff3"), true),
8687
gbk(".gbk"),
87-
bed(".bed"),
88+
bed(Collections.singletonList(".bed"), true),
8889
vcf(Arrays.asList(".vcf"), true),
8990
gvcf(Arrays.asList(".g.vcf"), true);
9091

@@ -232,16 +233,28 @@ public static void writeFastaRecord(Writer writer, String header, String sequenc
232233
}
233234
}
234235

235-
@Deprecated
236-
private static void bgzip(File input, File output)
236+
public static File bgzip(File input, Logger log) throws PipelineJobException
237237
{
238-
try (FileInputStream i = new FileInputStream(input); BlockCompressedOutputStream o = new BlockCompressedOutputStream(new FileOutputStream(output), output))
238+
if (SystemUtils.IS_OS_WINDOWS)
239239
{
240-
FileUtil.copyData(i, o);
240+
File output = new File(input.getPath() + ".gz");
241+
try (FileInputStream i = new FileInputStream(input); BlockCompressedOutputStream o = new BlockCompressedOutputStream(new FileOutputStream(output), output))
242+
{
243+
FileUtil.copyData(i, o);
244+
}
245+
catch (IOException e)
246+
{
247+
throw new PipelineJobException(e);
248+
}
249+
250+
// For consistency with bgzip behavior
251+
input.delete();
252+
253+
return output;
241254
}
242-
catch (IOException e)
255+
else
243256
{
244-
throw new RuntimeException(e);
257+
return new BgzipRunner(log).execute(input);
245258
}
246259
}
247260

@@ -336,7 +349,7 @@ public static List<Interval> bedToIntervalList(File input) throws IOException
336349
while (i.hasNext())
337350
{
338351
BEDFeature f = i.next();
339-
ret.add(new Interval(f.getChr(), f.getStart(), f.getEnd()));
352+
ret.add(new Interval(f.getContig(), f.getStart(), f.getEnd()));
340353
}
341354
}
342355
}
@@ -437,18 +450,12 @@ public static void sortROD(File input, Logger log, Integer startColumnIdx) throw
437450

438451
//then sort/append the records
439452
CommandWrapper wrapper = SequencePipelineService.get().getCommandWrapper(log);
440-
wrapper.execute(Arrays.asList("/bin/sh", "-c", "cat '" + input.getPath() + "' | grep -v '^#' | sort -s -V -k1,1f" + (startColumnIdx == null ? "" : " -k" + startColumnIdx + "," + startColumnIdx + "n")), ProcessBuilder.Redirect.appendTo(sorted));
453+
String cat = isCompressed ? "zcat" : "cat";
454+
wrapper.execute(Arrays.asList("/bin/sh", "-c", cat + " '" + input.getPath() + "' | grep -v '^#' | sort -s -V -k1,1f" + (startColumnIdx == null ? "" : " -k" + startColumnIdx + "," + startColumnIdx + "n") + (isCompressed ? " | bgzip -f " : "")), ProcessBuilder.Redirect.appendTo(sorted));
441455

442456
//replace the non-sorted output
443457
input.delete();
444-
if (isCompressed)
445-
{
446-
SequenceUtil.bgzip(sorted, input);
447-
}
448-
else
449-
{
450-
FileUtils.moveFile(sorted, input);
451-
}
458+
FileUtils.moveFile(sorted, input);
452459
sorted.delete();
453460
}
454461

0 commit comments

Comments
 (0)