Commit 3b9d57f

Merge pull request #150 from LabKey/fb_merge_22.3_to_develop

Merge 22.3 to develop

2 parents: 1ac1e9c + ec353f9

35 files changed (+818 / -186 lines)

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/AbstractAlignmentStepProvider.java

Lines changed: 2 additions & 2 deletions

@@ -33,11 +33,11 @@ abstract public class AbstractAlignmentStepProvider<StepType extends AlignmentSt
     public static String ALIGNMENT_MODE_PARAM = "alignmentMode";
     public static String SUPPORT_MERGED_UNALIGNED = "supportsMergeUnaligned";
     public static String COLLECT_WGS_METRICS = "collectWgsMetrics";
+    public static String CONVERT_TO_CRAM = "convertToCram";
     public static String COLLECT_WGS_METRICS_NON_ZERO = "collectWgsMetricsNonZero";
     public static String DISCARD_BAM = "discardBam";
-    public static String SUPPORT_ALIGNMENT_METRICS = "supportAlignmentMetrics";
 
-    public static enum ALIGNMENT_MODE
+    public enum ALIGNMENT_MODE
     {
         ALIGN_THEN_MERGE(),
         MERGE_THEN_ALIGN();

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenome.java

Lines changed: 6 additions & 0 deletions

@@ -46,6 +46,12 @@ public interface ReferenceGenome extends Serializable
      */
     public @NotNull File getWorkingFastaFile();
 
+    /**
+     * @return This is the file that should typically be used by callers. The pipeline code usually copies this file to the local working directory.
+     * If this has occurred, that file will preferentially be used. Otherwise, the source FASTA file will be returned.
+     */
+    public @NotNull File getWorkingFastaFileGzipped();
+
     public void setWorkingFasta(File workingFasta);
 
     /**
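
The gzipped FASTA returned here is used as the CRAM reference (see SamtoolsCramConverter below), and the maintenance task later in this commit creates and tracks .gzi/.fai companions next to it. A caller-side sanity check is sketched below; the genome variable and the exception handling are illustrative, not code from this commit:

    File gzFasta = genome.getWorkingFastaFileGzipped();
    File gzi = new File(gzFasta.getPath() + ".gzi");
    File fai = new File(gzFasta.getPath() + ".fai");
    if (!gzi.exists() || !fai.exists())
    {
        // These are the same companion files the maintenance task below creates and tracks for each genome
        throw new PipelineJobException("Missing index for gzipped FASTA: " + gzFasta.getPath());
    }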

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SamtoolsCramConverter.java

Lines changed: 76 additions & 0 deletions

@@ -0,0 +1,76 @@
+package org.labkey.api.sequenceanalysis.pipeline;
+
+import org.apache.logging.log4j.Logger;
+import org.jetbrains.annotations.Nullable;
+import org.labkey.api.pipeline.PipelineJobException;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created by bimber on 11/4/2016.
+ */
+public class SamtoolsCramConverter extends SamtoolsRunner
+{
+    public SamtoolsCramConverter(Logger log)
+    {
+        super(log);
+    }
+
+    public File convert(File inputBam, File outputCram, File gzippedFasta, boolean doIndex, @Nullable Integer threads) throws PipelineJobException
+    {
+        getLogger().info("Converting SAM/BAM to CRAM: " + inputBam.getPath());
+
+        List<String> params = new ArrayList<>();
+        params.add(getSamtoolsPath().getPath());
+        params.add("view");
+
+        params.add("-C");
+
+        params.add("-o");
+        params.add(outputCram.getPath());
+
+        params.add("-T");
+        params.add(gzippedFasta.getPath());
+
+        if (threads != null)
+        {
+            params.add("--threads");
+            params.add(String.valueOf(threads));
+        }
+
+        params.add(inputBam.getPath());
+
+        execute(params);
+
+        if (doIndex)
+        {
+            doIndex(outputCram, threads);
+        }
+
+        return outputCram;
+    }
+
+    private void doIndex(File input, @Nullable Integer threads) throws PipelineJobException
+    {
+        List<String> params = new ArrayList<>();
+        params.add(getSamtoolsPath().getPath());
+        params.add("index");
+
+        if (threads != null)
+        {
+            params.add("--threads");
+            params.add(String.valueOf(threads));
+        }
+
+        params.add(input.getPath());
+        execute(params);
+
+        File idx = new File(input.getPath() + ".crai");
+        if (!idx.exists())
+        {
+            throw new PipelineJobException("Unable to find CRAM index: " + idx.getPath());
+        }
+    }
+}
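
Roughly, convert() shells out to "samtools view -C -T <ref.fasta.gz> -o <out.cram> [--threads N] <in.bam>" and, when doIndex is true, follows with "samtools index", failing if the expected .crai does not appear. A sketch of a typical call follows; the helper method, file names and thread count are illustrative, and only convert() and getWorkingFastaFileGzipped() come from this commit:

    // Hypothetical helper, not part of this commit
    private File convertToCram(File bam, ReferenceGenome genome, Logger log) throws PipelineJobException
    {
        File cram = new File(bam.getParentFile(), FileUtil.getBaseName(bam.getName()) + ".cram");

        // The bgzipped working FASTA added to ReferenceGenome above is what samtools expects for -T
        File gzFasta = genome.getWorkingFastaFileGzipped();

        // doIndex = true also produces <name>.cram.crai, or throws PipelineJobException if samtools does not create it
        return new SamtoolsCramConverter(log).convert(bam, cram, gzFasta, true, 4);
    }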

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DISCVRSeqRunner.java

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@ protected File getJar()
 
     }
 
-    protected List<String> getBaseArgs(String toolName)
+    public List<String> getBaseArgs(String toolName)
     {
         List<String> args = new ArrayList<>();
         args.add(SequencePipelineService.get().getJava8FilePath());

SequenceAnalysis/resources/web/SequenceAnalysis/panel/SequenceImportPanel.js

Lines changed: 8 additions & 1 deletion

@@ -1670,7 +1670,7 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
             schemaName: 'sequenceanalysis',
             queryName: 'sequence_readsets',
             filterArray: [LABKEY.Filter.create('rowid', readsetIds.join(';'), LABKEY.Filter.Types.IN)],
-            columns: 'rowid,name,platform,application,chemistry,concentration,fragmentSize,librarytype,sampletype,subjectid,sampledate,sampleid,comments,barcode5,barcode3,instrument_run_id,totalFiles',
+            columns: 'rowid,name,platform,application,chemistry,concentration,fragmentSize,librarytype,sampletype,subjectid,sampledate,sampleid,comments,barcode5,barcode3,instrument_run_id,totalFiles,status',
             scope: this,
             failure: LDK.Utils.getErrorCallback(),
             success: function(results){
@@ -1699,6 +1699,13 @@ Ext4.define('SequenceAnalysis.panel.SequenceImportPanel', {
                     showBarcodes = true;
                 }
 
+                if (row.totalFiles && row.status === 'Replaced'){
+                    msgs.push('Readset ' + row.rowid + ' has been marked Replaced, but you are attempting to append data to it. This is probably an error.');
+                    Ext4.Array.forEach(records, function(record) {
+                        record.data.readset = null;
+                    }, this);
+                }
+
                 //update row based on saved readset. avoid firing event
                 Ext4.Array.forEach(records, function(record) {
                     var importType = !record.data.readset ? null : row.totalFiles ? 'Merge With Existing' : 'New Data';

SequenceAnalysis/src/org/labkey/sequenceanalysis/FileGroup.java

Lines changed: 9 additions & 6 deletions

@@ -1,17 +1,15 @@
 package org.labkey.sequenceanalysis;
 
-import org.json.JSONObject;
-import org.labkey.api.util.Pair;
-
 import java.io.File;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashMap;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;
 
 /**
  * Created by bimber on 2/19/2015.
@@ -35,7 +33,7 @@ public Set<String> getPlatformUnits()
     public List<List<FilePair>> groupByPlatformUnit()
     {
         List<List<FilePair>> ret = new ArrayList<>();
-        Map<String, List<FilePair>> grouped = new HashMap<>();
+        Map<String, List<FilePair>> grouped = new TreeMap<>();
 
         for (FilePair pair : filePairs)
         {
@@ -47,13 +45,18 @@ public List<List<FilePair>> groupByPlatformUnit()
            {
                if (!grouped.containsKey(pair.platformUnit))
                {
-                   grouped.put(pair.platformUnit, new ArrayList<FilePair>());
+                   grouped.put(pair.platformUnit, new ArrayList<>());
                }
 
                grouped.get(pair.platformUnit).add(pair);
            }
        }
 
+       for (String key : grouped.keySet())
+       {
+           grouped.get(key).sort(Comparator.comparing(filePair -> filePair.file1));
+       }
+
        ret.addAll(grouped.values());
 
        return ret;
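
Switching the grouping map from HashMap to TreeMap and sorting each group by file1 makes groupByPlatformUnit() return platform units in lexicographic key order, with a reproducible file order inside each group. A self-contained sketch of the same pattern; the Pair record below is a stand-in, not the real FileGroup.FilePair class:

    import java.io.File;
    import java.util.*;

    class GroupingDemo
    {
        record Pair(String platformUnit, File file1) {}

        public static void main(String[] args)
        {
            List<Pair> pairs = List.of(
                    new Pair("L002", new File("s_2_R1.fastq.gz")),
                    new Pair("L001", new File("s_1b_R1.fastq.gz")),
                    new Pair("L001", new File("s_1a_R1.fastq.gz")));

            // TreeMap iterates keys in sorted order (L001 before L002); HashMap order can vary between runs
            Map<String, List<Pair>> grouped = new TreeMap<>();
            for (Pair p : pairs)
            {
                grouped.computeIfAbsent(p.platformUnit(), k -> new ArrayList<>()).add(p);
            }

            // Sorting on file1 (File is Comparable) fixes the order within each platform unit as well
            grouped.values().forEach(group -> group.sort(Comparator.comparing(Pair::file1)));

            // Prints L001 first, with s_1a before s_1b, then L002
            System.out.println(grouped);
        }
    }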

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisController.java

Lines changed: 9 additions & 0 deletions

@@ -3690,6 +3690,7 @@ public void export(ExportSequenceFilesForm form, HttpServletResponse response, B
 
         Set<File> files = new HashSet<>();
         FileType bamFileType = new FileType("bam");
+        FileType cramFileType = new FileType("cram");
         FileType fastaFileType = new FileType("fasta", FileType.gzSupportLevel.SUPPORT_GZ);
         FileType gzFileType = new FileType("gz");
         for (int id : form.getDataIds())
@@ -3710,6 +3711,14 @@ public void export(ExportSequenceFilesForm form, HttpServletResponse response, B
                     files.add(index);
                 }
             }
+            if (cramFileType.isType(data.getFile()))
+            {
+                File index = new File(data.getFile() + ".crai");
+                if (index.exists())
+                {
+                    files.add(index);
+                }
+            }
             else if (fastaFileType.isType(data.getFile()))
             {
                 File index = new File(data.getFile() + ".fai");

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 38 additions & 4 deletions

@@ -12,6 +12,7 @@
 import org.labkey.api.exp.api.ExperimentService;
 import org.labkey.api.ldk.LDKService;
 import org.labkey.api.pipeline.PipeRoot;
+import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.pipeline.PipelineService;
 import org.labkey.api.query.FieldKey;
 import org.labkey.api.security.User;
@@ -25,10 +26,13 @@
 import org.labkey.api.util.SystemMaintenance.MaintenanceTask;
 import org.labkey.sequenceanalysis.model.AnalysisModelImpl;
 import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
+import org.labkey.sequenceanalysis.pipeline.ReferenceGenomeImpl;
+import org.labkey.sequenceanalysis.run.util.FastaIndexer;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -206,7 +210,7 @@ else if (!d.getFile().exists())
         }
     }
 
-    private void processContainer(Container c, Logger log) throws IOException
+    private void processContainer(Container c, Logger log) throws IOException, PipelineJobException
    {
        PipeRoot root = PipelineService.get().getPipelineRootSetting(c);
        if (root != null && !root.isCloudRoot())
@@ -283,7 +287,8 @@ private void processContainer(Container c, Logger log) throws IOException
                {
                    //inspect within library
                    List<String> expectedChildren = new ArrayList<>();
-                   Integer fastaId = new TableSelector(SequenceAnalysisSchema.getInstance().getSchema().getTable(SequenceAnalysisSchema.TABLE_REF_LIBRARIES), PageFlowUtil.set("fasta_file")).getObject(Integer.parseInt(child.getName()), Integer.class);
+                   int libraryId = Integer.parseInt(child.getName());
+                   Integer fastaId = new TableSelector(SequenceAnalysisSchema.getInstance().getSchema().getTable(SequenceAnalysisSchema.TABLE_REF_LIBRARIES), PageFlowUtil.set("fasta_file")).getObject(libraryId, Integer.class);
                    if (fastaId == null)
                    {
                        log.error("Unable to find FASTA ExpData in DB matching jbrowse directory: " + child.getPath());
@@ -297,6 +302,24 @@ private void processContainer(Container c, Logger log) throws IOException
                        log.error("expected fasta file does not exist: " + fasta.getPath());
                    }
 
+                   // Use this to retroactively convert existing genomes:
+                   File gz = new File(fasta.getPath() + ".gz");
+                   if (!gz.exists())
+                   {
+                       ReferenceGenomeImpl genome = new ReferenceGenomeImpl(fasta, fastaData, libraryId, null);
+                       genome.createGzippedFile(log);
+                   }
+
+                   File gzi = new File(fasta.getPath() + ".gz.gzi");
+                   if (!gzi.exists())
+                   {
+                       new FastaIndexer(log).execute(gz);
+                   }
+
+                   expectedChildren.add(fasta.getName() + ".gz");
+                   expectedChildren.add(fasta.getName() + ".gz.gzi");
+                   expectedChildren.add(fasta.getName() + ".gz.fai");
+
                    expectedChildren.add(fasta.getName());
                    expectedChildren.add(fasta.getName() + ".fai");
                    expectedChildren.add(FileUtil.getBaseName(fasta.getName()) + ".idKey.txt");
@@ -320,8 +343,6 @@ private void processContainer(Container c, Logger log) throws IOException
                        }
                    }
 
-                   Integer libraryId = Integer.parseInt(child.getName());
-
                    //check/verify tracks
                    File trackDir = new File(child, "tracks");
                    if (trackDir.exists())
@@ -459,8 +480,10 @@ private void deleteFile(File f, Logger log) throws IOException
    }
 
    private static FileType _bamFileType = new FileType("bam");
+   private static FileType _cramFileType = new FileType("cram");
    private static FileType _vcfFileType = new FileType("vcf", FileType.gzSupportLevel.SUPPORT_GZ);
    private static FileType _bedFileType = new FileType("bed");
+   private static FileType _fastaFileType = new FileType(Arrays.asList("fasta", "fa"), "fasta", FileType.gzSupportLevel.SUPPORT_GZ);
 
    /**
     * This is intended to return any files associated with an input, which is primarily designed to pick up index files
@@ -474,6 +497,10 @@ public static List<String> getAssociatedFiles(File f, boolean includeGz)
        {
            ret.add(f.getName() + ".bai");
        }
+       else if (_cramFileType.isType(f))
+       {
+           ret.add(f.getName() + ".crai");
+       }
        else if (_vcfFileType.isType(f))
        {
            ret.add(f.getName() + ".tbi");
@@ -491,6 +518,13 @@ else if (_bedFileType.isType(f))
        {
            ret.add(f.getName() + ".idx");
        }
+       else if (_fastaFileType.isType(f))
+       {
+           ret.add(f.getName() + ".fai");
+           ret.add(f.getName() + ".gz");
+           ret.add(f.getName() + ".gz.gzi");
+           ret.add(f.getName() + ".gz.fai");
+       }
 
        return ret;
    }
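
As the javadoc above says, getAssociatedFiles() enumerates the index and sidecar files that belong with a given input; the new branches cover CRAM indexes and the bgzipped-FASTA artifacts that the genome conversion code now produces. A hedged usage sketch follows; the paths are invented, and the full return values also depend on branches outside this hunk (for example the includeGz handling):

    // Hypothetical inputs; only names added in this commit are shown in the comments
    List<String> cramSidecars = SequenceAnalysisMaintenanceTask.getAssociatedFiles(new File("/data/analysis1/sample1.cram"), true);
    // expected to include "sample1.cram.crai"

    List<String> fastaSidecars = SequenceAnalysisMaintenanceTask.getAssociatedFiles(new File("/genomes/42/42_genome.fasta"), true);
    // expected to include "42_genome.fasta.fai", "42_genome.fasta.gz", "42_genome.fasta.gz.gzi" and "42_genome.fasta.gz.fai"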

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions

@@ -67,6 +67,7 @@
 import org.labkey.sequenceanalysis.pipeline.AlignmentImportJob;
 import org.labkey.sequenceanalysis.pipeline.CacheGenomePipelineJob;
 import org.labkey.sequenceanalysis.pipeline.CacheGenomeTrigger;
+import org.labkey.sequenceanalysis.pipeline.ConvertToCramHandler;
 import org.labkey.sequenceanalysis.pipeline.IlluminaImportJob;
 import org.labkey.sequenceanalysis.pipeline.ImportFastaSequencesPipelineJob;
 import org.labkey.sequenceanalysis.pipeline.ImportGenomeTrackPipelineJob;
@@ -361,6 +362,7 @@ public static void registerPipelineSteps()
         SequenceAnalysisService.get().registerFileHandler(new MergeLoFreqVcfHandler());
         SequenceAnalysisService.get().registerFileHandler(new PangolinHandler());
         SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler());
+        SequenceAnalysisService.get().registerFileHandler(new ConvertToCramHandler());
 
         SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler());
         SequenceAnalysisService.get().registerReadsetHandler(new RestoreSraDataHandler());

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceIntegrationTests.java

Lines changed: 10 additions & 4 deletions

@@ -13,8 +13,8 @@
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.filefilter.IOFileFilter;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.json.JSONArray;
 import org.json.JSONObject;
 import org.junit.AfterClass;
@@ -88,6 +88,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 /**
  * User: bimber
@@ -923,8 +924,8 @@ public int compare(PipelineJob o1, PipelineJob o2)
 
         verifyJob(basedir, jobName, expectedOutputs, new String[]{PAIRED_FILENAME_L1a, PAIRED_FILENAME2_L1a, PAIRED_FILENAME_L1b, PAIRED_FILENAME2_L1b, PAIRED_FILENAME_L2, PAIRED_FILENAME2_L2}, prefix, config);
 
-        Assert.assertEquals("Incorrect read number", 633L, FastqUtils.getSequenceCount(merge1));
-        Assert.assertEquals("Incorrect read number", 633L, FastqUtils.getSequenceCount(merge2));
+        Assert.assertEquals("Incorrect read number", 422L, FastqUtils.getSequenceCount(merge1));
+        Assert.assertEquals("Incorrect read number", 422L, FastqUtils.getSequenceCount(merge2));
 
         //job2: g2
         expectedOutputs = new HashSet<>();
@@ -1337,7 +1338,12 @@ private void validateSamples(SequenceReadsetImpl[] models, JSONObject config)
             SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
             Assert.assertEquals("Incorrect sampleDate", o.getString("sampledate"), m.getSampleDate() == null ? null : format.format(m.getSampleDate()));
 
-            //TODO: readData
+            String fileGroup = o.getString("fileGroupId");
+            List<String> keys = config.keySet().stream().filter(x -> x.startsWith("fileGroup_")).filter(x -> fileGroup.equals(new JSONObject(config.getString(x)).getString("name"))).collect(Collectors.toList());
+            Set<String> platformUnits = keys.stream().map(x -> new JSONObject(config.getString(x)).getJSONArray("files").toJSONObjectArray()).flatMap(Arrays::stream).map(y -> y.getString("platformUnit") == null ? y.getString("file1") : y.getString("platformUnit")).collect(Collectors.toSet());
+            Assert.assertFalse("No matching readdata", platformUnits.isEmpty());
+
+            Assert.assertEquals("Incorrect number of readdata", m.getReadData().size(), platformUnits.size());
         }
     }
 
