Skip to content

Commit e5093b8

Browse files
authored
Merge pull request #83 from LabKey/fb_merge_discvr-20.11
Merge discvr-20.11 to develop
2 parents 4b3f7c4 + 1fe7ab2 commit e5093b8

29 files changed

+792
-447
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/ReferenceGenomeManager.java

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.labkey.api.sequenceanalysis.pipeline;
22

3-
import com.google.common.io.Files;
3+
import org.apache.commons.io.FileUtils;
4+
import org.apache.commons.lang3.SystemUtils;
45
import org.apache.logging.log4j.Logger;
56
import org.labkey.api.pipeline.PipelineJobException;
67
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
@@ -60,11 +61,32 @@ private boolean isUpToDate(ReferenceGenome genome)
6061
return lastSync >= lastUpdated;
6162
}
6263

63-
public void markGenomeModified(ReferenceGenome genome, Logger log) throws IOException
64+
public void markGenomeModified(ReferenceGenome genome, Logger log) throws PipelineJobException
6465
{
6566
File toUpdate = getLocalUpdateFile(genome);
6667
log.info("Marking genome as modified: " + toUpdate.getPath());
67-
Files.touch(toUpdate);
68+
touchFile(toUpdate, log);
69+
}
70+
71+
//NOTE: Java implementations of touch produce errors between the cluster and the NFS filesystem, so on non-Windows systems the native touch command is invoked instead
72+
private void touchFile(File target, Logger log) throws PipelineJobException
73+
{
74+
if (SystemUtils.IS_OS_WINDOWS)
75+
{
76+
try
77+
{
78+
FileUtils.touch(target);
79+
}
80+
catch (IOException e)
81+
{
82+
throw new PipelineJobException(e);
83+
}
84+
}
85+
else
86+
{
87+
SimpleScriptWrapper wrapper = new SimpleScriptWrapper(log);
88+
wrapper.execute(Arrays.asList("/bin/bash", "-c", "$(which touch) '" + target.getPath() + "'"));
89+
}
6890
}
6991

7092
public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws PipelineJobException
@@ -80,37 +102,32 @@ public void cacheGenomeLocally(ReferenceGenome genome, Logger log) throws Pipeli
80102
return;
81103
}
82104

105+
File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
83106
if (isUpToDate(genome))
84107
{
85108
log.debug("Genome up-to-date, will not repeat rsync");
109+
genome.setWorkingFasta(new File(new File(localCacheDir, genome.getGenomeId().toString()), genome.getSourceFastaFile().getName()));
110+
86111
return;
87112
}
88113

89-
File localCacheDir = SequencePipelineService.get().getRemoteGenomeCacheDirectory();
90114
log.info("attempting to rsync genome to local disks: " + localCacheDir.getPath());
91115

92116
File sourceDir = genome.getSourceFastaFile().getParentFile();
93117

94118
//Note: neither source nor dest has a trailing slash, so the entire source directory (e.g. '128') gets synced into a subdir of dest
95119
new SimpleScriptWrapper(log).execute(Arrays.asList(
96-
"rsync", "-r", "-a", "--delete", "--delete-excluded", "--no-owner", "--no-group", sourceDir.getPath(), localCacheDir.getPath()
120+
"rsync", "-r", "-a", "--delete", "--no-owner", "--no-group", sourceDir.getPath(), localCacheDir.getPath()
97121
));
98122

99-
try
100-
{
101-
File lastUpdate = getLocalUpdateFile(genome);
102-
if (!lastUpdate.exists())
103-
{
104-
Files.touch(lastUpdate);
105-
}
106-
107-
Files.touch(getRemoteSyncFile(genome.getGenomeId()));
108-
}
109-
catch (IOException e)
123+
File lastUpdate = getLocalUpdateFile(genome);
124+
if (!lastUpdate.exists())
110125
{
111-
throw new PipelineJobException(e);
126+
touchFile(lastUpdate, log);
112127
}
113128

129+
touchFile(getRemoteSyncFile(genome.getGenomeId()), log);
130+
114131
genome.setWorkingFasta(new File(new File(localCacheDir, genome.getGenomeId().toString()), genome.getSourceFastaFile().getName()));
115132
}
116133
}

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/RCommandWrapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ private String inferRPath(Logger log)
7070
//preferentially use R config setup in scripting props. only works if running locally.
7171
if (PipelineJobService.get().getLocationType() == PipelineJobService.LocationType.WebServer)
7272
{
73-
LabKeyScriptEngineManager svc = LabKeyScriptEngineManager.get();
73+
LabKeyScriptEngineManager svc = ServiceRegistry.get().getService(LabKeyScriptEngineManager.class);
7474
for (ExternalScriptEngineDefinition def : svc.getEngineDefinitions())
7575
{
7676
if (RScriptEngineFactory.isRScriptEngine(def.getExtensions()))

SequenceAnalysis/resources/external/scRNAseq/htoClassifier.Rmd

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
---
22
title: 'Cell Hashing'
3+
output: html_document
4+
35
---
46

57
```{r setup}
68
79
library(OOSAP)
810
11+
knitr::opts_chunk$set(message=FALSE, warning=FALSE, echo=TRUE, error = TRUE)
12+
913
cores <- Sys.getenv('SEQUENCEANALYSIS_MAX_THREADS')
1014
if (cores != ''){
1115
print(paste0('Setting threads to ', cores))
@@ -15,6 +19,15 @@ if (cores != ''){
1519
print('SEQUENCEANALYSIS_MAX_THREADS not set, will not set cores')
1620
}
1721
22+
print('Global variables: ')
23+
for (v in c('doHtoFilter', 'maxValueForColSumFilter', 'useSeurat', 'useMultiSeq', 'whitelistFile')){
24+
if (exists(v)){
25+
print(paste0(v, ': ', get(v)))
26+
} else {
27+
print(paste0(v, ': not defined'))
28+
}
29+
}
30+
1831
```
1932

2033
## Basic QC and Filtering on input:

SequenceAnalysis/resources/external/scRNAseq/htoClassifier.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,6 @@ fi
3636

3737
sudo $DOCKER pull bimberlab/oosap
3838

39-
sudo $DOCKER run --rm=true $RAM_OPTS $ENV_OPTS -v "${WD}:/work" -v "${HOME}:/homeDir" -u $UID -e USERID=$UID -w /work -e HOME=/homeDir bimberlab/oosap Rscript -e "barcodeDir <- '"${CITESEQ_COUNT_DIR}"';finalCallFile <- '"${FINAL_CALLS}"';doHtoFilter <- "${DO_HTO_FILTER}";maxValueForColSumFilter <- "${MIN_READS_PER_CELL}";allCallsOutFile <- '"${RAW_CALLS}"';metricsFile <- '"${METRICS_FILE}"';useSeurat <- ${USE_SEURAT};useMultiSeq <- ${USE_MULTISEQ};"${WHITELIST}"rmarkdown::render('htoClassifier.Rmd', output_file = '"${HTML_FILE}"')"
39+
sudo $DOCKER run --rm=true $RAM_OPTS $ENV_OPTS -v "${WD}:/work" -v "${HOME}:/homeDir" -u $UID -e USERID=$UID -w /work -e HOME=/homeDir bimberlab/oosap Rscript -e "barcodeDir <- '"${CITESEQ_COUNT_DIR}"';finalCallFile <- '"${FINAL_CALLS}"';doHtoFilter <- "${DO_HTO_FILTER}";maxValueForColSumFilter <- "${MIN_READS_PER_CELL}";allCallsOutFile <- '"${RAW_CALLS}"';metricsFile <- '"${METRICS_FILE}"';useSeurat <- ${USE_SEURAT};useMultiSeq <- ${USE_MULTISEQ};"${WHITELIST}"rmarkdown::render('htoClassifier.Rmd', clean=TRUE, output_format = 'html_document', output_file = '"${HTML_FILE}"');print('Rmarkdown complete');"
40+
41+
echo 'script complete'

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisMaintenanceTask.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ private void processContainer(Container c, Logger log) throws IOException
305305
expectedChildren.add("alignerIndexes");
306306
expectedChildren.add("tracks");
307307
expectedChildren.add("chainFiles");
308+
expectedChildren.add(".lastUpdate");
308309

309310
for (String fileName : child.list())
310311
{

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisManager.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.labkey.api.query.FieldKey;
5656
import org.labkey.api.query.InvalidKeyException;
5757
import org.labkey.api.query.QueryService;
58+
import org.labkey.api.query.QueryUpdateService;
5859
import org.labkey.api.query.QueryUpdateServiceException;
5960
import org.labkey.api.query.UserSchema;
6061
import org.labkey.api.reader.FastaDataLoader;
@@ -448,6 +449,8 @@ private void deleteRefNtSequence(User user, Container container, List<Integer> r
448449

449450
try (DbScope.Transaction transaction = s.getSchema().getScope().ensureTransaction())
450451
{
452+
List<Map<String, Object>> toDeleteQus = new ArrayList<>();
453+
451454
for (int rowId : rowIds)
452455
{
453456
//first data from analyses
@@ -487,13 +490,28 @@ private void deleteRefNtSequence(User user, Container container, List<Integer> r
487490
{
488491
Map<String, Object> map = new CaseInsensitiveHashMap<>();
489492
map.put("rowid", rowId);
490-
List<Map<String, Object>> toDelete = Arrays.asList(map);
493+
toDeleteQus.add(map);
494+
}
495+
}
491496

492-
Map<String, Object> scriptContext = new HashMap<>();
493-
scriptContext.put("deleteFromServer", true); //a flag to make the trigger script accept this
494-
us.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES).getUpdateService().deleteRows(user, container, toDelete, null, scriptContext);
497+
if (!toDeleteQus.isEmpty())
498+
{
499+
Map<String, Object> scriptContext = new HashMap<>();
500+
scriptContext.put("deleteFromServer", true); //a flag to make the trigger script accept this
501+
502+
QueryUpdateService qus = us.getTable(SequenceAnalysisSchema.TABLE_REF_NT_SEQUENCES).getUpdateService();
503+
504+
int batchSize = 2500;
505+
int numBatches = (int)Math.ceil(toDeleteQus.size() / (double)batchSize);
506+
507+
for (int i = 0; i < numBatches; i++)
508+
{
509+
int start = i * batchSize;
510+
List<Map<String, Object>> subset = toDeleteQus.subList(start, Math.min(toDeleteQus.size(), start + batchSize));
511+
qus.deleteRows(user, container, subset, null, scriptContext);
495512
}
496513
}
514+
497515
transaction.commit();
498516
}
499517
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
import org.labkey.sequenceanalysis.run.bampostprocessing.RnaSeQCStep;
105105
import org.labkey.sequenceanalysis.run.bampostprocessing.SortSamStep;
106106
import org.labkey.sequenceanalysis.run.bampostprocessing.SplitNCigarReadsStep;
107+
import org.labkey.sequenceanalysis.run.preprocessing.CutadaptCropWrapper;
107108
import org.labkey.sequenceanalysis.run.preprocessing.CutadaptWrapper;
108109
import org.labkey.sequenceanalysis.run.preprocessing.DownsampleFastqWrapper;
109110
import org.labkey.sequenceanalysis.run.preprocessing.FastqcProcessingStep;
@@ -235,6 +236,7 @@ public static void registerPipelineSteps()
235236
SequencePipelineService.get().registerPipelineStep(new TrimmomaticWrapper.AvgQualProvider());
236237
SequencePipelineService.get().registerPipelineStep(new CutadaptWrapper.Provider());
237238
SequencePipelineService.get().registerPipelineStep(new FastqcProcessingStep.Provider());
239+
SequencePipelineService.get().registerPipelineStep(new CutadaptCropWrapper.Provider());
238240
//SequencePipelineService.get().registerPipelineStep(new BlastFilterPipelineStep.Provider());
239241

240242
//ref library

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import htsjdk.samtools.ValidationStringency;
55
import org.apache.commons.io.FilenameUtils;
66
import org.apache.commons.lang3.StringUtils;
7-
import org.apache.logging.log4j.Logger;
87
import org.apache.logging.log4j.LogManager;
8+
import org.apache.logging.log4j.Logger;
99
import org.jetbrains.annotations.Nullable;
1010
import org.labkey.api.pipeline.PipelineJob;
1111
import org.labkey.api.pipeline.PipelineJobException;
@@ -426,7 +426,7 @@ public String inferRPath(Logger log)
426426
//preferentially use R config setup in scripting props. only works if running locally.
427427
if (PipelineJobService.get().getLocationType() == PipelineJobService.LocationType.WebServer)
428428
{
429-
LabKeyScriptEngineManager svc = LabKeyScriptEngineManager.get();
429+
LabKeyScriptEngineManager svc = ServiceRegistry.get().getService(LabKeyScriptEngineManager.class);
430430
for (ExternalScriptEngineDefinition def : svc.getEngineDefinitions())
431431
{
432432
if (RScriptEngineFactory.isRScriptEngine(def.getExtensions()))

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/CellHashingHandler.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ public static List<ToolParameterDescriptor> getDefaultParams(boolean allowScanni
106106
}
107107

108108
ret.addAll(Arrays.asList(
109-
ToolParameterDescriptor.create("editDistance", "Edit Distance", null, "ldk-integerfield", null, 3),
109+
ToolParameterDescriptor.create("editDistance", "Edit Distance", null, "ldk-integerfield", null, 2),
110110
ToolParameterDescriptor.create("excludeFailedcDNA", "Exclude Failed cDNA", "If selected, cDNAs with non-blank status fields will be omitted", "checkbox", null, true),
111111
ToolParameterDescriptor.create("minCountPerCell", "Min Reads/Cell", null, "ldk-integerfield", null, 5),
112112
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.create("-cells"), "cells", "Expected Cells", null, "ldk-integerfield", null, 20000),

0 commit comments

Comments
 (0)