Skip to content

Commit 1907c45

Browse files
committed
Merge branch 'discvr-21.3' into dev
2 parents 11bd758 + e16aab0 commit 1907c45

File tree

14 files changed

+1488
-357
lines changed

14 files changed

+1488
-357
lines changed

.gitignore

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,10 @@
55
/travisSettings.sh
66

77
# Created by npm install
8-
/jbrowse/jb_run.js
9-
/jbrowse/jb_setup.js
10-
/jbrowse/.gradle
8+
jbrowse/jb_run.js
9+
jbrowse/jb_setup.js
10+
jbrowse/.gradle
11+
jbrowse/resources/external/jb-cli/*
1112

1213
# Ignore labkey-npm build artifacts
1314
**/gen/**

SequenceAnalysis/pipeline_code/sequence_tools_install.sh

Lines changed: 7 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -276,13 +276,14 @@ else
276276
echo "Already installed"
277277
fi
278278

279+
279280
#
280-
# GATK
281+
# GATK3
281282
#
282283
echo ""
283284
echo ""
284285
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
285-
echo "Install GATK"
286+
echo "Install GATK3"
286287
echo ""
287288
cd $LKSRC_DIR
288289

@@ -319,21 +320,6 @@ then
319320
cd ../../../
320321
cp gatk-protected/public/VectorPairHMM/target/libVectorLoglessPairHMM.so ${LKTOOLS_DIR}
321322

322-
#this is a custom extension: https://github.com/biodev/HTCondor_drivers
323-
#git clone https://github.com/biodev/HTCondor_drivers.git
324-
#mkdir ./gatk-protected/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/engine/condor
325-
#cp ./HTCondor_drivers/Queue/CondorJob* ./gatk-protected/public/gatk-queue/src/main/scala/org/broadinstitute/gatk/queue/engine/condor/
326-
327-
#another, for MV checking
328-
mkdir -p ${LK_HOME}/svn/trunk/pipeline_code/
329-
svn co --no-auth-cache https://github.com/BimberLab/DiscvrLabkeyModules/trunk/SequenceAnalysis/pipeline_code/gatk ${LK_HOME}/svn/trunk/pipeline_code/gatk/
330-
331-
mv ${LK_HOME}/svn/trunk/pipeline_code/gatk/MendelianViolationCount.java ./gatk-protected/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/
332-
mv ${LK_HOME}/svn/trunk/pipeline_code/gatk/MendelianViolationBySample.java ./gatk-protected/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/
333-
mv ${LK_HOME}/svn/trunk/pipeline_code/gatk/GenotypeConcordance.java ./gatk-protected/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/
334-
mv ${LK_HOME}/svn/trunk/pipeline_code/gatk/GenotypeConcordanceBySite.java ./gatk-protected/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/
335-
mv ${LK_HOME}/svn/trunk/pipeline_code/gatk/MinorAlleleFrequency.java ./gatk-protected/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/
336-
337323
cd gatk-protected
338324

339325
#remove due to compilation / dependency resolution error
@@ -348,35 +334,14 @@ then
348334
cp ./protected/gatk-queue-package-distribution/target/gatk-queue-package-distribution-3.7.jar ${LKTOOLS_DIR}/Queue.jar
349335
fi
350336

337+
351338
if [[ ! -e ${LKTOOLS_DIR}/DISCVRSeq.jar || ! -z $FORCE_REINSTALL ]];
352339
then
353340
rm -Rf DISCVRSeq*
354341
rm -Rf ${LKTOOLS_DIR}/DISCVRSeq.jar
355342

356-
wget $WGET_OPTS https://github.com/BimberLab/DISCVRSeq/releases/download/1.0/DISCVRSeq-1.0.jar
357-
cp DISCVRSeq-1.0.jar ${LKTOOLS_DIR}/DISCVRSeq.jar
358-
fi
359-
360-
if [[ ! -e ${LKTOOLS_DIR}/GenomeAnalysisTK-discvr.jar || ! -z $FORCE_REINSTALL ]];
361-
then
362-
rm -Rf ${LKTOOLS_DIR}/GenomeAnalysisTK-discvr.jar
363-
rm -Rf ${LKSRC_DIR}/gatk-discvr
364-
365-
mkdir -p gatk-discvr
366-
cd gatk-discvr
367-
368-
echo "Downloading GATK from GIT"
369-
git clone git://github.com/bbimber/gatk-protected.git
370-
cd gatk-protected
371-
372-
#remove due to compilation error
373-
rm ./public/external-example/src/main/java/org/mycompany/app/*
374-
rm ./public/external-example/src/test/java/org/mycompany/app/*
375-
376-
mvn verify -U -P\!queue
377-
mvn package -P\!queue
378-
379-
cp ./protected/gatk-package-distribution/target/gatk-package-distribution-3.7.jar ${LKTOOLS_DIR}/GenomeAnalysisTK-discvr.jar
343+
wget $WGET_OPTS https://github.com/BimberLab/DISCVRSeq/releases/download/1.29/DISCVRSeq-1.29.jar
344+
cp DISCVRSeq-1.29.jar ${LKTOOLS_DIR}/DISCVRSeq.jar
380345
fi
381346

382347

@@ -1336,7 +1301,7 @@ fi
13361301
echo ""
13371302
echo ""
13381303
echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
1339-
echo "Installing Trimmomatic"
1304+
echo "Installing lofreq"
13401305
echo ""
13411306
cd $LKSRC_DIR
13421307

@@ -1355,29 +1320,6 @@ else
13551320
fi
13561321

13571322

1358-
#
1359-
#CITE-seq-count
1360-
#
1361-
#
1362-
#echo ""
1363-
#echo ""
1364-
#echo "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
1365-
#echo "Installing CITE-seq-count"
1366-
#echo ""
1367-
#cd $LKSRC_DIR
1368-
#
1369-
#if [[ ! -e ${LKTOOLS_DIR}/CITE-seq-Count || ! -z $FORCE_REINSTALL ]];
1370-
#then
1371-
# rm -Rf ${LKTOOLS_DIR}/CITE-seq*
1372-
# mkdir -p ${LKTOOLS_DIR}/CITE-seq-count-base
1373-
#
1374-
# pip install CITE-seq-Count --upgrade --install-option="--prefix=${LKTOOLS_DIR}/CITE-seq-count-base"
1375-
# mv ${LKTOOLS_DIR}/CITE-seq-count-base/bin/* ${LKTOOLS_DIR}
1376-
#
1377-
#else
1378-
# echo "Already installed"
1379-
#fi
1380-
13811323
if [ ! -z $LK_USER ];
13821324
then
13831325
echo "Setting owner of files to: ${LK_USER}"

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/preprocessing/TagPcrSummaryStep.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ public static class Provider extends AbstractAnalysisStepProvider<TagPcrSummaryS
6969
{
7070
public Provider()
7171
{
72-
super("Tag-PCR", "Tag-PCR Integration Sites", null, "This will produce a table summarizing unique alignments in this BAM. It was originally created to summarize genomic insertions.", Arrays.asList(
72+
super("Tag-PCR", "Map Integration Sites", null, "This will produce a table summarizing unique genome/transgene junctions using a BAM.", Arrays.asList(
7373
ToolParameterDescriptor.create(OUTPUT_GENBANK, "Create Genbank Output", "If selected, this will output a genbank file summarizing amplicons and primers", "checkbox", new JSONObject(){{
7474
put("checked", true);
7575
}}, true),
@@ -349,7 +349,7 @@ public void execute(File bamFile, File referenceFasta, File outputTable, @Nullab
349349
List<String> args = new ArrayList<>();
350350
args.addAll(getBaseArgs());
351351

352-
args.add("TagPcrSummary");
352+
args.add("IntegrationSiteMapper");
353353

354354
args.add("--bam");
355355
args.add(bamFile.getPath());

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/AbstractGenomicsDBImportHandler.java

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,7 @@ Set<String> getContigsInInputs(List<File> inputVCFs, Logger log) throws Pipeline
301301
return _contigsInInputs;
302302
}
303303

304-
private void copyToLevelFiles(PipelineJob job, File sourceWorkspace, File destinationWorkspace, boolean removeOtherFiles, boolean overwriteExisting) throws IOException
304+
private void copyTopLevelFiles(PipelineJob job, File sourceWorkspace, File destinationWorkspace, boolean removeOtherFiles, boolean overwriteExisting) throws IOException, PipelineJobException
305305
{
306306
job.getLogger().info("Copying top-level files from: " + sourceWorkspace.getPath());
307307
if (removeOtherFiles)
@@ -333,6 +333,28 @@ private void copyToLevelFiles(PipelineJob job, File sourceWorkspace, File destin
333333

334334
FileUtils.copyFile(source, dest);
335335
}
336+
337+
File metaDir = new File(sourceWorkspace, "genomicsdb_meta_dir");
338+
File metaDirDest = new File(sourceWorkspace, "genomicsdb_meta_dir");
339+
if (metaDirDest.exists())
340+
{
341+
if (!overwriteExisting)
342+
{
343+
job.getLogger().debug("workspace file exists, will not overwrite: " + metaDirDest.getPath());
344+
}
345+
else
346+
{
347+
FileUtils.deleteDirectory(metaDirDest);
348+
}
349+
}
350+
351+
if (metaDir.exists() && !metaDirDest.exists())
352+
{
353+
job.getLogger().debug("Copying directory with rsync: " + metaDir.getPath());
354+
new SimpleScriptWrapper(job.getLogger()).execute(Arrays.asList(
355+
"rsync", "-r", "-a", "--delete", "--no-owner", "--no-group", "--no-perms", "--chmod=D2770,F660", metaDir.getPath(), metaDirDest.getParentFile().getPath()
356+
));
357+
}
336358
}
337359

338360
protected File getMarkerFile(File workspace)
@@ -684,7 +706,7 @@ private void copyWorkspace(JobContext ctx, File sourceWorkspace, File destinatio
684706

685707
if (!haveCopiedTopLevelFiles)
686708
{
687-
copyToLevelFiles(ctx.getJob(), sourceWorkspace, destinationWorkspaceFolder, removeExistingTopLevelFiles, overwriteTopLevelFiles);
709+
copyTopLevelFiles(ctx.getJob(), sourceWorkspace, destinationWorkspaceFolder, removeExistingTopLevelFiles, overwriteTopLevelFiles);
688710
haveCopiedTopLevelFiles = true;
689711
}
690712

@@ -707,7 +729,6 @@ private void copyWorkspace(JobContext ctx, File sourceWorkspace, File destinatio
707729
}
708730
else
709731
{
710-
//TODO: dest permissions?
711732
ctx.getLogger().debug("Copying directory with rsync: " + sourceFolder.getPath());
712733
//NOTE: since neither path will end in slashes, rsync to the parent folder should result in the correct placement
713734
new SimpleScriptWrapper(ctx.getLogger()).execute(Arrays.asList(

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/GenotypeGVCFsWrapper.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,11 @@ else if (f.isDirectory() && doneFile.exists())
226226
{
227227
ctx.getLogger().info("Files will be marked for deletion after this step");
228228
toDelete.add(movedFile);
229-
toDelete.add(movedIdx);
229+
if (movedIdx != null)
230+
{
231+
toDelete.add(movedIdx);
232+
}
233+
230234
if (doneFile.exists())
231235
{
232236
toDelete.add(doneFile);

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/VariantAnnotatorWrapper.java

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,8 @@
11
package org.labkey.sequenceanalysis.run.util;
22

33
import org.apache.logging.log4j.Logger;
4-
import org.apache.logging.log4j.LogManager;
54
import org.labkey.api.pipeline.PipelineJobException;
6-
import org.labkey.api.sequenceanalysis.run.AbstractGatkWrapper;
5+
import org.labkey.api.sequenceanalysis.run.AbstractDiscvrSeqWrapper;
76

87
import java.io.File;
98
import java.util.ArrayList;
@@ -12,7 +11,7 @@
1211
/**
1312
* Created by bimber on 8/8/2014.
1413
*/
15-
public class VariantAnnotatorWrapper extends AbstractGatkWrapper
14+
public class VariantAnnotatorWrapper extends AbstractDiscvrSeqWrapper
1615
{
1716
public VariantAnnotatorWrapper(Logger log)
1817
{
@@ -21,20 +20,19 @@ public VariantAnnotatorWrapper(Logger log)
2120

2221
public void execute(File referenceFasta, File inputVcf, File outputVcf, List<String> options) throws PipelineJobException
2322
{
24-
getLogger().info("Running GATK VariantAnnotator");
23+
getLogger().info("Running DiscvrVariantAnnotator");
2524

2625
ensureDictionary(referenceFasta);
2726

2827
List<String> args = new ArrayList<>(getBaseArgs());
29-
args.add("-T");
30-
args.add("VariantAnnotator");
28+
args.add("DiscvrVariantAnnotator");
3129
args.add("-R");
3230
args.add(referenceFasta.getPath());
3331

3432
args.add("-V");
3533
args.add(inputVcf.getPath());
3634

37-
args.add("-o");
35+
args.add("-O");
3836
args.add(outputVcf.getPath());
3937

4038
if (options != null)

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/GenotypeConcordanceStep.java

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -100,16 +100,9 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
100100
options.add("-A");
101101
options.add("GenotypeConcordanceBySite");
102102

103-
options.add("-resource:GT_SOURCE");
103+
options.add("-rg");
104104
options.add(refVCF.getPath());
105105

106-
Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
107-
if (threads != null)
108-
{
109-
options.add("-nt");
110-
options.add(String.valueOf(Math.min(threads, 8)));
111-
}
112-
113106
if (intervals != null)
114107
{
115108
intervals.forEach(interval -> {

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/VariantAnnotatorStep.java

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -88,13 +88,6 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
8888
options.add("MinorAlleleFrequency");
8989
}
9090

91-
Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
92-
if (threads != null)
93-
{
94-
options.add("-nt");
95-
options.add(String.valueOf(Math.min(threads, 8)));
96-
}
97-
9891
if (intervals != null)
9992
{
10093
intervals.forEach(interval -> {
@@ -103,14 +96,6 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
10396
});
10497
}
10598

106-
//TODO: allow annotation using fields from another VCF:
107-
/**
108-
-resource:indian "$INDIAN_SUBSET" \
109-
-E indian.AF \
110-
-resource:chinese "$CHINESE_SUBSET" \
111-
-E chinese.AF \
112-
*/
113-
11499
getWrapper().execute(genome.getWorkingFastaFile(), inputVCF, outputVcf, options);
115100
if (!outputVcf.exists())
116101
{

0 commit comments

Comments
 (0)