Skip to content

Commit 5a0303b

Browse files
authored
Merge pull request #181 from LabKey/fb_merge_23.7_to_develop
Merge discvr-23.7 to develop
2 parents b502939 + 1d547f0 commit 5a0303b

File tree

10 files changed

+584
-86
lines changed

10 files changed

+584
-86
lines changed

mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
430430
{
431431
job.getLogger().debug("Merging track: " + trackName);
432432
List<File> toConcat = orderedJobDirs.stream().map(dirName -> {
433-
File f = getOutputVcf(trackName, new File(ctx.getWorkingDirectory(), dirName));
433+
File f = getOutputVcf(trackName, new File(ctx.getSourceDirectory(), dirName));
434434
if (!f.exists())
435435
{
436436
throw new IllegalStateException("Missing file: " + f.getPath());
@@ -469,8 +469,8 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
469469
}
470470

471471
job.getLogger().info("Merging novel sites VCF");
472-
List<File> toConcat = orderedScatterOutputs.stream().map(f -> {
473-
f = getNovelSitesOutput(f.getParentFile());
472+
List<File> toConcat = orderedJobDirs.stream().map(dirName -> {
473+
File f = getNovelSitesOutput(new File(ctx.getSourceDirectory(), dirName));
474474
if (!f.exists())
475475
{
476476
throw new IllegalStateException("Missing file: " + f.getPath());
@@ -482,6 +482,11 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
482482
return f;
483483
}).toList();
484484

485+
if (toConcat.isEmpty())
486+
{
487+
throw new PipelineJobException("No novel sites VCFs found");
488+
}
489+
485490
String basename = SequenceAnalysisService.get().getUnzippedBaseName(toConcat.get(0).getName());
486491
File combined = new File(ctx.getSourceDirectory(), basename + ".vcf.gz");
487492
File combinedIdx = new File(combined.getPath() + ".tbi");

mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import au.com.bytecode.opencsv.CSVReader;
44
import au.com.bytecode.opencsv.CSVWriter;
5+
import com.google.common.io.Files;
56
import htsjdk.samtools.util.CloseableIterator;
67
import htsjdk.samtools.util.IOUtil;
78
import htsjdk.variant.variantcontext.Allele;
@@ -1024,6 +1025,15 @@ private File getVariantTableName(JobContext ctx, File vcfInput)
10241025

10251026
private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTranslator translator, ReferenceGenome genome, boolean generateSummaries) throws PipelineJobException
10261027
{
1028+
File doneFile = new File(ctx.getWorkingDirectory(), "vcfInspect.done");
1029+
ctx.getFileManager().addIntermediateFile(doneFile);
1030+
1031+
if (doneFile.exists())
1032+
{
1033+
ctx.getLogger().info("VCF inspection already done, skipping");
1034+
return;
1035+
}
1036+
10271037
long sitesInspected = 0L;
10281038
long totalVariants = 0L;
10291039
long totalPrivateVariants = 0L;
@@ -1041,6 +1051,7 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
10411051

10421052
if (sitesInspected % 1000000 == 0)
10431053
{
1054+
ctx.getJob().setStatus(PipelineJob.TaskStatus.running, "Inspected " + sitesInspected + " variants");
10441055
ctx.getLogger().info("inspected " + sitesInspected + " variants");
10451056
}
10461057

@@ -1182,11 +1193,12 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
11821193
if (vc.getAttribute("CLN_SIG") != null)
11831194
{
11841195
List<String> clnSigs = vc.getAttributeAsStringList("CLN_SIG", "");
1185-
if (clnSigs.size() != vc.getAlternateAlleles().size())
1196+
if (clnSigs.size() != vc.getAlleles().size())
11861197
{
1187-
throw new IllegalStateException("CLN_SIG and alt alleles were not the same length: " + vc.toStringWithoutGenotypes());
1198+
throw new IllegalStateException("CLN_SIG and alleles were not the same length: " + vc.toStringWithoutGenotypes());
11881199
}
11891200

1201+
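// NOTE: CLN_SIG is expected to have one entry per allele (REF + ALT), so index i below is relative to vc.getAlleles(), not only the alternate alleles: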
11901202
List<String> clnDisease = vc.getAttributeAsStringList("CLN_DN", "");
11911203
List<String> clnAlleleIds = vc.getAttributeAsStringList("CLN_ALLELEID", "");
11921204
int i = -1;
@@ -1198,7 +1210,7 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
11981210
continue;
11991211
}
12001212

1201-
Allele altAllele = vc.getAlternateAllele(i);
1213+
Allele a = vc.getAlleles().get(i);
12021214

12031215
String[] sigSplit = sigList.split("\\|");
12041216
List<String> diseaseSplit = Arrays.asList(clnDisease.get(i).split("\\|"));
@@ -1214,7 +1226,7 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
12141226

12151227
try
12161228
{
1217-
maybeWriteVariantLine(queuedLines, vc, altAllele.getBaseString(), "ClinVar", diseaseSplit.get(j), description, overlappingGenes, omimIds, omimPhenotypes, ctx.getLogger(), "ClinVar:" + clnAlleleIds.get(i));
1229+
maybeWriteVariantLine(queuedLines, vc, a.getBaseString(), "ClinVar", diseaseSplit.get(j), description, overlappingGenes, omimIds, omimPhenotypes, ctx.getLogger(), "ClinVar:" + clnAlleleIds.get(i));
12181230

12191231
}
12201232
catch (IndexOutOfBoundsException e)
@@ -1300,6 +1312,15 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
13001312

13011313
generateSummaries(ctx, vcfInput, genome, totalVariants, totalPrivateVariants, totalSubjects, typeCounts);
13021314
}
1315+
1316+
try
1317+
{
1318+
Files.touch(doneFile);
1319+
}
1320+
catch (IOException e)
1321+
{
1322+
throw new PipelineJobException(e);
1323+
}
13031324
}
13041325

13051326
public Collection<String> parseRawOmimPheno(VariantContext vc, Logger log)

0 commit comments

Comments
 (0)