Skip to content

Commit 0bf4f72

Browse files
author
root
committed
Merge discvr-22.7 to develop
2 parents d4e5c01 + 050b970 commit 0bf4f72

File tree

4 files changed

+21
-62
lines changed

4 files changed

+21
-62
lines changed

mGAP/resources/views/releaseNotes.html

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
<h4>Release 2.3:</h4>
2+
<ul>
3+
<li>This is an additional 560 animals over the prior version.</li>
4+
<li>There are a sizable number of data processing changes, largely adaptations to handle the rapidly growing dataset size:</li>
5+
<ol>
6+
<li>All data used <a href="https://gatk.broadinstitute.org/hc/en-us/articles/4405443600667-ReblockGVCF">GATK Reblocked gVCFs</a> as inputs. This reduces processing, but can reduce sensitivity at homozygous-reference sites (resulting in a greater number of no-call genotypes at those sites).</li>
7+
<li>Also to adapt to the larger data size, we changed the structure of data processing. Previously, samples were each aggregated into one GenomicsDB workspace per data type (WGS or WXS). Next, GenotypeGVCFs was run on each workspace, with one job per contig. The resulting VCFs were filtered and merged. In this release, the upfront aggregation step was dropped, and we instead: 1) use reblocked gVCFs as input (entire set of samples), 2) chunk the genome into ~1000 bins with one job per bin, 3) per bin, run GenomicsDBImport to make a transient workspace using the job's intervals +/- 1000 bp, 4) run GenotypeGVCFs against that workspace, 5) filter the result, including technology-aware thresholds (i.e., different depth filters for WGS/WXS). This process is both considerably more efficient and has the advantage of joint-genotyping across the entire cohort at once.</li>
8+
</ol>
9+
</ul>
10+
111
<h4>Release 2.2:</h4>
212
<ul>
313
<li>This is an additional 103 animals over the prior version.</li>

mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -306,12 +306,12 @@ else if (so.getCategory().contains("Lifted"))
306306
}
307307
else if (so.getCategory().contains("mGAP Release: Sites Only"))
308308
{
309-
String name = so.getName().replaceAll(": Sites Only", "");
310-
sitesOnlyVcfMap.put(name, so);
309+
sitesOnlyVcfMap.put("mGAP Release: " + releaseVersion, so);
311310
}
312311
else if (so.getCategory().contains("Release Track") && so.getName().contains("Novel Sites"))
313312
{
314313
novelSitesVcfMap.put("mGAP Release: " + releaseVersion, so);
314+
trackVCFMap.put(so.getName(), so);
315315
}
316316
else if (so.getCategory().endsWith("Release"))
317317
{
@@ -853,12 +853,15 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
853853
ctx.getFileManager().removeIntermediateFile(renamedVcfIdx);
854854
ctx.getFileManager().addIntermediateFile(renamedVcfDone);
855855

856-
SequenceOutputFile output = new SequenceOutputFile();
857-
output.setFile(renamedVcf);
858-
output.setName(track.getTrackName());
859-
output.setCategory("Release Track");
860-
output.setLibrary_id(genome.getGenomeId());
861-
ctx.getFileManager().addSequenceOutput(output);
856+
if (!track.isPrimary())
857+
{
858+
SequenceOutputFile output = new SequenceOutputFile();
859+
output.setFile(renamedVcf);
860+
output.setName(track.getTrackName());
861+
output.setCategory("Release Track");
862+
output.setLibrary_id(genome.getGenomeId());
863+
ctx.getFileManager().addSequenceOutput(output);
864+
}
862865
}
863866
}
864867
catch (IOException e)

mcc/resources/queries/mcc/aggregatedDemographics.sql

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -82,45 +82,6 @@ FROM "/data/Colonies/WNPRC/".study.demographics d
8282

8383
UNION ALL
8484

85-
SELECT
86-
d.Id.mccAlias.externalAlias as Id,
87-
d.Id as originalId,
88-
d.date,
89-
d.species,
90-
d.gender,
91-
d.birth,
92-
d.death,
93-
'U NEB' as colony,
94-
d.damMccAlias.externalAlias as dam,
95-
d.sireMccAlias.externalAlias as sire,
96-
d.dam as originalDam,
97-
d.sire as originalSire,
98-
d.Id.mostRecentWeight.mostRecentWeight as mostRecentWeight,
99-
d.objectid,
100-
d.calculated_status,
101-
false as u24_status,
102-
o.availability,
103-
o.current_housing_status,
104-
o.infant_history,
105-
o.fertility_status,
106-
o.medical_history,
107-
o.date_of_observations,
108-
d.container
109-
110-
FROM "/data/Colonies/UNO/".study.demographics d
111-
LEFT JOIN (SELECT
112-
o.Id,
113-
o.date_of_observations,
114-
o."availability::observation" as availability,
115-
o."current_housing_status::observation" as current_housing_status,
116-
o."infant_history::observation" as infant_history,
117-
o."fertility_status::observation" as fertility_status,
118-
o."medical_history::observation" as medical_history,
119-
FROM "/data/Colonies/UNO/".study.mostRecentObservationsPivoted o
120-
) o ON (o.Id = d.Id)
121-
122-
UNION ALL
123-
12485
SELECT
12586
d.Id.mccAlias.externalAlias as Id,
12687
d.Id as originalId,

mcc/resources/queries/mcc/aggregatedKinship.sql

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,6 @@ FROM "/data/Colonies/WNPRC/".study.kinship d
2828

2929
UNION ALL
3030

31-
SELECT
32-
d.Id.mccAlias.externalAlias as Id,
33-
d.Id as originalId,
34-
d.date,
35-
d.Id2MccAlias.externalAlias as Id2,
36-
d.Id2 as originalId2,
37-
d.kinship,
38-
d.relationship,
39-
d.objectid,
40-
d.container
41-
42-
FROM "/data/Colonies/UNO/".study.kinship d
43-
44-
UNION ALL
45-
4631
SELECT
4732
d.Id.mccAlias.externalAlias as Id,
4833
d.Id as originalId,

0 commit comments

Comments
 (0)