9696public class mGapReleaseGenerator extends AbstractParameterizedOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >
9797{
9898 private final FileType _vcfType = new FileType (List .of (".vcf" ), ".vcf" , false , FileType .gzSupportLevel .SUPPORT_GZ );
99- public static final String MMUL_GENOME = "mmulGenome " ;
99+ public static final String BASE_GENOME = "baseGenome " ;
100100
101101 public mGapReleaseGenerator ()
102102 {
103103 super (ModuleLoader .getInstance ().getModule (mGAPModule .class ), "Create mGAP Release" , "This will prepare an input VCF for use as an mGAP public release. This will optionally include: removing excess annotations and program records, limiting to SNVs (optional) and removing genotype data (optional). If genotypes are retained, the subject names will be checked for mGAP aliases and replaced as needed." , new LinkedHashSet <>(PageFlowUtil .set ("sequenceanalysis/field/GenomeFileSelectorField.js" )), Arrays .asList (
104+ ToolParameterDescriptor .create ("species" , "Version" , "The species, which is used to filter tracks" , "ldk-simplelabkeycombo" , new JSONObject (){{
105+ put ("allowBlank" , false );
106+ put ("doNotIncludeInTemplates" , true );
107+ put ("width" , 400 );
108+ put ("schemaName" , "laboratory" );
109+ put ("queryName" , "species" );
110+ put ("containerPath" , "js:Laboratory.Utils.getQueryContainerPath()" );
111+ put ("displayField" , "common_name" );
112+ put ("valueField" , "common_name" );
113+ }}, null ),
104114 ToolParameterDescriptor .create ("releaseVersion" , "Version" , "This value will be used as the version when published." , "textfield" , new JSONObject (){{
105115 put ("allowBlank" , false );
106116 put ("doNotIncludeInTemplates" , true );
@@ -182,10 +192,16 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
182192 ctx .getJob ().getLogger ().info ("writing track/subset data to file" );
183193 Container target = ctx .getJob ().getContainer ().isWorkbook () ? ctx .getJob ().getContainer ().getParent () : ctx .getJob ().getContainer ();
184194 TableInfo releaseTracks = QueryService .get ().getUserSchema (ctx .getJob ().getUser (), target , mGAPSchema .NAME ).getTable (mGAPSchema .TABLE_RELEASE_TRACKS );
195+
196+ final String species = ctx .getParams ().optString ("species" );
197+ if (species == null )
198+ {
199+ throw new PipelineJobException ("Missing value for species" );
200+ }
185201
186202 Set <FieldKey > toSelect = new HashSet <>();
187203 toSelect .add (FieldKey .fromString ("trackName" ));
188- toSelect .add (FieldKey .fromString ("mergepriority " ));
204+ toSelect .add (FieldKey .fromString ("species " ));
189205 toSelect .add (FieldKey .fromString ("skipvalidation" ));
190206 toSelect .add (FieldKey .fromString ("isprimarytrack" ));
191207 toSelect .add (FieldKey .fromString ("vcfId" ));
@@ -197,7 +213,7 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
197213 File trackFile = getTrackListFile (ctx .getOutputDir ());
198214 try (CSVWriter writer = new CSVWriter (PrintWriters .getPrintWriter (trackFile ), '\t' , CSVWriter .NO_QUOTE_CHARACTER ))
199215 {
200- new TableSelector (releaseTracks , colMap .values (), null , null ).forEachResults (rs -> {
216+ new TableSelector (releaseTracks , colMap .values (), new SimpleFilter ( FieldKey . fromString ( "species" ), species ) , null ).forEachResults (rs -> {
201217 if (rs .getObject (FieldKey .fromString ("vcfId" )) == null )
202218 {
203219 throw new SQLException ("No VCF found for track: " + rs .getObject (FieldKey .fromString ("trackName" )));
@@ -217,7 +233,7 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
217233 writer .writeNext (new String []{
218234 rs .getString (FieldKey .fromString ("trackName" )),
219235 String .valueOf (rs .getInt (FieldKey .fromString ("vcfId/dataId" ))),
220- String . valueOf ( rs .getObject (FieldKey .fromString ("mergepriority" )) == null ? 999 : rs . getInt ( FieldKey . fromString ( "mergepriority" ) )),
236+ rs .getString (FieldKey .fromString ("species" )),
221237 String .valueOf (rs .getObject (FieldKey .fromString ("skipvalidation" )) != null && rs .getBoolean (FieldKey .fromString ("skipvalidation" ))),
222238 String .valueOf (rs .getObject (FieldKey .fromString ("isprimarytrack" )) != null && rs .getBoolean (FieldKey .fromString ("isprimarytrack" )))
223239 });
@@ -256,7 +272,7 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
256272 }
257273 int sourceGenome = genomeIds .iterator ().next ();
258274 ctx .getSequenceSupport ().cacheGenome (SequenceAnalysisService .get ().getReferenceGenome (sourceGenome , ctx .getJob ().getUser ()));
259- ctx .getSequenceSupport ().cacheObject (MMUL_GENOME , sourceGenome );
275+ ctx .getSequenceSupport ().cacheObject (BASE_GENOME , sourceGenome );
260276
261277 AnnotationStep .findChainFile (genomeIds .iterator ().next (), ctx .getParams ().getInt (AnnotationStep .GRCH37 ), ctx .getSequenceSupport (), ctx .getJob ());
262278
@@ -286,8 +302,8 @@ public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<Recor
286302
287303 private SequenceOutputFile getAndValidateLuceneIndex (PipelineJob job , JSONObject params ) throws PipelineJobException
288304 {
289- Integer luceneIndexId = params .optInt ("luceneIndex" );
290- if (luceneIndexId == null || luceneIndexId == 0 )
305+ int luceneIndexId = params .optInt ("luceneIndex" );
306+ if (luceneIndexId == 0 )
291307 {
292308 throw new PipelineJobException ("Missing luceneIndex ID" );
293309 }
@@ -520,10 +536,13 @@ else if (so.getCategory().endsWith("Release Track"))
520536 throw new PipelineJobException ("Unable to find total variant from stats file!" );
521537 }
522538
539+ final String species = ctx .getParams ().optString ("species" );
540+
523541 //actually create release record
524542 Map <String , Object > row = new CaseInsensitiveHashMap <>();
525543 row .put ("version" , job .getParameters ().get ("releaseVersion" ));
526544 row .put ("releaseDate" , new Date ());
545+ row .put ("species" , species );
527546 row .put ("vcfId" , so .getRowid ());
528547 row .put ("liftedVcfId" , liftedVcf .getRowid ());
529548 row .put ("sitesOnlyVcfId" , sitesOnlyVcf .getRowid ());
@@ -583,7 +602,7 @@ else if (so.getCategory().endsWith("Release Track"))
583602
584603 //also tracks:
585604 UserSchema us = QueryService .get ().getUserSchema (job .getUser (), job .getContainer ().isWorkbook () ? job .getContainer ().getParent () : job .getContainer (), mGAPSchema .NAME );
586- new TableSelector (us .getTable (mGAPSchema .TABLE_RELEASE_TRACKS ), null , null ).forEachResults (rs -> {
605+ new TableSelector (us .getTable (mGAPSchema .TABLE_RELEASE_TRACKS ), new SimpleFilter ( FieldKey . fromString ( "species" ), species ) , null ).forEachResults (rs -> {
587606 SequenceOutputFile so3 = trackVCFMap .get (rs .getString (FieldKey .fromString ("trackName" )));
588607 if (so3 == null && rs .getBoolean (FieldKey .fromString ("isprimarytrack" )))
589608 {
@@ -836,15 +855,15 @@ public static class TrackDescriptor
836855 {
837856 String _trackName ;
838857 Integer _dataId ;
839- Integer _mergePriority ;
858+ String _species ;
840859 boolean _skipValidation ;
841860 boolean _isPrimary ;
842861
/**
 * Builds a descriptor from one parsed row of the track list file.
 *
 * The fields are read positionally: track name, data ID, species,
 * skip-validation flag, primary-track flag.
 *
 * @param vals the fields of a single row; must contain at least five entries
 * @throws IllegalArgumentException if {@code vals} is null or has fewer than five entries
 * @throws NumberFormatException if the data ID field (index 1) is not a valid integer
 */
public TrackDescriptor(String[] vals)
{
    // Fail with a message that names the offending row rather than a bare index error
    if (vals == null || vals.length < 5)
    {
        throw new IllegalArgumentException("Expected at least 5 fields for track row, but found: " + (vals == null ? "null" : Arrays.toString(vals)));
    }

    _trackName = vals[0];
    _dataId = Integer.parseInt(vals[1]);
    _species = vals[2];
    // Boolean.parseBoolean treats anything other than "true" (case-insensitive) as false
    _skipValidation = Boolean.parseBoolean(vals[3]);
    _isPrimary = Boolean.parseBoolean(vals[4]);
}
@@ -859,9 +878,9 @@ public Integer getDataId()
859878 return _dataId ;
860879 }
861880
862- public Integer getMergePriority ()
881+ public String getSpecies ()
863882 {
864- return _mergePriority ;
883+ return _species ;
865884 }
866885
867886 public boolean isSkipValidation ()
@@ -886,15 +905,6 @@ private List<TrackDescriptor> getTracks(File webserverDir) throws PipelineJobExc
886905 ret .add (new TrackDescriptor (line ));
887906 }
888907
889- ret .sort (new Comparator <TrackDescriptor >()
890- {
891- @ Override
892- public int compare (TrackDescriptor o1 , TrackDescriptor o2 )
893- {
894- return o1 .getMergePriority ().compareTo (o2 .getMergePriority ());
895- }
896- });
897-
898908 return ret ;
899909 }
900910 catch (IOException e )
@@ -917,12 +927,13 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
917927
918928 GeneToNameTranslator translator = new GeneToNameTranslator (gtf , ctx .getLogger ());
919929 ReferenceGenome grch37Genome = ctx .getSequenceSupport ().getCachedGenome (ctx .getParams ().getInt (AnnotationStep .GRCH37 ));
920- int genomeId = ctx .getSequenceSupport ().getCachedObject (MMUL_GENOME , Integer .class );
930+ int genomeId = ctx .getSequenceSupport ().getCachedObject (BASE_GENOME , Integer .class );
921931 ReferenceGenome genome = ctx .getSequenceSupport ().getCachedGenome (genomeId );
922932 boolean testOnly = ctx .getParams ().optBoolean ("testOnly" , false );
923933
934+ String species = ctx .getParams ().getString ("species" );
924935 String releaseVersion = ctx .getParams ().optString ("releaseVersion" , "0.0" );
925- File primaryTrackVcf = new File (ctx .getOutputDir (), "mGap.v" + FileUtil .makeLegalName (releaseVersion ).replaceAll (" " , "_" ) + ".vcf.gz" );
936+ File primaryTrackVcf = new File (ctx .getOutputDir (), "mGap." + species + ". v" + FileUtil .makeLegalName (releaseVersion ).replaceAll (" " , "_" ) + ".vcf.gz" );
926937
927938 try
928939 {
@@ -994,15 +1005,15 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
9941005
9951006 SequenceOutputFile output = new SequenceOutputFile ();
9961007 output .setFile (primaryTrackVcf );
997- output .setName ("mGAP Release: " + releaseVersion );
1008+ output .setName ("mGAP Release: " + species + " " + releaseVersion );
9981009 output .setCategory ((testOnly ? "Test " : "" ) + "mGAP Release" );
9991010 output .setLibrary_id (genome .getGenomeId ());
10001011 ctx .getFileManager ().addSequenceOutput (output );
10011012
10021013 File interestingVariantTable = getVariantTableName (ctx , primaryTrackVcf );
10031014 SequenceOutputFile output2 = new SequenceOutputFile ();
10041015 output2 .setFile (interestingVariantTable );
1005- output2 .setName ("mGAP Release: " + releaseVersion + " Variant Table" );
1016+ output2 .setName ("mGAP Release: " + species + " " + releaseVersion + " Variant Table" );
10061017 output2 .setCategory ((testOnly ? "Test " : "" ) + "mGAP Release Variant Table" );
10071018 output2 .setLibrary_id (genome .getGenomeId ());
10081019 ctx .getFileManager ().addSequenceOutput (output2 );
@@ -1012,7 +1023,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
10121023 File lifted = liftToHuman (ctx , primaryTrackVcf , sitesOnlyVcf , grch37Genome );
10131024 SequenceOutputFile output3 = new SequenceOutputFile ();
10141025 output3 .setFile (lifted );
1015- output3 .setName ("mGAP Release: " + releaseVersion + " Lifted to Human" );
1026+ output3 .setName ("mGAP Release: " + species + " " + releaseVersion + " Lifted to Human" );
10161027 output3 .setCategory ((testOnly ? "Test " : "" ) + "mGAP Release Lifted to Human" );
10171028 output3 .setLibrary_id (grch37Genome .getGenomeId ());
10181029 ctx .getFileManager ().addSequenceOutput (output3 );
@@ -1111,16 +1122,6 @@ private File getSitesOnlyVcfName(File outDir, File primaryTrackVcf)
11111122 return new File (outDir , SequenceAnalysisService .get ().getUnzippedBaseName (primaryTrackVcf .getName ()) + ".sitesOnly.vcf.gz" );
11121123 }
11131124
1114- private File getDroppedSitesVcfName (File outDir , File primaryTrackVcf )
1115- {
1116- return new File (outDir , SequenceAnalysisService .get ().getUnzippedBaseName (primaryTrackVcf .getName ()) + ".droppedFromPriorRelease.vcf.gz" );
1117- }
1118-
1119- private File getNovelSitesVcfName (File outDir , File primaryTrackVcf )
1120- {
1121- return new File (outDir , SequenceAnalysisService .get ().getUnzippedBaseName (primaryTrackVcf .getName ()) + ".newToRelease.vcf.gz" );
1122- }
1123-
11241125 private File getLiftedVcfName (File outDir , File primaryTrackVcf )
11251126 {
11261127 return new File (outDir , SequenceAnalysisService .get ().getUnzippedBaseName (primaryTrackVcf .getName ()) + ".liftToGRCh37.vcf.gz" );
0 commit comments