Skip to content

Commit 9912271

Browse files
authored
Merge pull request #142 from LabKey/fb_merge_22.7_to_develop
Merge 22.7 to develop
2 parents 616f4b7 + 98a9139 commit 9912271

File tree

29 files changed

+3829
-1177
lines changed

29 files changed

+3829
-1177
lines changed

mGAP/src/org/labkey/mgap/pipeline/RenameSamplesForMgapStep.java

Lines changed: 55 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import au.com.bytecode.opencsv.CSVReader;
44
import au.com.bytecode.opencsv.CSVWriter;
5+
import com.google.common.collect.Lists;
56
import htsjdk.samtools.util.CloseableIterator;
67
import htsjdk.samtools.util.Interval;
78
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
@@ -250,46 +251,22 @@ private Map<String, String> getSamplesToAlias(File input) throws PipelineJobExce
250251
return Collections.emptyMap();
251252
}
252253

253-
TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_ANIMAL_MAPPING);
254-
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("subjectname", "externalAlias", "otherNames"), new SimpleFilter(FieldKey.fromString("subjectname"), subjects, CompareType.IN), null);
255-
ts.forEachResults(new Selector.ForEachBlock<Results>()
256-
{
257-
@Override
258-
public void exec(Results rs) throws SQLException
259-
{
260-
sampleNameMap.put(rs.getString(FieldKey.fromString("subjectname")), rs.getString(FieldKey.fromString("externalAlias")));
261-
262-
if (rs.getObject(FieldKey.fromString("otherNames")) != null)
263-
{
264-
String val = StringUtils.trimToNull(rs.getString(FieldKey.fromString("otherNames")));
265-
if (val != null)
266-
{
267-
String[] tokens = val.split(",");
268-
for (String name : tokens)
269-
{
270-
name = StringUtils.trimToNull(name);
271-
if (name == null)
272-
{
273-
continue;
274-
}
275-
276-
if (sampleNameMap.containsKey(name) && !sampleNameMap.get(name).equals(rs.getString(FieldKey.fromString("externalAlias"))))
277-
{
278-
throw new IllegalStateException("Improper data in mgap.aliases table. Dual/conflicting aliases: " + name + ": " + rs.getString(FieldKey.fromString("externalAlias")) + " / " + sampleNameMap.get(name));
279-
}
280-
281-
sampleNameMap.put(name, rs.getString(FieldKey.fromString("externalAlias")));
282-
}
283-
}
284-
}
285-
}
286-
});
287-
288254
Set<String> sampleNames = new HashSet<>(header.getSampleNamesInOrder());
289255
getPipelineCtx().getLogger().info("total samples in input VCF: " + sampleNames.size());
290256

291-
sampleNames.retainAll(subjects);
292-
getPipelineCtx().getLogger().info("total samples to be written to any track: " + sampleNames.size());
257+
// Pass 1: match on proper ID:
258+
querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("subjectname"), subjects, CompareType.IN));
259+
260+
// Pass 2: add others using otherNames:
261+
List<String> missingSamples = new ArrayList<>(sampleNames);
262+
missingSamples.removeAll(sampleNameMap.keySet());
263+
if (!missingSamples.isEmpty())
264+
{
265+
getPipelineCtx().getLogger().debug("Querying " + missingSamples.size() + " samples using otherNames field");
266+
querySampleBatch(sampleNameMap, new SimpleFilter(FieldKey.fromString("otherNames"), missingSamples, CompareType.CONTAINS_ONE_OF));
267+
}
268+
269+
getPipelineCtx().getLogger().info("total sample names to alias: " + sampleNameMap.size());
293270

294271
sampleNames.removeAll(sampleNameMap.keySet());
295272
if (!sampleNames.isEmpty())
@@ -298,17 +275,54 @@ public void exec(Results rs) throws SQLException
298275
}
299276

300277
//Now ensure we don't have duplicate mappings:
301-
List<String> translated = new ArrayList<>(sampleNames.stream().map(sampleNameMap::get).collect(Collectors.toList()));
278+
List<String> translated = new ArrayList<>(header.getSampleNamesInOrder().stream().map(sampleNameMap::get).toList());
302279
Set<String> unique = new HashSet<>();
303-
List<String> duplicates = translated.stream().filter(o -> !unique.add(o)).collect(Collectors.toList());
280+
List<String> duplicates = translated.stream().filter(o -> !unique.add(o)).toList();
304281
if (!duplicates.isEmpty())
305282
{
306283
throw new PipelineJobException("There were duplicate mGAP IDs are translation. They were: " + StringUtils.join(duplicates, ","));
307284
}
308285
}
309286

310-
getPipelineCtx().getLogger().info("total sample names to alias: " + sampleNameMap.size());
311-
312287
return sampleNameMap;
313288
}
289+
290+
/**
 * Runs one query against the mGAPSchema.TABLE_ANIMAL_MAPPING table and merges the
 * resulting name-to-alias pairs into the supplied map.
 *
 * For every row returned, the row's "subjectname" is mapped to its "externalAlias".
 * If the row has a non-blank "otherNames" value, it is split on commas and each
 * non-blank, trimmed token is also mapped to that same "externalAlias".
 *
 * The table is resolved using the pipeline job's user and container; when the job's
 * container is a workbook, the parent container is used instead.
 *
 * NOTE(review): the conflict message below refers to the "mgap.aliases" table while the
 * query targets mGAPSchema.TABLE_ANIMAL_MAPPING -- confirm these name the same table.
 *
 * @param sampleNameMap map of sample name to alias; modified in place by this method
 * @param filter        row filter for the query; a new SimpleFilter is built from it before use
 * @throws IllegalStateException if an "otherNames" token is already present in the map
 *                               with an alias different from the current row's alias
 */
private void querySampleBatch(Map<String, String> sampleNameMap, SimpleFilter filter)
291+
{
292+
// Look up the table in the parent container when the job is running inside a workbook.
TableInfo ti = QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), (getPipelineCtx().getJob().getContainer().isWorkbook() ? getPipelineCtx().getJob().getContainer().getParent() : getPipelineCtx().getJob().getContainer()), mGAPSchema.NAME).getTable(mGAPSchema.TABLE_ANIMAL_MAPPING);
293+
// Only the three columns read below are selected; no sort is supplied (null).
TableSelector ts = new TableSelector(ti, PageFlowUtil.set("subjectname", "externalAlias", "otherNames"), new SimpleFilter(filter), null);
294+
ts.forEachResults(new Selector.ForEachBlock<Results>()
295+
{
296+
@Override
297+
public void exec(Results rs) throws SQLException
298+
{
299+
// Primary mapping. This put is unconditional: unlike the otherNames branch below,
// no conflict check is made before replacing an existing entry for this key.
sampleNameMap.put(rs.getString(FieldKey.fromString("subjectname")), rs.getString(FieldKey.fromString("externalAlias")));
300+
301+
if (rs.getObject(FieldKey.fromString("otherNames")) != null)
302+
{
303+
String val = StringUtils.trimToNull(rs.getString(FieldKey.fromString("otherNames")));
304+
if (val != null)
305+
{
306+
// otherNames is treated as a comma-separated list of alternate names.
String[] tokens = val.split(",");
307+
for (String name : tokens)
308+
{
309+
name = StringUtils.trimToNull(name);
310+
if (name == null)
311+
{
312+
continue;
313+
}
314+
315+
// A name already mapped to the same alias is fine; only a differing alias is a conflict.
// NOTE(review): sampleNameMap.get(name).equals(...) would throw a NullPointerException if the
// existing entry's value is null -- confirm externalAlias can never be null here.
if (sampleNameMap.containsKey(name) && !sampleNameMap.get(name).equals(rs.getString(FieldKey.fromString("externalAlias"))))
316+
{
317+
throw new IllegalStateException("Improper data in mgap.aliases table. Dual/conflicting aliases: " + name + ": " + rs.getString(FieldKey.fromString("externalAlias")) + " / " + sampleNameMap.get(name));
318+
}
319+
320+
getPipelineCtx().getLogger().debug("Adding otherName: " + name);
321+
sampleNameMap.put(name, rs.getString(FieldKey.fromString("externalAlias")));
322+
}
323+
}
324+
}
325+
}
326+
});
327+
}
314328
}

mGAP/src/org/labkey/mgap/pipeline/SampleSpecificGenotypeFiltrationStep.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
1212
import org.labkey.api.sequenceanalysis.model.Readset;
1313
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
14+
import org.labkey.api.sequenceanalysis.pipeline.CommandLineParam;
1415
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
1516
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
1617
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
@@ -64,8 +65,11 @@ public Provider()
6465
}}, null),
6566
ToolParameterDescriptor.create("wxsMinQual", "WXS Min Qual", "The min genotype qual for WXS samples.", "ldk-integerfield", new JSONObject(){{
6667
put("minValue", 0);
67-
}}, 30)
68-
), null, "");
68+
}}, 30),
69+
ToolParameterDescriptor.createCommandLineParam(CommandLineParam.createSwitch("--set-filtered-genotype-to-no-call"), "setFilteredGtToNocall", "Set Filtered Genotypes to No-Call", "If selected, any filtered genotypes will be converted to no-call.", "checkbox", new JSONObject(){{
70+
put("checked", true);
71+
}}, true)
72+
), null, "");
6973
}
7074

7175
@Override

mcc/.npmrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
@labkey:registry=https://labkey.jfrog.io/artifactory/api/npm/libs-client
1+
@labkey:registry=https://labkey.jfrog.io/artifactory/api/npm/libs-client

0 commit comments

Comments
 (0)