Skip to content

Commit ffd2f1d

Browse files
committed
Skip duplicated SRA accessions when re-downloading
1 parent 1280c0a commit ffd2f1d

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1866,6 +1866,7 @@ public void serializeTest() throws Exception
18661866

18671867
private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobException
18681868
{
1869+
Set<String> sraIDs = new HashSet<>();
18691870
for (ReadData rd : rs.getReadData())
18701871
{
18711872
if (! (rd instanceof ReadDataImpl rdi))
@@ -1886,11 +1887,29 @@ private void restoreArchivedReadDataIfNeeded(Readset rs) throws PipelineJobExcep
18861887
{
18871888
throw new PipelineJobException("Missing SRA accession: " + rd.getRowid());
18881889
}
1890+
else if (sraIDs.contains(rd.getSra_accession()))
1891+
{
1892+
getJob().getLogger().debug("Already encountered accession, skipping: " + rd.getSra_accession());
1893+
if (rs instanceof SequenceReadsetImpl rsi)
1894+
{
1895+
// Remove the duplicate
1896+
List<ReadDataImpl> rdl = new ArrayList<>(rsi.getReadDataImpl());
1897+
rdl.remove(rd);
1898+
rsi.setReadData(rdl);
1899+
}
1900+
else
1901+
{
1902+
throw new PipelineJobException("Expected readset to be SequenceReadsetImpl");
1903+
}
1904+
1905+
continue;
1906+
}
18891907

18901908
File outDir = new File(getHelper().getWorkingDirectory(), "cachedReadData");
18911909
getTaskFileManagerImpl().addDeferredIntermediateFile(outDir);
18921910

18931911
File doneFile = new File(outDir, rd.getSra_accession() + ".done");
1912+
sraIDs.add(rd.getSra_accession());
18941913
RestoreSraDataHandler.FastqDumpWrapper sra = new RestoreSraDataHandler.FastqDumpWrapper(getJob().getLogger());
18951914
if (doneFile.exists())
18961915
{

0 commit comments

Comments
 (0)