Skip to content

Commit debee8a

Browse files
committed
Don't allow collisions with multiple readdata with the same name
1 parent d7181a9 commit debee8a

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/SequenceAlignmentTask.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.jetbrains.annotations.Nullable;
2929
import org.junit.Assert;
3030
import org.junit.Test;
31+
import org.labkey.api.collections.CaseInsensitiveHashSet;
3132
import org.labkey.api.data.ConvertHelper;
3233
import org.labkey.api.pipeline.ObjectKeySerialization;
3334
import org.labkey.api.pipeline.PairSerializer;
@@ -294,6 +295,7 @@ private Map<ReadData, Pair<File, File>> performFastqPreprocessing(SequenceReadse
294295

295296
Map<ReadData, Pair<File, File>> toAlign = new LinkedHashMap<>();
296297
int i = 0;
298+
Set<String> basenamesUsed = new CaseInsensitiveHashSet();
297299
for (ReadData d : rs.getReadData())
298300
{
299301
i++;
@@ -334,7 +336,7 @@ private Map<ReadData, Pair<File, File>> performFastqPreprocessing(SequenceReadse
334336
getJob().getLogger().debug("Will not copy inputs to working directory");
335337
}
336338

337-
pair = preprocessFastq(pair.first, pair.second, preprocessingActions, suffix);
339+
pair = preprocessFastq(pair.first, pair.second, preprocessingActions, suffix, basenamesUsed);
338340
toAlign.put(d, pair);
339341
}
340342

@@ -460,7 +462,7 @@ private SequenceTaskHelper getHelper()
460462
/**
461463
* Attempt to normalize FASTQ files and perform preprocessing such as trimming.
462464
*/
463-
public Pair<File, File> preprocessFastq(File inputFile1, @Nullable File inputFile2, List<RecordedAction> actions, String statusSuffix) throws PipelineJobException, IOException
465+
public Pair<File, File> preprocessFastq(File inputFile1, @Nullable File inputFile2, List<RecordedAction> actions, String statusSuffix, Set<String> basenamesUsed) throws PipelineJobException, IOException
464466
{
465467
getJob().setStatus(PipelineJob.TaskStatus.running, PREPROCESS_FASTQ_STATUS);
466468
getJob().getLogger().info("Beginning preprocessing for file: " + inputFile1.getName());
@@ -504,20 +506,34 @@ public Pair<File, File> preprocessFastq(File inputFile1, @Nullable File inputFil
504506
steps = combinedSteps;
505507

506508
String originalbaseName = SequenceTaskHelper.getMinimalBaseName(inputFile1.getName());
509+
int i = 0;
510+
while (basenamesUsed.contains(originalbaseName))
511+
{
512+
i++;
513+
originalbaseName = originalbaseName + "." + i;
514+
}
515+
basenamesUsed.add(originalbaseName);
507516
String originalbaseName2 = null;
508517

509518
//log read count:
510519
Pair<Long, Long> previousCounts = FastqUtils.logSequenceCounts(inputFile1, inputFile2, getJob().getLogger(), null, null);
511520

512521
if (inputFile2 != null)
513522
{
514-
originalbaseName2 = SequenceTaskHelper.getMinimalBaseName(inputFile2.getName());
523+
originalbaseName2 = SequenceTaskHelper.getUnzippedBaseName(inputFile2.getName());
524+
i = 0;
525+
while (basenamesUsed.contains(originalbaseName2))
526+
{
527+
i++;
528+
originalbaseName2 = originalbaseName2 + "." + i;
529+
}
530+
basenamesUsed.add(originalbaseName2);
515531

516532
if (originalbaseName.equalsIgnoreCase(originalbaseName2))
517533
{
518-
getJob().getLogger().debug("Forward and reverse FASTQs have the same basename. Appending .1 and .2 as suffixes.");
519-
originalbaseName = originalbaseName + ".1";
520-
originalbaseName2 = originalbaseName2 + ".2";
534+
getJob().getLogger().debug("Forward and reverse FASTQs have the same basename. Appending .R1 and .R2 as suffixes.");
535+
originalbaseName = originalbaseName + ".R1";
536+
originalbaseName2 = originalbaseName2 + ".R2";
521537
}
522538
}
523539

0 commit comments

Comments
 (0)