
Commit edd587a

Significant refactor of Docker usage in pipeline jobs to migrate everything to DockerWrapper and improve handling of input file locations
1 parent 753356c commit edd587a

25 files changed: +281 −542 lines changed


SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java

Lines changed: 7 additions & 0 deletions
@@ -1,7 +1,9 @@
 package org.labkey.api.sequenceanalysis.pipeline;
 
+import org.jetbrains.annotations.Nullable;
 import org.labkey.api.data.Container;
 
+import java.io.File;
 import java.util.Collection;
 import java.util.List;
 
@@ -15,4 +17,9 @@ public interface JobResourceSettings
     List<ToolParameterDescriptor> getParams();
 
     Collection<String> getDockerVolumes(Container c);
+
+    default @Nullable File inferDockerVolume(File input)
+    {
+        return null;
+    }
 }
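
The new default method returns null, meaning a given JobResourceSettings has no opinion about the file. Below is a minimal sketch of the kind of logic an implementation might plug in to mount an NFS root rather than a child folder; the class name and the /mnt/nfs path are illustrative assumptions, not part of this commit.

import java.io.File;

import org.jetbrains.annotations.Nullable;

// Illustrative sketch only: shows logic a JobResourceSettings implementation could use
// for inferDockerVolume(). The NFS_ROOT value is an assumed example path.
public class NfsVolumeInference
{
    private static final File NFS_ROOT = new File("/mnt/nfs");

    // Same contract as JobResourceSettings.inferDockerVolume(): return the volume to mount
    // for this input, or null if this settings object has no opinion (the service then
    // asks the other registered JobResourceSettings).
    public static @Nullable File inferDockerVolume(File input)
    {
        // If the input lives anywhere under the NFS root, mount the root itself
        // rather than the file's immediate parent folder.
        for (File current = input; current != null; current = current.getParentFile())
        {
            if (current.equals(NFS_ROOT))
            {
                return NFS_ROOT;
            }
        }

        return null;
    }
}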

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java

Lines changed: 6 additions & 0 deletions
@@ -102,6 +102,12 @@ static public void setInstance(SequencePipelineService instance)
 
     abstract public Collection<String> getDockerVolumes(Container c);
 
+    /**
+     * The purpose of this method is to assist with translating from raw filepath to the desired volume to mount in a docker container.
+     * This is mostly relevant for situations where the NFS root should be mounted, rather than a child folder.
+     */
+    abstract public @Nullable File inferDockerVolume(File input);
+
     abstract public List<File> getSequenceJobInputFiles(PipelineJob job);
 
     /**

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/run/DockerWrapper.java

Lines changed: 56 additions & 27 deletions
@@ -1,8 +1,8 @@
 package org.labkey.api.sequenceanalysis.run;
 
-import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.logging.log4j.Logger;
+import org.jetbrains.annotations.Nullable;
 import org.labkey.api.pipeline.PipelineJobException;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
 import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
@@ -13,7 +13,12 @@
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 public class DockerWrapper extends AbstractCommandWrapper
 {
@@ -22,6 +27,7 @@ public class DockerWrapper extends AbstractCommandWrapper
     private File _tmpDir = null;
     private String _entryPoint = null;
     private boolean _runPrune = true;
+    private String _alternateUserHome = null;
 
     public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
     {
@@ -32,6 +38,11 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
         _environment.clear();
     }
 
+    public void setAlternateUserHome(String alternateUserHome)
+    {
+        _alternateUserHome = alternateUserHome;
+    }
+
     public void setTmpDir(File tmpDir)
     {
         _tmpDir = tmpDir;
@@ -48,6 +59,11 @@ public void setRunPrune(boolean runPrune)
     }
 
     public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException
+    {
+        executeWithDocker(containerArgs, workDir, tracker, null);
+    }
+
+    public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker, @Nullable Collection<File> inputFiles) throws PipelineJobException
     {
         File localBashScript = new File(workDir, "docker.sh");
         File dockerBashScript = new File(workDir, "dockerRun.sh");
@@ -75,32 +91,42 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
             File homeDir = new File(System.getProperty("user.home"));
             if (homeDir.exists())
             {
-                final String searchString = "-v '" + homeDir.getPath() + "'";
-                if (_ctx.getDockerVolumes().stream().noneMatch(searchString::startsWith))
+                if (_ctx.getDockerVolumes().stream().noneMatch(homeDir.getPath()::startsWith))
                 {
-                    writer.println("\t-v \"" + homeDir.getPath() + ":/homeDir\" \\");
+                    writer.println("\t-v '" + homeDir.getPath() + "':'" + homeDir.getPath() + "' \\");
                 }
                 else
                 {
-                    _ctx.getLogger().debug("homeDir already present in docker volumes, omitting");
+                    _ctx.getLogger().debug("homeDir already present in docker volumes, will not re-add");
                 }
 
                 _environment.put("USER_HOME", homeDir.getPath());
             }
 
-            _ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
+            if (_alternateUserHome != null)
+            {
+                _environment.put("HOME", _alternateUserHome);
+            }
+
+            _ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "'\\"));
+            if (inputFiles != null)
+            {
+                inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "'\\"));
+            }
+
             if (_tmpDir != null)
            {
                 // NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly:
-                final String searchString = "-v '" + _tmpDir.getPath() + "'";
-                if (_ctx.getDockerVolumes().stream().noneMatch(searchString::startsWith))
+                if (_ctx.getDockerVolumes().stream().noneMatch(_tmpDir.getPath()::startsWith))
                 {
                     writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
                 }
                 else
                 {
                     _ctx.getLogger().debug("tmpDir already present in docker volumes, omitting");
                 }
+
+                addToEnvironment("TMPDIR", _tmpDir.getPath());
             }
 
             if (_entryPoint != null)
@@ -109,6 +135,8 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
             }
 
             writer.println("\t-w " + workDir.getPath() + " \\");
+            addToEnvironment("WORK_DIR", workDir.getPath());
+
             Integer maxRam = SequencePipelineService.get().getMaxRam();
             if (maxRam != null)
             {
@@ -121,7 +149,7 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
                 writer.println("\t-e " + key + "=" + _environment.get(key) + " \\");
             }
             writer.println("\t" + _containerName + " \\");
-            writer.println("\t" + workDir.getPath() + "/" + dockerBashScript.getName());
+            writer.println("\t/bin/bash " + dockerBashScript.getPath());
             writer.println("DOCKER_EXIT_CODE=$?");
             writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE");
             writer.println("exit $DOCKER_EXIT_CODE");
@@ -141,29 +169,30 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
         execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
     }
 
-    public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
+    private Collection<File> inspectInputFiles(Collection<File> inputFiles)
     {
-        try
+        Set<File> toAdd = inputFiles.stream().map(f -> f.isDirectory() ? f : f.getParentFile()).filter(x -> _ctx.getDockerVolumes().stream().noneMatch(x.getPath()::startsWith)).collect(Collectors.toSet());
+        if (!toAdd.isEmpty())
         {
-            if (workingDirectory.equals(input.getParentFile()))
-            {
-                return input;
-            }
+            Set<File> paths = new HashSet<>();
+            toAdd.forEach(x -> {
+                _ctx.getLogger().debug("Adding volume for path: " + x.getPath());
 
-            File local = new File(workingDirectory, input.getName());
-            if (!local.exists())
-            {
-                getLogger().debug("Copying file locally: " + input.getPath());
-                FileUtils.copyFile(input, local);
-            }
+                File converted = SequencePipelineService.get().inferDockerVolume(x);
+                if (!x.equals(converted))
+                {
+                    _ctx.getLogger().debug("added as: " + converted.getPath());
+                }
 
-            output.addIntermediateFile(local);
+                if (_ctx.getDockerVolumes().stream().noneMatch(converted.getPath()::startsWith))
+                {
+                    paths.add(converted);
+                }
+            });
 
-            return local;
-        }
-        catch (IOException e)
-        {
-            throw new PipelineJobException(e);
+            return paths;
         }
+
+        return Collections.emptySet();
     }
 }
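
For context, here is a hedged sketch of how a pipeline step might call the new four-argument overload; the container tag, tool name, and file arguments are illustrative assumptions, not part of this commit. Rather than copying inputs into the working directory with the removed ensureLocalCopy(), the caller passes its input files and DockerWrapper adds the corresponding -v mounts to the generated docker.sh.

import java.io.File;
import java.util.Arrays;
import java.util.List;

import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
import org.labkey.api.sequenceanalysis.run.DockerWrapper;

public class ExampleDockerStep
{
    // All concrete values below (container tag, tool name, file names) are illustrative only.
    public void runTool(PipelineContext ctx, PipelineOutputTracker tracker, File inputBam, File workDir) throws PipelineJobException
    {
        DockerWrapper wrapper = new DockerWrapper("ghcr.io/example/tool:latest", ctx.getLogger(), ctx);
        wrapper.setTmpDir(new File(workDir, "tmp"));
        wrapper.setAlternateUserHome(workDir.getPath()); // exported as HOME inside the container

        List<String> containerArgs = Arrays.asList("exampleTool", "--input", inputBam.getPath());

        // New four-argument overload: the parent directory of inputBam (or the volume returned by
        // SequencePipelineService.inferDockerVolume()) is mounted automatically, replacing the old
        // pattern of copying inputs into workDir via the removed ensureLocalCopy().
        wrapper.executeWithDocker(containerArgs, workDir, tracker, Arrays.asList(inputBam));
    }
}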

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 17 additions & 5 deletions
@@ -472,16 +472,28 @@ public Collection<String> getDockerVolumes(Container c)
         {
             if (settings.isAvailable(c))
             {
-                for (String volume : settings.getDockerVolumes(c))
-                {
-                    volumeLines.add("-v '" + volume + "':'" + volume + "'");
-                }
+                return Collections.unmodifiableCollection(settings.getDockerVolumes(c));
             }
         }
 
         return volumeLines;
     }
 
+    @Override
+    public @Nullable File inferDockerVolume(File input)
+    {
+        for (JobResourceSettings settings : SequencePipelineServiceImpl.get().getResourceSettings())
+        {
+            File ret = settings.inferDockerVolume(input);
+            if (ret != null)
+            {
+                return ret;
+            }
+        }
+
+        return null;
+    }
+
     @Override
     public List<File> getSequenceJobInputFiles(PipelineJob job)
     {
@@ -570,7 +582,7 @@ public void registerResourceSettings(JobResourceSettings settings)
     @Override
     public Set<JobResourceSettings> getResourceSettings()
     {
-        return _resourceSettings;
+        return Collections.unmodifiableSet(_resourceSettings);
     }
 
     @Override
