diff --git a/launchable/jar/exe_deploy.jar b/launchable/jar/exe_deploy.jar index 78d3546f3..721d48663 100755 Binary files a/launchable/jar/exe_deploy.jar and b/launchable/jar/exe_deploy.jar differ diff --git a/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java b/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java index 6c0df383f..d02559526 100644 --- a/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java +++ b/src/main/java/com/launchableinc/ingest/commits/CommitGraphCollector.java @@ -21,6 +21,7 @@ import org.apache.http.impl.client.HttpClientBuilder; import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm; import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.errors.ConfigInvalidException; import org.eclipse.jgit.errors.InvalidObjectIdException; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lib.ConfigConstants; @@ -46,6 +47,7 @@ import java.io.UncheckedIOException; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.time.Duration; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -53,8 +55,8 @@ import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Consumer; +import java.util.function.Function; import java.util.function.Supplier; -import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import static com.google.common.collect.ImmutableList.*; @@ -68,6 +70,8 @@ public class CommitGraphCollector { static final ObjectMapper objectMapper = new ObjectMapper(); private static final int HTTP_TIMEOUT_MILLISECONDS = 15_000; + private final String rootName; + /** * Root repository to start processing. * @@ -98,7 +102,8 @@ private boolean outputAuditLog() { return audit || dryRun; } - public CommitGraphCollector(Repository git) { + public CommitGraphCollector(String name, Repository git) { + this.rootName = name; this.root = git; } @@ -253,10 +258,11 @@ private ImmutableList getAdvertisedRefs(HttpResponse response) throws public void transfer( Collection advertised, IOConsumer commitSender, IOConsumer fileSender, int chunkSize) throws IOException { - ByRepository r = new ByRepository(root); + ByRepository r = new ByRepository(root, rootName); try (CommitChunkStreamer cs = new CommitChunkStreamer(commitSender, chunkSize); - FileChunkStreamer fs = new FileChunkStreamer(fileSender, chunkSize)) { - r.transfer(advertised, cs, fs); + FileChunkStreamer fs = new FileChunkStreamer(fileSender, chunkSize); + ProgressReportingConsumer fsr = new ProgressReportingConsumer<>(fs, VirtualFile::path, Duration.ofSeconds(3))) { + r.transfer(advertised, cs, fsr); } } @@ -299,13 +305,14 @@ public void collectFiles(boolean collectFiles) { /** Process commits per repository. */ final class ByRepository implements AutoCloseable { - + private final String name; private final Repository git; private final ObjectReader objectReader; private final Set shallowCommits; - ByRepository(Repository git) throws IOException { + ByRepository(Repository git, String name) throws IOException { + this.name = name; this.git = git; this.objectReader = git.newObjectReader(); this.shallowCommits = objectReader.getShallowCommits(); @@ -390,8 +397,12 @@ That is, find submodules that are available in the working tree (thus `!isBare() while (swalk.next()) { try (Repository subRepo = swalk.getRepository()) { if (subRepo != null) { - try (ByRepository br = new ByRepository(subRepo)) { - br.transfer(advertised, commitReceiver, fileReceiver); + try { + try (ByRepository br = new ByRepository(subRepo, name + "/" + swalk.getModulesPath())) { + br.transfer(advertised, commitReceiver, fileReceiver); + } + } catch (ConfigInvalidException e) { + throw new IOException("Invalid Git submodule configuration: " + git.getDirectory(), e); } } } @@ -421,9 +432,9 @@ private void collectFiles(TreeWalk treeWalk, Consumer receiver) thr treeWalk.enterSubtree(); } else { if ((treeWalk.getFileMode(0).getBits()&FileMode.TYPE_MASK)==FileMode.TYPE_FILE) { - GitFile f = new GitFile(treeWalk.getPathString(), head, objectReader); + GitFile f = new GitFile(name, treeWalk.getPathString(), head, objectReader); // to avoid excessive data transfer, skip files that are too big - if (f.size()<1024*1024) { + if (f.size()<1024*1024 && f.isText()) { receiver.accept(f); filesSent++; } diff --git a/src/main/java/com/launchableinc/ingest/commits/CommitIngester.java b/src/main/java/com/launchableinc/ingest/commits/CommitIngester.java index 8321ce5ac..2efff19c0 100644 --- a/src/main/java/com/launchableinc/ingest/commits/CommitIngester.java +++ b/src/main/java/com/launchableinc/ingest/commits/CommitIngester.java @@ -18,9 +18,8 @@ /** Driver for {@link CommitGraphCollector}. */ public class CommitIngester { - @Deprecated - @Argument(required = true, metaVar = "COMMAND", index = 0) - public String dummyCommandForBackwardCompatibility; + @Argument(required = true, metaVar = "NAME", usage = "Uniquely identifies this repository within the workspace", index = 0) + public String name; @Argument(required = true, metaVar = "PATH", usage = "Path to Git repository", index = 1) public File repo; @@ -40,13 +39,6 @@ public class CommitIngester { @Option(name = "-skip-cert-verification", usage = "Bypass SSL certification verification.") public boolean skipCertVerification; - /** - * @deprecated this is an old option and this is on always. - */ - @Deprecated - @Option(name = "-scrub-pii", usage = "Scrub emails and names", hidden = true) - public boolean scrubPii; - @Option(name = "-commit-message", usage = "Collect commit messages") public boolean commitMessage; @@ -143,7 +135,7 @@ void run() throws CmdLineException, IOException { try (Repository db = new RepositoryBuilder().setFS(FS.DETECTED).findGitDir(repo).setMustExist(true).build()) { Git git = Git.wrap(db); - CommitGraphCollector cgc = new CommitGraphCollector(git.getRepository()); + CommitGraphCollector cgc = new CommitGraphCollector(name, git.getRepository()); cgc.setMaxDays(maxDays); cgc.setAudit(audit); cgc.setDryRun(dryRun); @@ -151,11 +143,19 @@ void run() throws CmdLineException, IOException { cgc.collectFiles(collectFiles); cgc.transfer(endpoint, authenticator, enableTimeout); int numCommits = cgc.getCommitsSent(); - String suffix = "commit"; - if (numCommits != 1) { - suffix = "commits"; - } - System.out.printf("Launchable transferred %d more %s from repository %s%n", numCommits, suffix, repo); + int numFiles = cgc.getFilesSent(); + System.out.printf("Launchable transferred %d more %s and %d more %s from repository %s%n", + numCommits, plural(numCommits, "commit"), + numFiles, plural(numFiles, "file"), + repo); + } + } + + private String plural(int count, String noun) { + if (count == 1) { + return noun; + } else { + return noun + "s"; } } diff --git a/src/main/java/com/launchableinc/ingest/commits/FileChunkStreamer.java b/src/main/java/com/launchableinc/ingest/commits/FileChunkStreamer.java index 5483adfeb..bce32a0a9 100644 --- a/src/main/java/com/launchableinc/ingest/commits/FileChunkStreamer.java +++ b/src/main/java/com/launchableinc/ingest/commits/FileChunkStreamer.java @@ -24,7 +24,7 @@ protected void writeTo(List files, OutputStream os) throws IOExcept tar.setLongFileMode(LONGFILE_POSIX); for (VirtualFile f : files) { - TarArchiveEntry e = new TarArchiveEntry(f.path()); + TarArchiveEntry e = new TarArchiveEntry("repo:"+f.repo()+"/"+f.path()); e.setSize(f.size()); tar.putArchiveEntry(e); f.writeTo(tar); diff --git a/src/main/java/com/launchableinc/ingest/commits/GitFile.java b/src/main/java/com/launchableinc/ingest/commits/GitFile.java index be5aeb71f..b3e9cc22e 100644 --- a/src/main/java/com/launchableinc/ingest/commits/GitFile.java +++ b/src/main/java/com/launchableinc/ingest/commits/GitFile.java @@ -1,38 +1,76 @@ package com.launchableinc.ingest.commits; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ObjectLoader; import org.eclipse.jgit.lib.ObjectReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.OutputStream; +import java.io.Reader; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.StandardCharsets; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.eclipse.jgit.lib.Constants.*; /** * Represents a file in a Git repository, and encapsulates the read access for convenience. */ final class GitFile implements VirtualFile { + final String repo; final String path; final ObjectId blob; private final ObjectReader objectReader; - public GitFile(String path, ObjectId blob, ObjectReader objectReader) { + public GitFile(String repo, String path, ObjectId blob, ObjectReader objectReader) { + this.repo = repo; this.path = path; this.blob = blob; this.objectReader = objectReader; } + @Override + public String repo() { + return repo; + } + @Override public String path() { return path; } public long size() throws IOException { - return objectReader.open(blob, OBJ_BLOB).getSize(); + return open().getSize(); } @Override public void writeTo(OutputStream os) throws IOException { - objectReader.open(blob, OBJ_BLOB).copyTo(os); + open().copyTo(os); + } + + private ObjectLoader open() throws IOException { + return objectReader.open(blob, OBJ_BLOB); + } + + /** + * Returns true if the file is a text file. + * + *

I briefly thought about whether it makes sense to deal with the platform default encoding, then + * decided not. In the unlikely event we decide to deal with this, it'd be best to convert to UTF-8 on the CLI + * side since encoding codec is not portable. + */ + public boolean isText() throws IOException { + try { + char[] c = new char[1024]; + try (Reader r = new InputStreamReader(open().openStream(), UTF_8)) { + while (r.read(c)!= -1) { + // Read the file until EOF. + } + } + return true; + } catch (CharacterCodingException e) { + return false; + } } } diff --git a/src/main/java/com/launchableinc/ingest/commits/ProgressReportingConsumer.java b/src/main/java/com/launchableinc/ingest/commits/ProgressReportingConsumer.java new file mode 100644 index 000000000..8f39acc1a --- /dev/null +++ b/src/main/java/com/launchableinc/ingest/commits/ProgressReportingConsumer.java @@ -0,0 +1,56 @@ +package com.launchableinc.ingest.commits; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; +import java.util.function.Function; + +import static java.time.Instant.now; + +/** + * Given a slow {@link Consumer} that goes over a large number of items, + * provide a progress report to show that the work is still in progress. + */ +class ProgressReportingConsumer implements Consumer, AutoCloseable { + private final Consumer base; + private final List pool = new ArrayList<>(); + private final Function printer; + private final Duration reportInterval; + + ProgressReportingConsumer(Consumer base, Function printer, Duration reportInterval) { + this.base = base; + this.printer = printer; + this.reportInterval = reportInterval; + } + + @Override + public void accept(T t) { + pool.add(t); + } + + @Override + public void close() { + Instant nextReportTime = now().plus(reportInterval); + int width = String.valueOf(pool.size()).length(); + int i = 0; + for (T x : pool) { + i++; + if (now().isAfter(nextReportTime)) { + System.err.printf("%s/%d: %s%n", pad(i, width), pool.size(), printer.apply(x)); + nextReportTime = now().plus(reportInterval); + } + base.accept(x); + } + pool.clear(); + } + + static String pad(int i, int width) { + String s = String.valueOf(i); + while (s.length() < width) { + s = " " + s; + } + return s; + } +} diff --git a/src/main/java/com/launchableinc/ingest/commits/VirtualFile.java b/src/main/java/com/launchableinc/ingest/commits/VirtualFile.java index bee881cb5..69fe3aeab 100644 --- a/src/main/java/com/launchableinc/ingest/commits/VirtualFile.java +++ b/src/main/java/com/launchableinc/ingest/commits/VirtualFile.java @@ -4,6 +4,14 @@ import java.io.OutputStream; public interface VirtualFile { + /** + * Repository identifier, unique within the workspace. + */ + String repo(); + + /** + * Path to the file within the repository. + */ String path(); long size() throws IOException; void writeTo(OutputStream os) throws IOException; diff --git a/src/test/java/com/launchableinc/ingest/commits/AllTests.java b/src/test/java/com/launchableinc/ingest/commits/AllTests.java index 96a930c8d..ab9b02858 100644 --- a/src/test/java/com/launchableinc/ingest/commits/AllTests.java +++ b/src/test/java/com/launchableinc/ingest/commits/AllTests.java @@ -9,6 +9,7 @@ CommitGraphCollectorTest.class, CommitIngesterTest.class, FileChunkStreamerTest.class, - SSLBypassTest.class + SSLBypassTest.class, + ProgressReportingConsumerTest.class }) public class AllTests {} diff --git a/src/test/java/com/launchableinc/ingest/commits/CommitGraphCollectorTest.java b/src/test/java/com/launchableinc/ingest/commits/CommitGraphCollectorTest.java index ad8b4a435..9f51447aa 100644 --- a/src/test/java/com/launchableinc/ingest/commits/CommitGraphCollectorTest.java +++ b/src/test/java/com/launchableinc/ingest/commits/CommitGraphCollectorTest.java @@ -100,7 +100,7 @@ public void chunking() throws Exception { setupRepos(); try (Git mainrepo = Git.open(mainrepoDir)) { addCommitInSubRepo(mainrepo); - CommitGraphCollector cgc = new CommitGraphCollector(mainrepo.getRepository()); + CommitGraphCollector cgc = new CommitGraphCollector("test", mainrepo.getRepository()); cgc.setMaxDays(30); cgc.collectFiles(true); cgc.transfer( @@ -143,7 +143,7 @@ public void scrubPii() throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (Git mainrepo = Git.open(mainrepoDir)) { addCommitInSubRepo(mainrepo); - CommitGraphCollector cgc = new CommitGraphCollector(mainrepo.getRepository()); + CommitGraphCollector cgc = new CommitGraphCollector("test", mainrepo.getRepository()); cgc.setMaxDays(30); cgc.transfer(ImmutableList.of(), c -> c.writeTo(baos), f -> {}, Integer.MAX_VALUE); } @@ -154,7 +154,7 @@ public void scrubPii() throws Exception { private CommitGraphCollector collectCommit(Repository r, List advertised) throws IOException { - CommitGraphCollector cgc = new CommitGraphCollector(r); + CommitGraphCollector cgc = new CommitGraphCollector("test", r); cgc.setMaxDays(30); cgc.collectFiles(true); cgc.transfer(advertised, c -> {}, f -> {}, 3); diff --git a/src/test/java/com/launchableinc/ingest/commits/FileChunkStreamerTest.java b/src/test/java/com/launchableinc/ingest/commits/FileChunkStreamerTest.java index e64cebbf4..61435b6d4 100644 --- a/src/test/java/com/launchableinc/ingest/commits/FileChunkStreamerTest.java +++ b/src/test/java/com/launchableinc/ingest/commits/FileChunkStreamerTest.java @@ -30,10 +30,10 @@ public void basics() throws Exception { try (FileChunkStreamer fs = new FileChunkStreamer(content -> { switch(count[0]++) { case 0: - assertThat(readEntries(content)).containsExactly("foo.txt", "bar.txt").inOrder(); + assertThat(readEntries(content)).containsExactly("repo:test/foo.txt", "repo:test/bar.txt").inOrder(); break; case 1: - assertThat(readEntries(content)).containsExactly("zot.txt").inOrder(); + assertThat(readEntries(content)).containsExactly("repo:test/zot.txt").inOrder(); break; default: fail(); @@ -68,6 +68,11 @@ private static class VirtualFileImpl implements VirtualFile { this.path = path; } + @Override + public String repo() { + return "test"; + } + @Override public String path() { return path; diff --git a/src/test/java/com/launchableinc/ingest/commits/ProgressReportingConsumerTest.java b/src/test/java/com/launchableinc/ingest/commits/ProgressReportingConsumerTest.java new file mode 100644 index 000000000..eccb34bfa --- /dev/null +++ b/src/test/java/com/launchableinc/ingest/commits/ProgressReportingConsumerTest.java @@ -0,0 +1,29 @@ +package com.launchableinc.ingest.commits; + +import com.google.common.truth.Truth; +import org.junit.Test; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; + +public class ProgressReportingConsumerTest { + @Test + public void basic() { + List done = new ArrayList<>(); + try (ProgressReportingConsumer x = new ProgressReportingConsumer<>(s -> {done.add(s);sleep();}, String::valueOf, Duration.ofMillis(100))) { + for (int i = 0; i < 100; i++) { + x.accept("item " + i); + } + } + Truth.assertThat(done.size()).isEqualTo(100); + } + + private static void sleep() { + try { + Thread.sleep(10); + } catch (InterruptedException e) { + throw new UnsupportedOperationException(); + } + } +}