Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified launchable/jar/exe_deploy.jar
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.http.impl.client.HttpClientBuilder;
import org.eclipse.jgit.diff.DiffAlgorithm.SupportedAlgorithm;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.errors.InvalidObjectIdException;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.ConfigConstants;
Expand All @@ -46,15 +47,16 @@
import java.io.UncheckedIOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import static com.google.common.collect.ImmutableList.*;
Expand All @@ -68,6 +70,8 @@ public class CommitGraphCollector {
static final ObjectMapper objectMapper = new ObjectMapper();
private static final int HTTP_TIMEOUT_MILLISECONDS = 15_000;

private final String rootName;

/**
* Root repository to start processing.
*
Expand Down Expand Up @@ -98,7 +102,8 @@ private boolean outputAuditLog() {
return audit || dryRun;
}

public CommitGraphCollector(Repository git) {
public CommitGraphCollector(String name, Repository git) {
this.rootName = name;
this.root = git;
}

Expand Down Expand Up @@ -253,10 +258,11 @@ private ImmutableList<ObjectId> getAdvertisedRefs(HttpResponse response) throws
public void transfer(
Collection<ObjectId> advertised, IOConsumer<ContentProducer> commitSender, IOConsumer<ContentProducer> fileSender, int chunkSize)
throws IOException {
ByRepository r = new ByRepository(root);
ByRepository r = new ByRepository(root, rootName);
try (CommitChunkStreamer cs = new CommitChunkStreamer(commitSender, chunkSize);
FileChunkStreamer fs = new FileChunkStreamer(fileSender, chunkSize)) {
r.transfer(advertised, cs, fs);
FileChunkStreamer fs = new FileChunkStreamer(fileSender, chunkSize);
ProgressReportingConsumer<VirtualFile> fsr = new ProgressReportingConsumer<>(fs, VirtualFile::path, Duration.ofSeconds(3))) {
r.transfer(advertised, cs, fsr);
}
}

Expand Down Expand Up @@ -299,13 +305,14 @@ public void collectFiles(boolean collectFiles) {

/** Process commits per repository. */
final class ByRepository implements AutoCloseable {

private final String name;
private final Repository git;

private final ObjectReader objectReader;
private final Set<ObjectId> shallowCommits;

ByRepository(Repository git) throws IOException {
ByRepository(Repository git, String name) throws IOException {
this.name = name;
this.git = git;
this.objectReader = git.newObjectReader();
this.shallowCommits = objectReader.getShallowCommits();
Expand Down Expand Up @@ -390,8 +397,12 @@ That is, find submodules that are available in the working tree (thus `!isBare()
while (swalk.next()) {
try (Repository subRepo = swalk.getRepository()) {
if (subRepo != null) {
try (ByRepository br = new ByRepository(subRepo)) {
br.transfer(advertised, commitReceiver, fileReceiver);
try {
try (ByRepository br = new ByRepository(subRepo, name + "/" + swalk.getModulesPath())) {
br.transfer(advertised, commitReceiver, fileReceiver);
}
} catch (ConfigInvalidException e) {
throw new IOException("Invalid Git submodule configuration: " + git.getDirectory(), e);
}
}
}
Expand Down Expand Up @@ -421,9 +432,9 @@ private void collectFiles(TreeWalk treeWalk, Consumer<VirtualFile> receiver) thr
treeWalk.enterSubtree();
} else {
if ((treeWalk.getFileMode(0).getBits()&FileMode.TYPE_MASK)==FileMode.TYPE_FILE) {
GitFile f = new GitFile(treeWalk.getPathString(), head, objectReader);
GitFile f = new GitFile(name, treeWalk.getPathString(), head, objectReader);
// to avoid excessive data transfer, skip files that are too big
if (f.size()<1024*1024) {
if (f.size()<1024*1024 && f.isText()) {
receiver.accept(f);
filesSent++;
}
Expand Down
32 changes: 16 additions & 16 deletions src/main/java/com/launchableinc/ingest/commits/CommitIngester.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@

/** Driver for {@link CommitGraphCollector}. */
public class CommitIngester {
@Deprecated
@Argument(required = true, metaVar = "COMMAND", index = 0)
public String dummyCommandForBackwardCompatibility;
@Argument(required = true, metaVar = "NAME", usage = "Uniquely identifies this repository within the workspace", index = 0)
public String name;

@Argument(required = true, metaVar = "PATH", usage = "Path to Git repository", index = 1)
public File repo;
Expand All @@ -40,13 +39,6 @@ public class CommitIngester {
@Option(name = "-skip-cert-verification", usage = "Bypass SSL certification verification.")
public boolean skipCertVerification;

/**
* @deprecated this is an old option and this is on always.
*/
@Deprecated
@Option(name = "-scrub-pii", usage = "Scrub emails and names", hidden = true)
public boolean scrubPii;

@Option(name = "-commit-message", usage = "Collect commit messages")
public boolean commitMessage;

Expand Down Expand Up @@ -143,19 +135,27 @@ void run() throws CmdLineException, IOException {
try (Repository db =
new RepositoryBuilder().setFS(FS.DETECTED).findGitDir(repo).setMustExist(true).build()) {
Git git = Git.wrap(db);
CommitGraphCollector cgc = new CommitGraphCollector(git.getRepository());
CommitGraphCollector cgc = new CommitGraphCollector(name, git.getRepository());
cgc.setMaxDays(maxDays);
cgc.setAudit(audit);
cgc.setDryRun(dryRun);
cgc.collectCommitMessage(commitMessage);
cgc.collectFiles(collectFiles);
cgc.transfer(endpoint, authenticator, enableTimeout);
int numCommits = cgc.getCommitsSent();
String suffix = "commit";
if (numCommits != 1) {
suffix = "commits";
}
System.out.printf("Launchable transferred %d more %s from repository %s%n", numCommits, suffix, repo);
int numFiles = cgc.getFilesSent();
System.out.printf("Launchable transferred %d more %s and %d more %s from repository %s%n",
numCommits, plural(numCommits, "commit"),
numFiles, plural(numFiles, "file"),
repo);
}
}

/**
 * Picks the grammatical form of {@code noun} that matches {@code count}.
 *
 * @param count number of items being reported
 * @param noun singular form of the noun
 * @return {@code noun} unchanged when {@code count} is exactly 1, otherwise {@code noun} with an
 *     "s" appended
 */
private String plural(int count, String noun) {
    return count == 1 ? noun : noun + "s";
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ protected void writeTo(List<VirtualFile> files, OutputStream os) throws IOExcept
tar.setLongFileMode(LONGFILE_POSIX);

for (VirtualFile f : files) {
TarArchiveEntry e = new TarArchiveEntry(f.path());
TarArchiveEntry e = new TarArchiveEntry("repo:"+f.repo()+"/"+f.path());
e.setSize(f.size());
tar.putArchiveEntry(e);
f.writeTo(tar);
Expand Down
44 changes: 41 additions & 3 deletions src/main/java/com/launchableinc/ingest/commits/GitFile.java
Original file line number Diff line number Diff line change
@@ -1,38 +1,76 @@
package com.launchableinc.ingest.commits;

import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.eclipse.jgit.lib.Constants.*;

/**
* Represents a file in a Git repository, and encapsulates the read access for convenience.
*/
final class GitFile implements VirtualFile {
final String repo;
final String path;
final ObjectId blob;
private final ObjectReader objectReader;

public GitFile(String path, ObjectId blob, ObjectReader objectReader) {
public GitFile(String repo, String path, ObjectId blob, ObjectReader objectReader) {
this.repo = repo;
this.path = path;
this.blob = blob;
this.objectReader = objectReader;
}

@Override
public String repo() {
return repo;
}

@Override
public String path() {
return path;
}

public long size() throws IOException {
return objectReader.open(blob, OBJ_BLOB).getSize();
return open().getSize();
}

@Override
public void writeTo(OutputStream os) throws IOException {
objectReader.open(blob, OBJ_BLOB).copyTo(os);
open().copyTo(os);
}

/**
 * Opens this file's blob through the object reader, requesting it as an {@code OBJ_BLOB}.
 * Shared by {@code size()}, {@code writeTo()} and {@code isText()}.
 *
 * @throws IOException if the object reader fails to open the blob
 */
private ObjectLoader open() throws IOException {
return objectReader.open(blob, OBJ_BLOB);
}

/**
 * Returns true if the file is a text file, that is, if its whole content decodes as strict UTF-8.
 *
 * <p>The reader is built from {@code UTF_8.newDecoder()} on purpose. The {@link InputStreamReader}
 * constructor that takes a charset silently replaces malformed input with U+FFFD, so a
 * {@link CharacterCodingException} would never surface and every blob would be classified as text.
 * A freshly created decoder reports malformed input instead, which is what this check relies on.
 *
 * <p>I briefly thought about whether it makes sense to deal with the platform default encoding, then
 * decided not. In the unlikely event we decide to deal with this, it'd be best to convert to UTF-8 on the CLI
 * side since encoding codec is not portable.
 *
 * @return {@code true} if the content is well-formed UTF-8 (an empty file counts as text),
 *     {@code false} if a malformed byte sequence is encountered
 * @throws IOException if the blob cannot be opened or read
 */
public boolean isText() throws IOException {
    char[] buf = new char[1024];
    // newDecoder() defaults to CodingErrorAction.REPORT, so invalid bytes raise an exception.
    try (Reader r = new InputStreamReader(open().openStream(), UTF_8.newDecoder())) {
        while (r.read(buf) != -1) {
            // Read the file until EOF; only the decoding outcome matters.
        }
        return true;
    } catch (CharacterCodingException e) {
        // Malformed or unmappable input: not UTF-8 text.
        return false;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.launchableinc.ingest.commits;

import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.Function;

import static java.time.Instant.now;

/**
 * Given a slow {@link Consumer} that goes over a large number of items,
 * provide a progress report to show that the work is still in progress.
 *
 * <p>{@link #accept(Object)} only buffers the item. The buffered items are handed to the wrapped
 * consumer when {@link #close()} is called, at which point the total count is known and can be
 * included in each progress line written to {@code System.err}.
 *
 * @param <T> type of the items being consumed
 */
class ProgressReportingConsumer<T> implements Consumer<T>, AutoCloseable {
    private final Consumer<T> base;
    private final List<T> pool = new ArrayList<>();
    private final Function<T,String> printer;
    private final Duration reportInterval;

    /**
     * @param base consumer that does the actual (slow) work
     * @param printer renders an item for the progress line
     * @param reportInterval minimum time between two progress lines
     */
    ProgressReportingConsumer(Consumer<T> base, Function<T,String> printer, Duration reportInterval) {
        this.base = base;
        this.printer = printer;
        this.reportInterval = reportInterval;
    }

    /** Buffers the item; nothing is forwarded to the wrapped consumer until {@link #close()}. */
    @Override
    public void accept(T t) {
        pool.add(t);
    }

    /**
     * Forwards every buffered item to the wrapped consumer in insertion order, printing a progress
     * line whenever more than {@code reportInterval} has elapsed since the previous one.
     *
     * <p>The buffer is emptied even when the wrapped consumer throws, so calling this method again
     * never re-delivers items that were already handed over.
     */
    @Override
    public void close() {
        try {
            int total = pool.size();
            int width = String.valueOf(total).length();
            Instant nextReportTime = now().plus(reportInterval);
            int i = 0;
            for (T x : pool) {
                i++;
                if (now().isAfter(nextReportTime)) {
                    System.err.printf("%s/%d: %s%n", pad(i, width), total, printer.apply(x));
                    nextReportTime = now().plus(reportInterval);
                }
                base.accept(x);
            }
        } finally {
            // Always drop the buffer: keeps close() idempotent even after a failure.
            pool.clear();
        }
    }

    /**
     * Left-pads the decimal form of {@code i} with spaces up to {@code width} characters.
     * Values already at least {@code width} characters long are returned unchanged.
     */
    static String pad(int i, int width) {
        String digits = String.valueOf(i);
        StringBuilder sb = new StringBuilder(Math.max(width, digits.length()));
        for (int n = digits.length(); n < width; n++) {
            sb.append(' ');
        }
        return sb.append(digits).toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
import java.io.OutputStream;

public interface VirtualFile {
/**
* Repository identifier, unique within the workspace.
*/
String repo();

/**
* Path to the file within the repository.
*/
String path();
long size() throws IOException;
void writeTo(OutputStream os) throws IOException;
Expand Down
3 changes: 2 additions & 1 deletion src/test/java/com/launchableinc/ingest/commits/AllTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
CommitGraphCollectorTest.class,
CommitIngesterTest.class,
FileChunkStreamerTest.class,
SSLBypassTest.class
SSLBypassTest.class,
ProgressReportingConsumerTest.class
})
public class AllTests {}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ public void chunking() throws Exception {
setupRepos();
try (Git mainrepo = Git.open(mainrepoDir)) {
addCommitInSubRepo(mainrepo);
CommitGraphCollector cgc = new CommitGraphCollector(mainrepo.getRepository());
CommitGraphCollector cgc = new CommitGraphCollector("test", mainrepo.getRepository());
cgc.setMaxDays(30);
cgc.collectFiles(true);
cgc.transfer(
Expand Down Expand Up @@ -143,7 +143,7 @@ public void scrubPii() throws Exception {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (Git mainrepo = Git.open(mainrepoDir)) {
addCommitInSubRepo(mainrepo);
CommitGraphCollector cgc = new CommitGraphCollector(mainrepo.getRepository());
CommitGraphCollector cgc = new CommitGraphCollector("test", mainrepo.getRepository());
cgc.setMaxDays(30);
cgc.transfer(ImmutableList.of(), c -> c.writeTo(baos), f -> {}, Integer.MAX_VALUE);
}
Expand All @@ -154,7 +154,7 @@ public void scrubPii() throws Exception {

private CommitGraphCollector collectCommit(Repository r, List<ObjectId> advertised)
throws IOException {
CommitGraphCollector cgc = new CommitGraphCollector(r);
CommitGraphCollector cgc = new CommitGraphCollector("test", r);
cgc.setMaxDays(30);
cgc.collectFiles(true);
cgc.transfer(advertised, c -> {}, f -> {}, 3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ public void basics() throws Exception {
try (FileChunkStreamer fs = new FileChunkStreamer(content -> {
switch(count[0]++) {
case 0:
assertThat(readEntries(content)).containsExactly("foo.txt", "bar.txt").inOrder();
assertThat(readEntries(content)).containsExactly("repo:test/foo.txt", "repo:test/bar.txt").inOrder();
break;
case 1:
assertThat(readEntries(content)).containsExactly("zot.txt").inOrder();
assertThat(readEntries(content)).containsExactly("repo:test/zot.txt").inOrder();
break;
default:
fail();
Expand Down Expand Up @@ -68,6 +68,11 @@ private static class VirtualFileImpl implements VirtualFile {
this.path = path;
}

@Override
public String repo() {
return "test";
}

@Override
public String path() {
return path;
Expand Down
Loading
Loading