Skip to content

Commit 549d68d

Browse files
committed
Hash the uncompressed contents of docker context tar
As the number of docker images grows, the overhead of hashing the compressed contents of the docker context tars becomes noticeable when loading the toolchain kind during Firefox builds. However, the only part that actually matters for the hash is the uncompressed contents: whether they are compressed with gzip at level 9, gzip at level 1, bz2, or zstd makes no difference to the resulting docker image. It therefore makes sense to hash only the uncompressed contents, which is much faster.
1 parent f1f0184 commit 549d68d

File tree

3 files changed

+35
-21
lines changed

3 files changed

+35
-21
lines changed

src/taskgraph/util/archive.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import os
88
import stat
99
import tarfile
10+
from contextlib import contextmanager
1011

1112
# 2016-01-01T00:00:00+0000
1213
DEFAULT_MTIME = 1451606400
@@ -104,14 +105,14 @@ def create_tar_from_files(fp, files):
104105
tf.addfile(ti, f)
105106

106107

107-
def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
108-
"""Create a tar.gz file deterministically from files.
108+
@contextmanager
109+
def gzip_compressor(fp, filename=None, compresslevel=9):
110+
"""Create a deterministic GzipFile writer.
109111
110-
This is a glorified wrapper around ``create_tar_from_files`` that
111-
adds gzip compression.
112+
This is a glorified wrapper around ``GzipFile`` that adds some
113+
determinism.
112114
113115
The passed file handle should be opened for writing in binary mode.
114-
When the function returns, all data has been written to the handle.
115116
"""
116117
# Offset 3-7 in the gzip header contains an mtime. Pin it to a known
117118
# value so output is deterministic.
@@ -123,4 +124,17 @@ def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
123124
mtime=DEFAULT_MTIME,
124125
)
125126
with gf:
127+
yield gf
128+
129+
130+
def create_tar_gz_from_files(fp, files, filename=None, compresslevel=9):
131+
"""Create a tar.gz file deterministically from files.
132+
133+
This is a glorified wrapper around ``create_tar_from_files`` that
134+
adds gzip compression.
135+
136+
The passed file handle should be opened for writing in binary mode.
137+
When the function returns, all data has been written to the handle.
138+
"""
139+
with gzip_compressor(fp, filename, compresslevel) as gf:
126140
create_tar_from_files(gf, files)

src/taskgraph/util/docker.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import re
1111
from typing import Optional
1212

13-
from taskgraph.util.archive import create_tar_gz_from_files
13+
from taskgraph.util.archive import create_tar_from_files, gzip_compressor
1414

1515
IMAGE_DIR = os.path.join(".", "taskcluster", "docker")
1616

@@ -76,10 +76,15 @@ class HashingWriter:
7676
def __init__(self, writer):
7777
self._hash = hashlib.sha256()
7878
self._writer = writer
79+
self._written = 0
7980

8081
def write(self, buf):
8182
self._hash.update(buf)
8283
self._writer.write(buf)
84+
self._written += len(buf)
85+
86+
def tell(self):
87+
return self._written
8388

8489
def hexdigest(self):
8590
return self._hash.hexdigest()
@@ -108,13 +113,8 @@ def create_context_tar(topsrcdir, context_dir, out_path, args=None):
108113
Returns the SHA-256 hex digest of the created archive.
109114
"""
110115
with open(out_path, "wb") as fh:
111-
return stream_context_tar(
112-
topsrcdir,
113-
context_dir,
114-
fh,
115-
image_name=os.path.basename(out_path),
116-
args=args,
117-
)
116+
with gzip_compressor(fh, filename=os.path.basename(out_path)) as gf:
117+
return stream_context_tar(topsrcdir, context_dir, gf, args=args)
118118

119119

120120
RUN_TASK_ROOT = os.path.join(os.path.dirname(os.path.dirname(__file__)), "run-task")
@@ -135,7 +135,7 @@ def create_context_tar(topsrcdir, context_dir, out_path, args=None):
135135
]
136136

137137

138-
def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=None):
138+
def stream_context_tar(topsrcdir, context_dir, out_file, args=None):
139139
"""Like create_context_tar, but streams the tar file to the `out_file` file
140140
object."""
141141
archive_files = {}
@@ -201,7 +201,7 @@ def stream_context_tar(topsrcdir, context_dir, out_file, image_name=None, args=N
201201
archive_files["Dockerfile"] = io.BytesIO("".join(content).encode("utf-8"))
202202

203203
writer = HashingWriter(out_file)
204-
create_tar_gz_from_files(writer, archive_files, image_name)
204+
create_tar_from_files(writer, archive_files)
205205
return writer.hexdigest()
206206

207207

test/test_util_docker.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def test_generate_context_hash(self):
3939
docker.generate_context_hash(
4040
tmpdir, os.path.join(tmpdir, "docker/my-image"), "my-image"
4141
),
42-
"e1649b3427bd7a0387f4508d25057c2e89228748517aad6c70e3df54f47bd13a",
42+
"ab46d51b191eb6c595cccf1fa02485b4e1decc6ba9737a8b8613038d3661be52",
4343
)
4444
finally:
4545
shutil.rmtree(tmpdir)
@@ -100,7 +100,7 @@ def test_create_context_tar_basic(self):
100100
tp = os.path.join(tmp, "tar")
101101
h = docker.create_context_tar(tmp, d, tp)
102102
self.assertEqual(
103-
h, "6c1cc23357625f64f775a08eace7bbc3877dd08d2f3546e0f2e308bac8491865"
103+
h, "3134fa88c39a604132b260c2c3cf09f6fe4a8234475a4272fd9438aac47caaae"
104104
)
105105

106106
# File prefix should be "my_image"
@@ -133,7 +133,7 @@ def test_create_context_topsrcdir_files(self):
133133
tp = os.path.join(tmp, "tar")
134134
h = docker.create_context_tar(tmp, d, tp)
135135
self.assertEqual(
136-
h, "e7f14044b8ec1ba42e251d4b293af212ad08b30ec8ab6613abbdbe73c3c2b61f"
136+
h, "56657d2f428fe268cc3b0966649a3bf8477dcd2eade7a1d4accc5c312f075a2f"
137137
)
138138

139139
with tarfile.open(tp, "r:gz") as tf:
@@ -217,7 +217,7 @@ def test_create_context_extra_directory(self):
217217
h = docker.create_context_tar(tmp, d, tp)
218218

219219
self.assertEqual(
220-
h, "d2a3363b15d0eb547a6c81a72ddf3980e2f6e6360c29b4fb6818102896f43180"
220+
h, "ba7b62e8f25977e8e6629aee1d121ae92b6007258c90a53fb94c8607e1e96e10"
221221
)
222222

223223
with tarfile.open(tp, "r:gz") as tf:
@@ -261,11 +261,11 @@ def test_stream_context_tar(self):
261261
# file objects are BufferedRandom instances
262262
out_file = BufferedRandom(BytesIO(b""))
263263
h = docker.stream_context_tar(
264-
tmp, d, out_file, "my_image", args={"PYTHON_VERSION": "3.8"}
264+
tmp, d, out_file, args={"PYTHON_VERSION": "3.8"}
265265
)
266266

267267
self.assertEqual(
268-
h, "e015aabf2677d90fee777c8813fd69402309a2d49bcdff2c28428134a53e36be"
268+
h, "ba7b62e8f25977e8e6629aee1d121ae92b6007258c90a53fb94c8607e1e96e10"
269269
)
270270
finally:
271271
shutil.rmtree(tmp)

0 commit comments

Comments
 (0)