Skip to content

Commit bf04a3d

Browse files
jjollyfrenzymadness
authored andcommitted
00427: ZipExtFile tell and seek, CVE-2024-0450
Backport of seek and tell methods for ZipExtFile makes it possible to backport the fix for CVE-2024-0450. Combines: python@066df4f python@66363b9
1 parent 5b48f41 commit bf04a3d

File tree

5 files changed

+196
-3
lines changed

5 files changed

+196
-3
lines changed

Doc/library/zipfile.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,9 @@ ZipFile Objects
230230
With *mode* ``'r'`` the file-like object
231231
(``ZipExtFile``) is read-only and provides the following methods:
232232
:meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`,
233-
:meth:`~io.IOBase.readlines`, :meth:`__iter__`,
234-
:meth:`~iterator.__next__`. These objects can operate independently of
235-
the ZipFile.
233+
:meth:`~io.IOBase.readlines`, :meth:`~io.IOBase.seek`,
234+
:meth:`~io.IOBase.tell`, :meth:`__iter__`, :meth:`~iterator.__next__`.
235+
These objects can operate independently of the ZipFile.
236236

237237
With ``mode='w'``, a writable file handle is returned, which supports the
238238
:meth:`~io.BufferedIOBase.write` method. While a writable file handle is open,

Lib/test/test_zipfile.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1610,6 +1610,100 @@ def test_open_conflicting_handles(self):
16101610
self.assertEqual(zipf.read('baz'), msg3)
16111611
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])
16121612

1613+
def test_seek_tell(self):
1614+
# Test seek functionality
1615+
txt = b"Where's Bruce?"
1616+
bloc = txt.find(b"Bruce")
1617+
# Check seek on a file
1618+
with zipfile.ZipFile(TESTFN, "w") as zipf:
1619+
zipf.writestr("foo.txt", txt)
1620+
with zipfile.ZipFile(TESTFN, "r") as zipf:
1621+
with zipf.open("foo.txt", "r") as fp:
1622+
fp.seek(bloc, os.SEEK_SET)
1623+
self.assertEqual(fp.tell(), bloc)
1624+
fp.seek(-bloc, os.SEEK_CUR)
1625+
self.assertEqual(fp.tell(), 0)
1626+
fp.seek(bloc, os.SEEK_CUR)
1627+
self.assertEqual(fp.tell(), bloc)
1628+
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
1629+
fp.seek(0, os.SEEK_END)
1630+
self.assertEqual(fp.tell(), len(txt))
1631+
# Check seek on memory file
1632+
data = io.BytesIO()
1633+
with zipfile.ZipFile(data, mode="w") as zipf:
1634+
zipf.writestr("foo.txt", txt)
1635+
with zipfile.ZipFile(data, mode="r") as zipf:
1636+
with zipf.open("foo.txt", "r") as fp:
1637+
fp.seek(bloc, os.SEEK_SET)
1638+
self.assertEqual(fp.tell(), bloc)
1639+
fp.seek(-bloc, os.SEEK_CUR)
1640+
self.assertEqual(fp.tell(), 0)
1641+
fp.seek(bloc, os.SEEK_CUR)
1642+
self.assertEqual(fp.tell(), bloc)
1643+
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
1644+
fp.seek(0, os.SEEK_END)
1645+
self.assertEqual(fp.tell(), len(txt))
1646+
1647+
@requires_zlib
1648+
def test_full_overlap(self):
1649+
data = (
1650+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
1651+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed'
1652+
b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P'
1653+
b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2'
1654+
b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00'
1655+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK'
1656+
b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e'
1657+
b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00'
1658+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05'
1659+
b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00'
1660+
b'\x00\x00\x00'
1661+
)
1662+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
1663+
self.assertEqual(zipf.namelist(), ['a', 'b'])
1664+
zi = zipf.getinfo('a')
1665+
self.assertEqual(zi.header_offset, 0)
1666+
self.assertEqual(zi.compress_size, 16)
1667+
self.assertEqual(zi.file_size, 1033)
1668+
zi = zipf.getinfo('b')
1669+
self.assertEqual(zi.header_offset, 0)
1670+
self.assertEqual(zi.compress_size, 16)
1671+
self.assertEqual(zi.file_size, 1033)
1672+
self.assertEqual(len(zipf.read('a')), 1033)
1673+
with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'):
1674+
zipf.read('b')
1675+
1676+
@requires_zlib
1677+
def test_quoted_overlap(self):
1678+
data = (
1679+
b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc'
1680+
b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00'
1681+
b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l'
1682+
b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
1683+
b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\'
1684+
b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0'
1685+
b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01'
1686+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
1687+
b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l'
1688+
b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00'
1689+
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00'
1690+
b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00'
1691+
b'\x00S\x00\x00\x00\x00\x00'
1692+
)
1693+
with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf:
1694+
self.assertEqual(zipf.namelist(), ['a', 'b'])
1695+
zi = zipf.getinfo('a')
1696+
self.assertEqual(zi.header_offset, 0)
1697+
self.assertEqual(zi.compress_size, 52)
1698+
self.assertEqual(zi.file_size, 1064)
1699+
zi = zipf.getinfo('b')
1700+
self.assertEqual(zi.header_offset, 36)
1701+
self.assertEqual(zi.compress_size, 16)
1702+
self.assertEqual(zi.file_size, 1033)
1703+
with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'):
1704+
zipf.read('a')
1705+
self.assertEqual(len(zipf.read('b')), 1033)
1706+
16131707
def tearDown(self):
16141708
unlink(TESTFN)
16151709
unlink(TESTFN2)

Lib/zipfile.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ class ZipInfo (object):
338338
'compress_size',
339339
'file_size',
340340
'_raw_time',
341+
'_end_offset',
341342
)
342343

343344
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
@@ -376,6 +377,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
376377
self.volume = 0 # Volume number of file header
377378
self.internal_attr = 0 # Internal attributes
378379
self.external_attr = 0 # External file attributes
380+
self._end_offset = None # Start of the next local header or central directory
379381
# Other attributes are set by class ZipFile:
380382
# header_offset Byte offset to the file header
381383
# CRC CRC-32 of the uncompressed file
@@ -718,6 +720,18 @@ def __init__(self, file, pos, close, lock, writing):
718720
self._close = close
719721
self._lock = lock
720722
self._writing = writing
723+
self.seekable = file.seekable
724+
self.tell = file.tell
725+
726+
def seek(self, offset, whence=0):
727+
with self._lock:
728+
if self.writing():
729+
raise ValueError("Can't reposition in the ZIP file while "
730+
"there is an open writing handle on it. "
731+
"Close the writing handle before trying to read.")
732+
self._file.seek(self._pos)
733+
self._pos = self._file.tell()
734+
return self._pos
721735

722736
def read(self, n=-1):
723737
with self._lock:
@@ -768,6 +782,9 @@ class ZipExtFile(io.BufferedIOBase):
768782
# Read from compressed files in 4k blocks.
769783
MIN_READ_SIZE = 4096
770784

785+
# Chunk size to read during seek
786+
MAX_SEEK_READ = 1 << 24
787+
771788
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
772789
close_fileobj=False):
773790
self._fileobj = fileobj
@@ -800,6 +817,17 @@ def __init__(self, fileobj, mode, zipinfo, decrypter=None,
800817
else:
801818
self._expected_crc = None
802819

820+
self._seekable = False
821+
try:
822+
if fileobj.seekable():
823+
self._orig_compress_start = fileobj.tell()
824+
self._orig_compress_size = zipinfo.compress_size
825+
self._orig_file_size = zipinfo.file_size
826+
self._orig_start_crc = self._running_crc
827+
self._seekable = True
828+
except AttributeError:
829+
pass
830+
803831
def __repr__(self):
804832
result = ['<%s.%s' % (self.__class__.__module__,
805833
self.__class__.__qualname__)]
@@ -985,6 +1013,62 @@ def close(self):
9851013
finally:
9861014
super().close()
9871015

1016+
def seekable(self):
1017+
return self._seekable
1018+
1019+
def seek(self, offset, whence=0):
1020+
if not self._seekable:
1021+
raise io.UnsupportedOperation("underlying stream is not seekable")
1022+
curr_pos = self.tell()
1023+
if whence == 0: # Seek from start of file
1024+
new_pos = offset
1025+
elif whence == 1: # Seek from current position
1026+
new_pos = curr_pos + offset
1027+
elif whence == 2: # Seek from EOF
1028+
new_pos = self._orig_file_size + offset
1029+
else:
1030+
raise ValueError("whence must be os.SEEK_SET (0), "
1031+
"os.SEEK_CUR (1), or os.SEEK_END (2)")
1032+
1033+
if new_pos > self._orig_file_size:
1034+
new_pos = self._orig_file_size
1035+
1036+
if new_pos < 0:
1037+
new_pos = 0
1038+
1039+
read_offset = new_pos - curr_pos
1040+
buff_offset = read_offset + self._offset
1041+
1042+
if buff_offset >= 0 and buff_offset < len(self._readbuffer):
1043+
# Just move the _offset index if the new position is in the _readbuffer
1044+
self._offset = buff_offset
1045+
read_offset = 0
1046+
elif read_offset < 0:
1047+
# Position is before the current position. Reset the ZipExtFile
1048+
1049+
self._fileobj.seek(self._orig_compress_start)
1050+
self._running_crc = self._orig_start_crc
1051+
self._compress_left = self._orig_compress_size
1052+
self._left = self._orig_file_size
1053+
self._readbuffer = b''
1054+
self._offset = 0
1055+
self._decompressor = zipfile._get_decompressor(self._compress_type)
1056+
self._eof = False
1057+
read_offset = new_pos
1058+
1059+
while read_offset > 0:
1060+
read_len = min(self.MAX_SEEK_READ, read_offset)
1061+
self.read(read_len)
1062+
read_offset -= read_len
1063+
1064+
return self.tell()
1065+
1066+
def tell(self):
1067+
if not self._seekable:
1068+
raise io.UnsupportedOperation("underlying stream is not seekable")
1069+
filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
1070+
return filepos
1071+
9881072

9891073
class _ZipWriteFile(io.BufferedIOBase):
9901074
def __init__(self, zf, zinfo, zip64):
@@ -1264,6 +1348,12 @@ def _RealGetContents(self):
12641348
if self.debug > 2:
12651349
print("total", total)
12661350

1351+
end_offset = self.start_dir
1352+
for zinfo in sorted(self.filelist,
1353+
key=lambda zinfo: zinfo.header_offset,
1354+
reverse=True):
1355+
zinfo._end_offset = end_offset
1356+
end_offset = zinfo.header_offset
12671357

12681358
def namelist(self):
12691359
"""Return a list of file names in the archive."""
@@ -1418,6 +1508,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False):
14181508
'File name in directory %r and header %r differ.'
14191509
% (zinfo.orig_filename, fname))
14201510

1511+
if (zinfo._end_offset is not None and
1512+
zef_file.tell() + zinfo.compress_size > zinfo._end_offset):
1513+
raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)")
1514+
14211515
# check for encrypted flag & handle password
14221516
is_encrypted = zinfo.flag_bits & 0x1
14231517
zd = None
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Added seek and tell to the ZipExtFile class. This only works if the file
2+
object used to open the zipfile is seekable.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises
2+
BadZipFile when try to read an entry that overlaps with other entry or
3+
central directory.

0 commit comments

Comments
 (0)