Skip to content

Commit 8232f00

Browse files
[3.12] gh-150599: Prevent bz2 decompressor reuse after errors (#150600) (#151054)
(cherry picked from commit 5755d0f)
1 parent a7370a9 commit 8232f00

3 files changed

Lines changed: 33 additions & 3 deletions

File tree

Lib/test/test_bz2.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,21 @@ def test_failure(self):
959959
# Previously, a second call could crash due to internal inconsistency
960960
self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
961961

962+
def test_decompress_after_data_error(self):
963+
data = bytes.fromhex(
964+
"425a6839314159265359000000000000007fffff000000000000000000000000"
965+
"00000000000000000000000000000000000000e0370000000000000000000000"
966+
"000000000000000000000000000000000000000000000000000083f3"
967+
)
968+
bzd = BZ2Decompressor()
969+
with self.assertRaisesRegex(OSError, "Invalid data stream"):
970+
bzd.decompress(data)
971+
# Previously, a second call could write out of bounds; it must now fail cleanly
972+
self.assertFalse(bzd.needs_input)
973+
self.assertFalse(bzd.eof)
974+
with self.assertRaisesRegex(ValueError, "previous error"):
975+
bzd.decompress(b'\x00' * 18)
976+
962977
@support.refcount_test
963978
def test_refleaks_in___init__(self):
964979
gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix a possible stack buffer overflow in :mod:`bz2` when a
2+
:class:`bz2.BZ2Decompressor` is reused after a decompression error.
3+
The decompressor now becomes unusable after libbz2 reports an error: further calls to :meth:`~bz2.BZ2Decompressor.decompress` raise :exc:`ValueError`.

Modules/_bz2module.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ typedef struct {
104104
typedef struct {
105105
PyObject_HEAD
106106
bz_stream bzs;
107+
int bzerror;
107108
char eof; /* T_BOOL expects a char */
108109
PyObject *unused_data;
109110
char needs_input;
@@ -461,8 +462,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
461462

462463
d->bzs_avail_in_real += bzs->avail_in;
463464

464-
if (catch_bz2_error(bzret))
465+
if (catch_bz2_error(bzret)) {
466+
d->bzerror = bzret;
467+
_Py_atomic_store_char_relaxed(&d->needs_input, 0);
465468
goto error;
469+
}
466470
if (bzret == BZ_STREAM_END) {
467471
d->eof = 1;
468472
break;
@@ -630,10 +634,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
630634
PyObject *result = NULL;
631635

632636
ACQUIRE_LOCK(self);
633-
if (self->eof)
637+
if (self->eof) {
634638
PyErr_SetString(PyExc_EOFError, "End of stream already reached");
635-
else
639+
}
640+
else if (self->bzerror) {
641+
// Re-entering BZ2_bzDecompress() after an error can write out of bounds.
642+
PyErr_SetString(PyExc_ValueError,
643+
"Decompressor is unusable after a previous error");
644+
}
645+
else {
636646
result = decompress(self, data->buf, data->len, max_length);
647+
}
637648
RELEASE_LOCK(self);
638649
return result;
639650
}
@@ -655,6 +666,7 @@ _bz2_BZ2Decompressor___init___impl(BZ2Decompressor *self)
655666
}
656667
self->lock = lock;
657668

669+
self->bzerror = 0;
658670
self->needs_input = 1;
659671
self->bzs_avail_in_real = 0;
660672
self->input_buffer = NULL;

0 commit comments

Comments
 (0)