Skip to content

Commit b422600

Browse files
committed
gh-116738: Make lzma module thread-safe
Make the attributes in lzma module thread-safe on the free-threading build. Attributes (check, eof, needs_input, unused_data) are now stored atomically or accessed via mutex-protected getters.
1 parent 14f0b51 commit b422600

File tree

3 files changed

+42
-8
lines changed

3 files changed

+42
-8
lines changed

Lib/test/test_free_threading/test_lzma.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,24 @@ def worker():
4545
data = lzd.decompress(compressed, chunk_size)
4646
self.assertEqual(len(data), chunk_size)
4747
output.append(data)
48+
# Read attributes concurrently with other threads decompressing
49+
self.assertEqual(lzd.check, lzma.CHECK_CRC64)
50+
self.assertIsInstance(lzd.eof, bool)
51+
self.assertIsInstance(lzd.needs_input, bool)
52+
self.assertIsInstance(lzd.unused_data, bytes)
4853

4954
run_concurrently(worker_func=worker, nthreads=NTHREADS)
5055
self.assertEqual(len(output), NTHREADS)
5156
# Verify the expected chunks (order doesn't matter due to append race)
5257
self.assertSetEqual(set(output), set(chunks))
58+
self.assertEqual(lzd.check, lzma.CHECK_CRC64)
59+
self.assertTrue(lzd.eof)
60+
self.assertFalse(lzd.needs_input)
61+
# Each thread added full compressed data to the buffer, but only 1 copy
62+
# is consumed to produce the output. The rest remains as unused_data.
63+
self.assertEqual(
64+
len(lzd.unused_data), len(compressed) * (NTHREADS - 1)
65+
)
5366

5467

5568
if __name__ == "__main__":
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Make the attributes in :mod:`lzma` thread-safe on the :term:`free threaded
2+
<free threading>` build.

Modules/_lzmamodule.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "pycore_long.h" // _PyLong_UInt32_Converter()
2121
// Blocks output buffer wrappers
2222
#include "pycore_blocks_output_buffer.h"
23+
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_*_RELAXED
2324

2425
#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
2526
#error "The maximum block size accepted by liblzma is SIZE_MAX."
@@ -948,10 +949,10 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
948949
goto error;
949950
}
950951
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
951-
d->check = lzma_get_check(&d->lzs);
952+
FT_ATOMIC_STORE_INT_RELAXED(d->check, lzma_get_check(&d->lzs));
952953
}
953954
if (lzret == LZMA_STREAM_END) {
954-
d->eof = 1;
955+
FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1);
955956
break;
956957
} else if (lzs->avail_out == 0) {
957958
/* Need to check lzs->avail_out before lzs->avail_in.
@@ -1038,7 +1039,7 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
10381039
}
10391040

10401041
if (d->eof) {
1041-
d->needs_input = 0;
1042+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
10421043
if (lzs->avail_in > 0) {
10431044
Py_XSETREF(d->unused_data,
10441045
PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
@@ -1054,17 +1055,17 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
10541055
/* (avail_in==0 && avail_out==0)
10551056
Maybe lzs's internal state still have a few bytes can
10561057
be output, try to output them next time. */
1057-
d->needs_input = 0;
1058+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
10581059

10591060
/* If max_length < 0, lzs->avail_out always > 0 */
10601061
assert(max_length >= 0);
10611062
} else {
10621063
/* Input buffer exhausted, output buffer has space. */
1063-
d->needs_input = 1;
1064+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 1);
10641065
}
10651066
}
10661067
else {
1067-
d->needs_input = 0;
1068+
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
10681069

10691070
/* If we did not use the input buffer, we now have
10701071
to copy the tail from the caller's buffer into the
@@ -1314,15 +1315,32 @@ PyDoc_STRVAR(Decompressor_needs_input_doc,
13141315
PyDoc_STRVAR(Decompressor_unused_data_doc,
13151316
"Data found after the end of the compressed stream.");
13161317

1318+
static PyObject *
1319+
Decompressor_unused_data_get(PyObject *op, void *Py_UNUSED(ignored))
1320+
{
1321+
Decompressor *self = Decompressor_CAST(op);
1322+
PyMutex_Lock(&self->mutex);
1323+
PyObject *result = Py_XNewRef(self->unused_data);
1324+
PyMutex_Unlock(&self->mutex);
1325+
if (result == NULL) {
1326+
PyErr_SetString(PyExc_AttributeError, "unused_data");
1327+
}
1328+
return result;
1329+
}
1330+
1331+
static PyGetSetDef Decompressor_getset[] = {
1332+
{"unused_data", Decompressor_unused_data_get, NULL,
1333+
Decompressor_unused_data_doc},
1334+
{NULL},
1335+
};
1336+
13171337
static PyMemberDef Decompressor_members[] = {
13181338
{"check", Py_T_INT, offsetof(Decompressor, check), Py_READONLY,
13191339
Decompressor_check_doc},
13201340
{"eof", Py_T_BOOL, offsetof(Decompressor, eof), Py_READONLY,
13211341
Decompressor_eof_doc},
13221342
{"needs_input", Py_T_BOOL, offsetof(Decompressor, needs_input), Py_READONLY,
13231343
Decompressor_needs_input_doc},
1324-
{"unused_data", Py_T_OBJECT_EX, offsetof(Decompressor, unused_data), Py_READONLY,
1325-
Decompressor_unused_data_doc},
13261344
{NULL}
13271345
};
13281346

@@ -1332,6 +1350,7 @@ static PyType_Slot lzma_decompressor_type_slots[] = {
13321350
{Py_tp_new, _lzma_LZMADecompressor},
13331351
{Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
13341352
{Py_tp_members, Decompressor_members},
1353+
{Py_tp_getset, Decompressor_getset},
13351354
{0, 0}
13361355
};
13371356

0 commit comments

Comments
 (0)