Skip to content

Commit f50cd7a

Browse files
committed
gh-114953: Expose the MT stream (de)compressors in lzma module
1 parent 364ae60 commit f50cd7a

File tree

7 files changed

+259
-39
lines changed

7 files changed

+259
-39
lines changed

Doc/library/lzma.rst

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ Reading and writing compressed files
145145
Compressing and decompressing data in memory
146146
--------------------------------------------
147147

148-
.. class:: LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)
148+
.. class:: LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None, *, mt_options=None)
149149

150150
Create a compressor object, which can be used to compress data incrementally.
151151

@@ -196,13 +196,23 @@ Compressing and decompressing data in memory
196196
Higher presets produce smaller output, but make the compression process
197197
slower.
198198

199+
Additionally when *format* is specified as :const:`FORMAT_XZ`, adding the
200+
*mt_options* dictionary argument instructs the module to use the
201+
multithreaded compressor implementation. The following keys in
202+
*mt_options* are currently recognized; any other key is silently ignored:
203+
204+
* *threads*: the desired number of threads the underlying library should use
205+
206+
* *block_size*: the maximum uncompressed size of a block
207+
199208
.. note::
200209

201210
In addition to being more CPU-intensive, compression with higher presets
202211
also requires much more memory (and produces output that needs more memory
203212
to decompress). With preset ``9`` for example, the overhead for an
204-
:class:`LZMACompressor` object can be as high as 800 MiB. For this reason,
205-
it is generally best to stick with the default preset.
213+
:class:`LZMACompressor` object can be as high as 800 MiB per worker
214+
thread. For this reason, it is generally best to stick with the default
215+
preset.
206216

207217
The *filters* argument (if provided) should be a filter chain specifier.
208218
See :ref:`filter-chain-specs` for details.
@@ -246,6 +256,19 @@ Compressing and decompressing data in memory
246256
:const:`FORMAT_RAW`, but should not be used for other formats.
247257
See :ref:`filter-chain-specs` for more information about filter chains.
248258

259+
Additionally when *format* is specified as :const:`FORMAT_XZ`, adding the
260+
*mt_options* dictionary argument instructs the module to use the
261+
multithreaded decompressor implementation which decompresses blocks in
262+
parallel. The following keys in *mt_options* are currently recognized;
263+
any other key is silently ignored:
264+
265+
* *threads*: the desired number of threads the underlying library should use
266+
267+
* *memlimit_threading*: A soft memory limit. Lets the underlying library
268+
scale (down) the actual number of worker threads to stay within the budget.
269+
At least one worker will always be used even if over this limit. Use
270+
the *memlimit* argument if there is a hard memory limit to enforce.
271+
249272
.. note::
250273
This class does not transparently handle inputs containing multiple
251274
compressed streams, unlike :func:`decompress` and :class:`LZMAFile`. To
@@ -302,16 +325,16 @@ Compressing and decompressing data in memory
302325

303326
.. versionadded:: 3.5
304327

305-
.. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None)
328+
.. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None, *, mt_options=None)
306329

307330
Compress *data* (a :class:`bytes` object), returning the compressed data as a
308331
:class:`bytes` object.
309332

310333
See :class:`LZMACompressor` above for a description of the *format*, *check*,
311-
*preset* and *filters* arguments.
334+
*preset*, *filters* and *mt_options* arguments.
312335

313336

314-
.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None)
337+
.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None, *, mt_options=None)
315338

316339
Decompress *data* (a :class:`bytes` object), returning the uncompressed data
317340
as a :class:`bytes` object.
@@ -320,7 +343,7 @@ Compressing and decompressing data in memory
320343
decompress all of these streams, and return the concatenation of the results.
321344

322345
See :class:`LZMADecompressor` above for a description of the *format*,
323-
*memlimit* and *filters* arguments.
346+
*memlimit*, *filters* and *mt_options* arguments.
324347

325348

326349
Miscellaneous

Lib/lzma.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ class LZMAFile(_streams.BaseStream):
4747
"""
4848

4949
def __init__(self, filename=None, mode="r", *,
50-
format=None, check=-1, preset=None, filters=None):
50+
format=None, check=-1, preset=None, filters=None,
51+
mt_options=None):
5152
"""Open an LZMA-compressed file in binary mode.
5253
5354
filename can be either an actual file name (given as a str,
@@ -102,14 +103,18 @@ def __init__(self, filename=None, mode="r", *,
102103
raise ValueError("Cannot specify a preset compression "
103104
"level when opening a file for reading")
104105
if format is None:
105-
format = FORMAT_AUTO
106+
if mt_options is None:
107+
format = FORMAT_AUTO
108+
else:
109+
format = FORMAT_XZ
106110
mode_code = _MODE_READ
107111
elif mode in ("w", "wb", "a", "ab", "x", "xb"):
108112
if format is None:
109113
format = FORMAT_XZ
110114
mode_code = _MODE_WRITE
111115
self._compressor = LZMACompressor(format=format, check=check,
112-
preset=preset, filters=filters)
116+
preset=preset, filters=filters,
117+
mt_options=mt_options)
113118
self._pos = 0
114119
else:
115120
raise ValueError("Invalid mode: {!r}".format(mode))
@@ -324,29 +329,33 @@ def open(filename, mode="rb", *,
324329
return binary_file
325330

326331

327-
def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
332+
def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None, *,
333+
mt_options=None):
328334
"""Compress a block of data.
329335
330336
Refer to LZMACompressor's docstring for a description of the
331-
optional arguments *format*, *check*, *preset* and *filters*.
337+
optional arguments *format*, *check*, *preset*, *filters* and *mt_options*.
332338
333339
For incremental compression, use an LZMACompressor instead.
334340
"""
335-
comp = LZMACompressor(format, check, preset, filters)
341+
comp = LZMACompressor(format, check, preset, filters,
342+
mt_options=mt_options)
336343
return comp.compress(data) + comp.flush()
337344

338345

339-
def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
346+
def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None, *,
347+
mt_options=None):
340348
"""Decompress a block of data.
341349
342350
Refer to LZMADecompressor's docstring for a description of the
343-
optional arguments *format*, *check* and *filters*.
351+
optional arguments *format*, *memlimit*, *filters* and *mt_options*.
344352
345353
For incremental decompression, use an LZMADecompressor instead.
346354
"""
347355
results = []
348356
while True:
349-
decomp = LZMADecompressor(format, memlimit, filters)
357+
decomp = LZMADecompressor(format, memlimit, filters,
358+
mt_options=mt_options)
350359
try:
351360
res = decomp.decompress(data)
352361
except LZMAError:

Lib/test/test_lzma.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,35 @@ def test_bad_filter_spec(self):
7373
with self.assertRaises(ValueError):
7474
LZMACompressor(filters=[{"id": lzma.FILTER_X86, "foo": 0}])
7575

76+
def test_bad_mt_options(self):
77+
with self.assertRaises(TypeError):
78+
LZMACompressor(format=lzma.FORMAT_XZ, mt_options=3)
79+
with self.assertRaises(TypeError):
80+
LZMACompressor(format=lzma.FORMAT_XZ, mt_options={"threads": 3.45})
81+
with self.assertRaises(TypeError):
82+
LZMACompressor(format=lzma.FORMAT_XZ, mt_options={"flags": "asdf"})
83+
# Can only specify MT encoder with XZ
84+
with self.assertRaises(ValueError):
85+
LZMACompressor(format=lzma.FORMAT_AUTO, mt_options=MT_OPTIONS_1)
86+
with self.assertRaises(ValueError):
87+
LZMACompressor(format=lzma.FORMAT_RAW, mt_options=MT_OPTIONS_1)
88+
with self.assertRaises(ValueError):
89+
LZMACompressor(format=lzma.FORMAT_ALONE, mt_options=MT_OPTIONS_1)
90+
91+
with self.assertRaises(TypeError):
92+
LZMADecompressor(format=lzma.FORMAT_XZ, mt_options=3)
93+
with self.assertRaises(TypeError):
94+
LZMADecompressor(format=lzma.FORMAT_XZ,
95+
mt_options={"threads": 3.45})
96+
with self.assertRaises(TypeError):
97+
LZMADecompressor(format=lzma.FORMAT_XZ,
98+
mt_options={"flags": "asdf"})
99+
# Can only specify MT decoder with XZ
100+
with self.assertRaises(ValueError):
101+
LZMADecompressor(format=lzma.FORMAT_RAW, mt_options=MT_OPTIONS_1)
102+
with self.assertRaises(ValueError):
103+
LZMADecompressor(format=lzma.FORMAT_ALONE, mt_options=MT_OPTIONS_1)
104+
76105
def test_decompressor_after_eof(self):
77106
lzd = LZMADecompressor()
78107
lzd.decompress(COMPRESSED_XZ)
@@ -85,6 +114,10 @@ def test_decompressor_memlimit(self):
85114
lzd = LZMADecompressor(lzma.FORMAT_XZ, memlimit=1024)
86115
self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ)
87116

117+
lzd = LZMADecompressor(lzma.FORMAT_XZ, memlimit=1024,
118+
mt_options=MT_OPTIONS_1)
119+
self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ)
120+
88121
lzd = LZMADecompressor(lzma.FORMAT_ALONE, memlimit=1024)
89122
self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_ALONE)
90123

@@ -109,6 +142,10 @@ def test_decompressor_xz(self):
109142
lzd = LZMADecompressor(lzma.FORMAT_XZ)
110143
self._test_decompressor(lzd, COMPRESSED_XZ, lzma.CHECK_CRC64)
111144

145+
def test_decompressor_xz_mt(self):
146+
lzd = LZMADecompressor(lzma.FORMAT_XZ, mt_options=MT_OPTIONS_1)
147+
self._test_decompressor(lzd, COMPRESSED_XZ, lzma.CHECK_CRC64)
148+
112149
def test_decompressor_alone(self):
113150
lzd = LZMADecompressor(lzma.FORMAT_ALONE)
114151
self._test_decompressor(lzd, COMPRESSED_ALONE, lzma.CHECK_NONE)
@@ -281,6 +318,12 @@ def test_roundtrip_xz(self):
281318
lzd = LZMADecompressor()
282319
self._test_decompressor(lzd, cdata, lzma.CHECK_CRC64)
283320

321+
def test_roundtrip_xz_mt(self):
322+
lzc = LZMACompressor(format=lzma.FORMAT_XZ, mt_options=MT_OPTIONS_1)
323+
cdata = lzc.compress(INPUT) + lzc.flush()
324+
lzd = LZMADecompressor()
325+
self._test_decompressor(lzd, cdata, lzma.CHECK_CRC64)
326+
284327
def test_roundtrip_alone(self):
285328
lzc = LZMACompressor(lzma.FORMAT_ALONE)
286329
cdata = lzc.compress(INPUT) + lzc.flush()
@@ -2092,6 +2135,8 @@ def test_filter_properties_roundtrip(self):
20922135
b'\xeb#\x182\x96I\xf7l\xf3r\x00'
20932136
)
20942137

2138+
MT_OPTIONS_1 = {"threads": 4}
2139+
20952140

20962141
if __name__ == "__main__":
20972142
unittest.main()

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,7 @@ Toshio Kuratomi
10521052
Ilia Kurenkov
10531053
Vladimir Kushnir
10541054
Erno Kuusela
1055+
Ondřej Kuzník
10551056
Kabir Kwatra
10561057
Ross Lagerwall
10571058
Cameron Laird
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Support the MT (MultiThreaded) encoder and decoder in the :mod:`lzma` module.

0 commit comments

Comments
 (0)