Skip to content

Commit a47740c

Browse files
authored
Restore BodyPartReader.decode() as sync method, add decode_async() for non-blocking decompression (aio-libs#11940)
1 parent 453c26d commit a47740c

5 files changed

Lines changed: 123 additions & 14 deletions

File tree

CHANGES/11898.bugfix.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Restored :py:meth:`~aiohttp.BodyPartReader.decode` as a synchronous method
2+
for backward compatibility. The method was inadvertently changed to async
3+
in 3.13.3 as part of the decompression bomb security fix. A new
4+
:py:meth:`~aiohttp.BodyPartReader.decode_async` method is now available
5+
for non-blocking decompression of large payloads. Internal aiohttp code
6+
uses the async variant to maintain security protections -- by :user:`bdraco`.

aiohttp/multipart.py

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ async def read(self, *, decode: bool = False) -> bytes:
314314
data.extend(await self.read_chunk(self.chunk_size))
315315
# https://github.com/python/mypy/issues/17537
316316
if decode: # type: ignore[unreachable]
317-
return await self.decode(data)
317+
return await self.decode_async(data)
318318
return data
319319

320320
async def read_chunk(self, size: int = chunk_size) -> bytes:
@@ -492,20 +492,58 @@ def at_eof(self) -> bool:
492492
"""Returns True if the boundary was reached or False otherwise."""
493493
return self._at_eof
494494

495-
async def decode(self, data: bytes) -> bytes:
496-
"""Decodes data.
495+
def _apply_content_transfer_decoding(self, data: bytes) -> bytes:
496+
"""Apply Content-Transfer-Encoding decoding if header is present."""
497+
if CONTENT_TRANSFER_ENCODING in self.headers:
498+
return self._decode_content_transfer(data)
499+
return data
500+
501+
def _needs_content_decoding(self) -> bool:
502+
"""Check if Content-Encoding decoding should be applied."""
503+
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
504+
return not self._is_form_data and CONTENT_ENCODING in self.headers
505+
506+
def decode(self, data: bytes) -> bytes:
507+
"""Decodes data synchronously.
497508
498-
Decoding is done according the specified Content-Encoding
509+
Decodes data according to the specified Content-Encoding
499510
or Content-Transfer-Encoding headers value.
511+
512+
Note: For large payloads, consider using decode_async() instead
513+
to avoid blocking the event loop during decompression.
500514
"""
501-
if CONTENT_TRANSFER_ENCODING in self.headers:
502-
data = self._decode_content_transfer(data)
503-
# https://datatracker.ietf.org/doc/html/rfc7578#section-4.8
504-
if not self._is_form_data and CONTENT_ENCODING in self.headers:
505-
return await self._decode_content(data)
515+
data = self._apply_content_transfer_decoding(data)
516+
if self._needs_content_decoding():
517+
return self._decode_content(data)
506518
return data
507519

508-
async def _decode_content(self, data: bytes) -> bytes:
520+
async def decode_async(self, data: bytes) -> bytes:
521+
"""Decodes data asynchronously.
522+
523+
Decodes data according to the specified Content-Encoding
524+
or Content-Transfer-Encoding headers value.
525+
526+
This method offloads decompression to an executor for large payloads
527+
to avoid blocking the event loop.
528+
"""
529+
data = self._apply_content_transfer_decoding(data)
530+
if self._needs_content_decoding():
531+
return await self._decode_content_async(data)
532+
return data
533+
534+
def _decode_content(self, data: bytes) -> bytes:
535+
encoding = self.headers.get(CONTENT_ENCODING, "").lower()
536+
if encoding == "identity":
537+
return data
538+
if encoding in {"deflate", "gzip"}:
539+
return ZLibDecompressor(
540+
encoding=encoding,
541+
suppress_deflate_header=True,
542+
).decompress_sync(data, max_length=self._max_decompress_size)
543+
544+
raise RuntimeError(f"unknown content encoding: {encoding}")
545+
546+
async def _decode_content_async(self, data: bytes) -> bytes:
509547
encoding = self.headers.get(CONTENT_ENCODING, "").lower()
510548
if encoding == "identity":
511549
return data
@@ -588,7 +626,7 @@ async def write(self, writer: AbstractStreamWriter) -> None:
588626
field = self._value
589627
chunk = await field.read_chunk(size=2**16)
590628
while chunk:
591-
await writer.write(await field.decode(chunk))
629+
await writer.write(await field.decode_async(chunk))
592630
chunk = await field.read_chunk(size=2**16)
593631

594632

aiohttp/web_request.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -716,7 +716,7 @@ async def post(self) -> "MultiDictProxy[str | bytes | FileField]":
716716
)
717717
chunk = await field.read_chunk(size=2**16)
718718
while chunk:
719-
chunk = await field.decode(chunk)
719+
chunk = await field.decode_async(chunk)
720720
await self._loop.run_in_executor(None, tmp.write, chunk)
721721
size += len(chunk)
722722
if 0 < max_size < size:

docs/multipart_reference.rst

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ Multipart reference
102102

103103
.. method:: decode(data)
104104

105-
Decodes data according the specified ``Content-Encoding``
105+
Decodes data synchronously according to the specified ``Content-Encoding``
106106
or ``Content-Transfer-Encoding`` headers value.
107107

108108
Supports ``gzip``, ``deflate`` and ``identity`` encodings for
@@ -117,6 +117,34 @@ Multipart reference
117117

118118
:rtype: bytes
119119

120+
.. note::
121+
122+
For large payloads, consider using :meth:`decode_async` instead
123+
to avoid blocking the event loop during decompression.
124+
125+
.. method:: decode_async(data)
126+
:async:
127+
128+
Decodes data asynchronously according to the specified ``Content-Encoding``
129+
or ``Content-Transfer-Encoding`` headers value.
130+
131+
This method offloads decompression to an executor for large payloads
132+
to avoid blocking the event loop.
133+
134+
Supports ``gzip``, ``deflate`` and ``identity`` encodings for
135+
``Content-Encoding`` header.
136+
137+
Supports ``base64``, ``quoted-printable``, ``binary`` encodings for
138+
``Content-Transfer-Encoding`` header.
139+
140+
:param bytearray data: Data to decode.
141+
142+
:raises: :exc:`RuntimeError` - if encoding is unknown.
143+
144+
:rtype: bytes
145+
146+
.. versionadded:: 3.13.4
147+
120148
.. method:: get_charset(default=None)
121149

122150
Returns charset parameter from ``Content-Type`` header or default.

tests/test_multipart.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,9 +394,46 @@ async def test_decode_with_content_transfer_encoding_base64(self) -> None:
394394
result = b""
395395
while not obj.at_eof():
396396
chunk = await obj.read_chunk(size=6)
397-
result += await obj.decode(chunk)
397+
result += obj.decode(chunk)
398398
assert b"Time to Relax!" == result
399399

400+
async def test_decode_async_with_content_transfer_encoding_base64(self) -> None:
401+
h = CIMultiDictProxy(CIMultiDict({CONTENT_TRANSFER_ENCODING: "base64"}))
402+
with Stream(b"VG\r\r\nltZSB0byBSZ\r\nWxheCE=\r\n--:--") as stream:
403+
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
404+
result = b""
405+
while not obj.at_eof():
406+
chunk = await obj.read_chunk(size=6)
407+
result += await obj.decode_async(chunk)
408+
assert b"Time to Relax!" == result
409+
410+
async def test_decode_with_content_encoding_deflate(self) -> None:
411+
h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "deflate"}))
412+
data = b"\x0b\xc9\xccMU(\xc9W\x08J\xcdI\xacP\x04\x00"
413+
with Stream(data + b"\r\n--:--") as stream:
414+
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
415+
chunk = await obj.read_chunk(size=len(data))
416+
result = obj.decode(chunk)
417+
assert b"Time to Relax!" == result
418+
419+
async def test_decode_with_content_encoding_identity(self) -> None:
420+
h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "identity"}))
421+
data = b"Time to Relax!"
422+
with Stream(data + b"\r\n--:--") as stream:
423+
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
424+
chunk = await obj.read_chunk(size=len(data))
425+
result = obj.decode(chunk)
426+
assert data == result
427+
428+
async def test_decode_with_content_encoding_unknown(self) -> None:
429+
h = CIMultiDictProxy(CIMultiDict({CONTENT_ENCODING: "snappy"}))
430+
data = b"Time to Relax!"
431+
with Stream(data + b"\r\n--:--") as stream:
432+
obj = aiohttp.BodyPartReader(BOUNDARY, h, stream)
433+
chunk = await obj.read_chunk(size=len(data))
434+
with pytest.raises(RuntimeError, match="unknown content encoding"):
435+
obj.decode(chunk)
436+
400437
async def test_read_with_content_transfer_encoding_quoted_printable(self) -> None:
401438
h = CIMultiDictProxy(
402439
CIMultiDict({CONTENT_TRANSFER_ENCODING: "quoted-printable"})

0 commit comments

Comments
 (0)