Skip to content

Commit 203364b

Browse files
authored
Add support for additional HTTP checksum algorithms and user-specified MD5s (aws#10099)
1 parent 786b2ed commit 203364b

7 files changed

Lines changed: 386 additions & 46 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "enhancement",
3+
"category": "``checksums``",
4+
"description": "Add support for SHA512, XXHASH64, XXHASH3, and XXHASH128 HTTP checksum algorithms. Also add pass-through support for user-provided MD5 checksum headers (without client-side MD5 calculation or validation)."
5+
}

awscli/botocore/httpchecksum.py

Lines changed: 91 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,19 @@
2323
import io
2424
import logging
2525
from binascii import crc32
26-
from hashlib import sha1, sha256
26+
from hashlib import sha1, sha256, sha512
2727

2828
from awscrt import checksums as crt_checksums
2929
from botocore.compat import urlparse
3030
from botocore.exceptions import AwsChunkedWrapperError, FlexibleChecksumError
3131
from botocore.model import StructureShape
3232
from botocore.response import StreamingBody
3333
from botocore.useragent import register_feature_id
34-
from botocore.utils import determine_content_length, has_checksum_header
34+
from botocore.utils import (
35+
determine_content_length,
36+
get_checksum_algorithm_headers,
37+
has_checksum_header,
38+
)
3539

3640
logger = logging.getLogger(__name__)
3741

@@ -115,6 +119,42 @@ def digest(self):
115119
return self._int_crc64nvme.to_bytes(8, byteorder="big")
116120

117121

122+
class CrtXxhash64Checksum(BaseChecksum):
    """Checksum implementation backed by the CRT's XXHash64 hasher.

    Note: This class is only used if the CRT is available.
    """

    def __init__(self):
        # A fresh CRT hasher per instance; state accumulates via update().
        self._xxhash = crt_checksums.XXHash.new_xxhash64()

    def update(self, chunk):
        """Feed ``chunk`` into the underlying CRT hasher."""
        self._xxhash.update(chunk)

    def digest(self):
        """Return whatever the CRT hasher's ``finalize()`` produces.

        NOTE(review): presumably the raw digest bytes -- confirm against
        the awscrt API.
        """
        return self._xxhash.finalize()
132+
133+
134+
class CrtXxhash3Checksum(BaseChecksum):
    """Checksum implementation backed by the CRT's 64-bit XXHash3 hasher.

    Note: This class is only used if the CRT is available.
    """

    def __init__(self):
        # A fresh CRT hasher per instance; state accumulates via update().
        self._xxhash = crt_checksums.XXHash.new_xxhash3_64()

    def update(self, chunk):
        """Feed ``chunk`` into the underlying CRT hasher."""
        self._xxhash.update(chunk)

    def digest(self):
        """Return whatever the CRT hasher's ``finalize()`` produces.

        NOTE(review): presumably the raw digest bytes -- confirm against
        the awscrt API.
        """
        return self._xxhash.finalize()
144+
145+
146+
class CrtXxhash128Checksum(BaseChecksum):
    """Checksum implementation backed by the CRT's 128-bit XXHash3 hasher.

    Note: This class is only used if the CRT is available.
    """

    def __init__(self):
        # A fresh CRT hasher per instance; state accumulates via update().
        self._xxhash = crt_checksums.XXHash.new_xxhash3_128()

    def update(self, chunk):
        """Feed ``chunk`` into the underlying CRT hasher."""
        self._xxhash.update(chunk)

    def digest(self):
        """Return whatever the CRT hasher's ``finalize()`` produces.

        NOTE(review): presumably the raw digest bytes -- confirm against
        the awscrt API.
        """
        return self._xxhash.finalize()
156+
157+
118158
class Sha1Checksum(BaseChecksum):
119159
def __init__(self):
120160
self._checksum = sha1()
@@ -137,6 +177,17 @@ def digest(self):
137177
return self._checksum.digest()
138178

139179

180+
class Sha512Checksum(BaseChecksum):
    """Checksum implementation backed by ``hashlib.sha512``."""

    def __init__(self):
        # One hashlib object per instance; update() accumulates into it.
        self._checksum = sha512()

    def update(self, chunk):
        """Feed ``chunk`` into the running SHA-512 hash."""
        self._checksum.update(chunk)

    def digest(self):
        """Return the SHA-512 digest of the data seen so far, as bytes."""
        return self._checksum.digest()
189+
190+
140191
class AwsChunkedWrapper:
141192
_DEFAULT_CHUNK_SIZE = 1024 * 1024
142193

@@ -241,6 +292,7 @@ def _validate_checksum(self):
241292
def resolve_checksum_context(request, operation_model, params):
    """Resolve checksum settings for a request and record feature IDs.

    Runs the request-side resolver, then the response-side resolver, each
    with ``(request, operation_model, params)``, and finally registers
    checksum feature IDs for ``request``.
    """
    resolvers = (
        resolve_request_checksum_algorithm,
        resolve_response_checksum_algorithms,
    )
    for resolve in resolvers:
        resolve(request, operation_model, params)
    # Registration runs last: it inspects the request's headers and its
    # resolved checksum context.
    _register_checksum_feature_ids(request)
244296

245297

246298
def resolve_request_checksum_algorithm(
@@ -361,7 +413,6 @@ def _apply_request_header_checksum(request):
361413
checksum_cls = _CHECKSUM_CLS.get(algorithm["algorithm"])
362414
digest = checksum_cls().handle(request["body"])
363415
request["headers"][location_name] = digest
364-
_register_checksum_algorithm_feature_id(algorithm)
365416

366417

367418
def _apply_request_trailer_checksum(request):
@@ -385,7 +436,6 @@ def _apply_request_trailer_checksum(request):
385436
else:
386437
headers["Content-Encoding"] = "aws-chunked"
387438
headers["X-Amz-Trailer"] = location_name
388-
_register_checksum_algorithm_feature_id(algorithm)
389439

390440
content_length = determine_content_length(body)
391441
if content_length is not None:
@@ -409,8 +459,29 @@ def _apply_request_trailer_checksum(request):
409459
)
410460

411461

462+
def _register_checksum_feature_ids(request):
    """Register feature IDs for checksum algorithms used in the request.

    Checksum headers already present on the request take precedence: each
    one that is not a known non-algorithm header has its
    ``X-AMZ-CHECKSUM-`` prefix stripped and the remainder registered as the
    algorithm name. Only when no checksum header is present is the
    resolved checksum context consulted.
    """
    prefix = "X-AMZ-CHECKSUM-"
    # Headers that share the checksum prefix but do not name an algorithm.
    non_algorithm_headers = (
        "X-AMZ-CHECKSUM-ALGORITHM",
        "X-AMZ-CHECKSUM-MODE",
        "X-AMZ-CHECKSUM-TYPE",
    )

    present_headers = get_checksum_algorithm_headers(request)
    if present_headers:
        for raw_name in present_headers:
            upper_name = raw_name.upper()
            if upper_name in non_algorithm_headers:
                continue
            _register_checksum_algorithm_feature_id(
                upper_name.removeprefix(prefix)
            )
        return

    # If no checksum header exists yet, check the resolved context for
    # an algorithm that will be applied later by apply_request_checksum.
    context = request.get("context", {})
    resolved = context.get("checksum", {}).get("request_algorithm")
    if isinstance(resolved, dict) and resolved:
        _register_checksum_algorithm_feature_id(resolved["algorithm"])
481+
482+
412483
def _register_checksum_algorithm_feature_id(algorithm):
413-
checksum_algorithm_name = algorithm["algorithm"].upper()
484+
checksum_algorithm_name = algorithm.upper()
414485
if checksum_algorithm_name == "CRC64NVME":
415486
checksum_algorithm_name = "CRC64"
416487
checksum_algorithm_name_feature_id = (
@@ -514,8 +585,22 @@ def _handle_bytes_response(http_response, response, algorithm):
514585
"crc32": CrtCrc32Checksum,
515586
"sha1": Sha1Checksum,
516587
"sha256": Sha256Checksum,
588+
'sha512': Sha512Checksum,
589+
'xxhash64': CrtXxhash64Checksum,
590+
'xxhash3': CrtXxhash3Checksum,
591+
'xxhash128': CrtXxhash128Checksum,
517592
}
518593

519594

520595
_SUPPORTED_CHECKSUM_ALGORITHMS = list(_CHECKSUM_CLS.keys())
521-
_ALGORITHMS_PRIORITY_LIST = ['crc64nvme', 'crc32c', 'crc32', 'sha1', 'sha256']
596+
_ALGORITHMS_PRIORITY_LIST = [
597+
'xxhash128',
598+
'xxhash3',
599+
'crc64nvme',
600+
'xxhash64',
601+
'crc32c',
602+
'crc32',
603+
'sha1',
604+
'sha256',
605+
'sha512',
606+
]

awscli/botocore/useragent.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@
9999
'LOGIN_CROSS_DEVICE': 'AB',
100100
'CREDENTIALS_PROFILE_LOGIN': 'AC',
101101
'CREDENTIALS_LOGIN': 'AD',
102+
'FLEXIBLE_CHECKSUMS_REQ_MD5': 'AE',
103+
'FLEXIBLE_CHECKSUMS_REQ_SHA512': 'AF',
104+
'FLEXIBLE_CHECKSUMS_REQ_XXHASH3': 'AG',
105+
'FLEXIBLE_CHECKSUMS_REQ_XXHASH64': 'AH',
106+
'FLEXIBLE_CHECKSUMS_REQ_XXHASH128': 'AI',
102107
}
103108

104109

awscli/botocore/utils.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3119,22 +3119,31 @@ def _is_s3express_request(params):
31193119
return endpoint_properties.get('backend') == 'S3Express'
31203120

31213121

3122-
def has_checksum_header(params):
3122+
def get_checksum_algorithm_headers(params):
    """
    Return the request header names that match CHECKSUM_HEADER_PATTERN.

    :param params: Request dictionary; its ``'headers'`` entry is iterated
        and every header name is tested against the pattern.
    :return: A list of the matching header names exactly as they appear in
        the request (the full header name, not just the algorithm suffix),
        or an empty list when none match.
    """
    # A header matching the x-amz-checksum-* pattern is collected as-is;
    # callers derive the algorithm name from it if they need one.
    return [
        header
        for header in params['headers']
        if CHECKSUM_HEADER_PATTERN.match(header)
    ]
31363137

3137-
return False
3138+
3139+
def has_checksum_header(params):
    """
    Report whether the request carries a header starting with
    "x-amz-checksum-".

    This function is considered private and subject to abrupt breaking
    changes or removal without prior announcement. Please do not use it
    directly.
    """
    matching_headers = get_checksum_algorithm_headers(params)
    return len(matching_headers) > 0
31383147

31393148

31403149
def conditionally_calculate_checksum(params, **kwargs):

0 commit comments

Comments
 (0)