Skip to content

Commit 4644fed

Browse files
gh-144001: Support ignoring the invalid pad character in Base64 decoding (GH-144306)
1 parent c81e184 commit 4644fed

File tree

5 files changed

+118
-61
lines changed

5 files changed

+118
-61
lines changed

Doc/library/base64.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ POST request.
8787

8888
If *ignorechars* is specified, it should be a :term:`bytes-like object`
8989
containing characters to ignore from the input when *validate* is true.
90+
If *ignorechars* contains the pad character ``'='``, the pad characters
91+
that appear before the end of the encoded data and any excess pad characters
92+
will be ignored.
9093
The default value of *validate* is ``True`` if *ignorechars* is specified,
9194
``False`` otherwise.
9295

Doc/library/binascii.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ The :mod:`binascii` module defines the following functions:
5656

5757
If *ignorechars* is specified, it should be a :term:`bytes-like object`
5858
containing characters to ignore from the input when *strict_mode* is true.
59+
If *ignorechars* contains the pad character ``'='``, the pad characters
60+
that appear before the end of the encoded data and any excess pad characters
61+
will be ignored.
5962
The default value of *strict_mode* is ``True`` if *ignorechars* is specified,
6063
``False`` otherwise.
6164

Lib/test/test_base64.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,14 +306,20 @@ def test_b64decode_invalid_chars(self):
306306
# issue 1466065: Test some invalid characters.
307307
tests = ((b'%3d==', b'\xdd', b'%$'),
308308
(b'$3d==', b'\xdd', b'%$'),
309-
(b'[==', b'', None),
309+
(b'[==', b'', b'[='),
310310
(b'YW]3=', b'am', b']'),
311311
(b'3{d==', b'\xdd', b'{}'),
312312
(b'3d}==', b'\xdd', b'{}'),
313313
(b'@@', b'', b'@!'),
314314
(b'!', b'', b'@!'),
315315
(b"YWJj\n", b"abc", b'\n'),
316316
(b'YWJj\nYWI=', b'abcab', b'\n'),
317+
(b'=YWJj', b'abc', b'='),
318+
(b'Y=WJj', b'abc', b'='),
319+
(b'Y==WJj', b'abc', b'='),
320+
(b'Y===WJj', b'abc', b'='),
321+
(b'YW=Jj', b'abc', b'='),
322+
(b'YWJj=', b'abc', b'='),
317323
(b'YW\nJj', b'abc', b'\n'),
318324
(b'YW\nJj', b'abc', bytearray(b'\n')),
319325
(b'YW\nJj', b'abc', memoryview(b'\n')),
@@ -335,9 +341,8 @@ def test_b64decode_invalid_chars(self):
335341
with self.assertRaises(binascii.Error):
336342
# Even empty ignorechars enables the strict mode.
337343
base64.b64decode(bstr, ignorechars=b'')
338-
if ignorechars is not None:
339-
r = base64.b64decode(bstr, ignorechars=ignorechars)
340-
self.assertEqual(r, res)
344+
r = base64.b64decode(bstr, ignorechars=ignorechars)
345+
self.assertEqual(r, res)
341346

342347
with self.assertRaises(TypeError):
343348
base64.b64decode(b'', ignorechars='')

Lib/test/test_binascii.py

Lines changed: 86 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -118,66 +118,78 @@ def addnoise(line):
118118
# empty strings. TBD: shouldn't it raise an exception instead ?
119119
self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
120120

121-
def test_base64_strict_mode(self):
122-
# Test base64 with strict mode on
123-
def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
121+
def test_base64_bad_padding(self):
122+
# Test malformed padding
123+
def _assertRegexTemplate(assert_regex, data,
124+
non_strict_mode_expected_result):
125+
data = self.type2test(data)
124126
with self.assertRaisesRegex(binascii.Error, assert_regex):
125-
binascii.a2b_base64(self.type2test(data), strict_mode=True)
126-
self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
127+
binascii.a2b_base64(data, strict_mode=True)
128+
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
127129
non_strict_mode_expected_result)
128-
self.assertEqual(binascii.a2b_base64(self.type2test(data)),
130+
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
131+
ignorechars=b'='),
132+
non_strict_mode_expected_result)
133+
self.assertEqual(binascii.a2b_base64(data),
129134
non_strict_mode_expected_result)
130135

131-
def assertExcessData(data, non_strict_mode_expected_result: bytes):
132-
_assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)
133-
134-
def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
135-
_assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)
136+
def assertLeadingPadding(*args):
137+
_assertRegexTemplate(r'(?i)Leading padding', *args)
136138

137-
def assertLeadingPadding(data, non_strict_mode_expected_result: bytes):
138-
_assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)
139+
def assertDiscontinuousPadding(*args):
140+
_assertRegexTemplate(r'(?i)Discontinuous padding', *args)
139141

140-
def assertDiscontinuousPadding(data, non_strict_mode_expected_result: bytes):
141-
_assertRegexTemplate(r'(?i)Discontinuous padding', data, non_strict_mode_expected_result)
142+
def assertExcessPadding(*args):
143+
_assertRegexTemplate(r'(?i)Excess padding', *args)
142144

143-
def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
144-
_assertRegexTemplate(r'(?i)Excess padding', data, non_strict_mode_expected_result)
145+
def assertInvalidLength(*args):
146+
_assertRegexTemplate(r'(?i)Invalid.+number of data characters', *args)
145147

146-
# Test excess data exceptions
147-
assertExcessData(b'ab==a', b'i')
148148
assertExcessPadding(b'ab===', b'i')
149149
assertExcessPadding(b'ab====', b'i')
150-
assertNonBase64Data(b'ab==:', b'i')
151-
assertExcessData(b'abc=a', b'i\xb7')
152-
assertNonBase64Data(b'abc=:', b'i\xb7')
153-
assertNonBase64Data(b'ab==\n', b'i')
154150
assertExcessPadding(b'abc==', b'i\xb7')
155151
assertExcessPadding(b'abc===', b'i\xb7')
156152
assertExcessPadding(b'abc====', b'i\xb7')
157153
assertExcessPadding(b'abc=====', b'i\xb7')
158154

159-
# Test non-base64 data exceptions
160-
assertNonBase64Data(b'\nab==', b'i')
161-
assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
162-
assertNonBase64Data(b'a\nb==', b'i')
163-
assertNonBase64Data(b'a\x00b==', b'i')
164-
165-
# Test malformed padding
166155
assertLeadingPadding(b'=', b'')
167156
assertLeadingPadding(b'==', b'')
168157
assertLeadingPadding(b'===', b'')
169158
assertLeadingPadding(b'====', b'')
170159
assertLeadingPadding(b'=====', b'')
160+
assertLeadingPadding(b'=abcd', b'i\xb7\x1d')
161+
assertLeadingPadding(b'==abcd', b'i\xb7\x1d')
162+
assertLeadingPadding(b'===abcd', b'i\xb7\x1d')
163+
assertLeadingPadding(b'====abcd', b'i\xb7\x1d')
164+
assertLeadingPadding(b'=====abcd', b'i\xb7\x1d')
165+
166+
assertInvalidLength(b'a=b==', b'i')
167+
assertInvalidLength(b'a=bc=', b'i\xb7')
168+
assertInvalidLength(b'a=bc==', b'i\xb7')
169+
assertInvalidLength(b'a=bcd', b'i\xb7\x1d')
170+
assertInvalidLength(b'a=bcd=', b'i\xb7\x1d')
171+
171172
assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
172-
assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
173-
assertNonBase64Data(b'ab=:=', b'i')
173+
assertDiscontinuousPadding(b'ab=cd', b'i\xb7\x1d')
174+
assertDiscontinuousPadding(b'ab=cd==', b'i\xb7\x1d')
175+
174176
assertExcessPadding(b'abcd=', b'i\xb7\x1d')
175177
assertExcessPadding(b'abcd==', b'i\xb7\x1d')
176178
assertExcessPadding(b'abcd===', b'i\xb7\x1d')
177179
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
178180
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
181+
assertExcessPadding(b'abcd==', b'i\xb7\x1d')
182+
assertExcessPadding(b'abcd===', b'i\xb7\x1d')
183+
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
184+
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
185+
assertExcessPadding(b'abcd=efgh', b'i\xb7\x1dy\xf8!')
186+
assertExcessPadding(b'abcd==efgh', b'i\xb7\x1dy\xf8!')
187+
assertExcessPadding(b'abcd===efgh', b'i\xb7\x1dy\xf8!')
188+
assertExcessPadding(b'abcd====efgh', b'i\xb7\x1dy\xf8!')
189+
assertExcessPadding(b'abcd=====efgh', b'i\xb7\x1dy\xf8!')
179190

180191
def test_base64_invalidchars(self):
192+
# Test non-base64 data exceptions
181193
def assertNonBase64Data(data, expected, ignorechars):
182194
data = self.type2test(data)
183195
assert_regex = r'(?i)Only base64 data'
@@ -195,10 +207,11 @@ def assertNonBase64Data(data, expected, ignorechars):
195207
assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
196208
assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
197209
assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
210+
assertNonBase64Data(b'ab:==', b'i', ignorechars=b':')
211+
assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
198212
assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
199213
assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
200214
assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
201-
assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
202215
assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
203216
assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))
204217

@@ -221,36 +234,69 @@ def assertNonBase64Data(data, expected, ignorechars):
221234
with self.assertRaises(TypeError):
222235
binascii.a2b_base64(data, ignorechars=None)
223236

237+
def test_base64_excess_data(self):
238+
# Test excess data exceptions
239+
def assertExcessData(data, non_strict_expected,
240+
ignore_padchar_expected=None):
241+
assert_regex = r'(?i)Excess data'
242+
data = self.type2test(data)
243+
with self.assertRaisesRegex(binascii.Error, assert_regex):
244+
binascii.a2b_base64(data, strict_mode=True)
245+
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
246+
non_strict_expected)
247+
if ignore_padchar_expected is not None:
248+
self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
249+
ignorechars=b'='),
250+
ignore_padchar_expected)
251+
self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
252+
253+
assertExcessData(b'ab==c', b'i')
254+
assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
255+
assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
256+
224257
def test_base64errors(self):
225258
# Test base64 with invalid padding
226-
def assertIncorrectPadding(data):
259+
def assertIncorrectPadding(data, strict_mode=True):
260+
data = self.type2test(data)
227261
with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
228-
binascii.a2b_base64(self.type2test(data))
262+
binascii.a2b_base64(data)
263+
with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
264+
binascii.a2b_base64(data, strict_mode=False)
265+
if strict_mode:
266+
with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
267+
binascii.a2b_base64(data, strict_mode=True)
229268

230269
assertIncorrectPadding(b'ab')
231270
assertIncorrectPadding(b'ab=')
232271
assertIncorrectPadding(b'abc')
233272
assertIncorrectPadding(b'abcdef')
234273
assertIncorrectPadding(b'abcdef=')
235274
assertIncorrectPadding(b'abcdefg')
236-
assertIncorrectPadding(b'a=b=')
237-
assertIncorrectPadding(b'a\nb=')
275+
assertIncorrectPadding(b'a=b=', strict_mode=False)
276+
assertIncorrectPadding(b'a\nb=', strict_mode=False)
238277

239278
# Test base64 with invalid number of valid characters (1 mod 4)
240-
def assertInvalidLength(data):
279+
def assertInvalidLength(data, strict_mode=True):
241280
n_data_chars = len(re.sub(br'[^A-Za-z0-9/+]', br'', data))
281+
data = self.type2test(data)
242282
expected_errmsg_re = \
243283
r'(?i)Invalid.+number of data characters.+' + str(n_data_chars)
244284
with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
245-
binascii.a2b_base64(self.type2test(data))
285+
binascii.a2b_base64(data)
286+
with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
287+
binascii.a2b_base64(data, strict_mode=False)
288+
if strict_mode:
289+
with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
290+
binascii.a2b_base64(data, strict_mode=True)
246291

247292
assertInvalidLength(b'a')
248293
assertInvalidLength(b'a=')
249294
assertInvalidLength(b'a==')
250295
assertInvalidLength(b'a===')
251296
assertInvalidLength(b'a' * 5)
252297
assertInvalidLength(b'a' * (4 * 87 + 1))
253-
assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters
298+
assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters
299+
strict_mode=False)
254300

255301
def test_uu(self):
256302
MAX_UU = 45

Modules/binascii.c

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -564,26 +564,24 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
564564
pads++;
565565

566566
if (strict_mode) {
567-
if (quad_pos == 0) {
568-
state = get_binascii_state(module);
569-
if (state) {
570-
PyErr_SetString(state->Error, (ascii_data == data->buf)
571-
? "Leading padding not allowed"
572-
: "Excess padding not allowed");
573-
}
574-
goto error_end;
567+
if (quad_pos >= 2 && quad_pos + pads <= 4) {
568+
continue;
569+
}
570+
if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
571+
continue;
575572
}
576573
if (quad_pos == 1) {
577574
/* Set an error below. */
578575
break;
579576
}
580-
if (quad_pos + pads > 4) {
581-
state = get_binascii_state(module);
582-
if (state) {
583-
PyErr_SetString(state->Error, "Excess padding not allowed");
584-
}
585-
goto error_end;
577+
state = get_binascii_state(module);
578+
if (state) {
579+
PyErr_SetString(state->Error,
580+
(quad_pos == 0 && ascii_data == data->buf)
581+
? "Leading padding not allowed"
582+
: "Excess padding not allowed");
586583
}
584+
goto error_end;
587585
}
588586
else {
589587
if (quad_pos >= 2 && quad_pos + pads >= 4) {
@@ -592,8 +590,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
592590
*/
593591
goto done;
594592
}
593+
continue;
595594
}
596-
continue;
597595
}
598596

599597
unsigned char v = table_a2b_base64[this_ch];
@@ -609,7 +607,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
609607
}
610608

611609
// Characters that are not '=', in the middle of the padding, are not allowed
612-
if (strict_mode && pads) {
610+
if (pads && strict_mode &&
611+
!ignorechar(BASE64_PAD, ignorechars, ignorecache))
612+
{
613613
state = get_binascii_state(module);
614614
if (state) {
615615
PyErr_SetString(state->Error, (quad_pos + pads == 4)
@@ -662,7 +662,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
662662
goto error_end;
663663
}
664664

665-
if (quad_pos != 0 && quad_pos + pads != 4) {
665+
if (quad_pos != 0 && quad_pos + pads < 4) {
666666
state = get_binascii_state(module);
667667
if (state) {
668668
PyErr_SetString(state->Error, "Incorrect padding");

0 commit comments

Comments
 (0)