Skip to content

Commit aa06c5d

Browse files
committed
Restore base64.py
1 parent 05ae5ad commit aa06c5d

File tree

1 file changed

+171
-8
lines changed

1 file changed

+171
-8
lines changed

Lib/base64.py

Lines changed: 171 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,36 @@ def b16decode(s, casefold=False):
290290
#
291291
# Ascii85 encoding/decoding
292292
#
293+
294+
# Lookup tables for Ascii85 encoding; they stay None until a85encode()
# builds them on its first call.
_a85chars = _a85chars2 = None
# Framing markers used when the Adobe variant of Ascii85 is requested.
_A85START, _A85END = b"<~", b"~>"
298+
299+
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared base-85 encoder used by a85encode() and b85encode().

    b is split into big-endian 32-bit words (zero-padded to a multiple of
    four bytes) and every word is rendered as five symbols.

    chars is indexed with a single base-85 digit and chars2 with a pair of
    digits (value 0..7224); the callers build both tables from their
    alphabet.  When foldnuls is true an all-zero word becomes b'z', and when
    foldspaces is true a word of four spaces becomes b'y'.  Unless pad is
    true, the symbols that only encode the zero padding are removed from the
    end of the result.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = -len(b) % 4
    if padding:
        # Build a new object instead of extending in place so that a
        # caller-owned buffer is never modified.
        b = b + bytes(padding)
    words = struct.unpack('!%dI' % (len(b) // 4), b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Five base-85 digits: two pairs from chars2 plus the final
            # single digit from chars.
            upper, last_digit = divmod(word, 85)
            chunks.append(chars2[upper // 7225] +
                          chars2[upper % 7225] +
                          chars[last_digit])

    if padding and not pad:
        tail = chunks[-1]
        if tail == b'z':
            # Expand the folded form so that it can be truncated below.
            tail = chars[0] * 5
        chunks[-1] = tail[:-padding]

    return b''.join(chunks)
322+
293323
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
294324
"""Encode bytes-like object b using Ascii85 and return a bytes object.
295325
@@ -307,8 +337,29 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
307337
adobe controls whether the encoded byte sequence is framed with <~ and ~>,
308338
which is used by the Adobe implementation.
309339
"""
310-
return binascii.b2a_ascii85(b, fold_spaces=foldspaces,
311-
wrap=adobe, width=wrapcol, pad=pad)
340+
global _a85chars, _a85chars2
341+
# Delay the initialization of tables to not waste memory
342+
# if the function is never called
343+
if _a85chars2 is None:
344+
_a85chars = [bytes((i,)) for i in range(33, 118)]
345+
_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
346+
347+
result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
348+
349+
if adobe:
350+
result = _A85START + result
351+
if wrapcol:
352+
wrapcol = max(2 if adobe else 1, wrapcol)
353+
chunks = [result[i: i + wrapcol]
354+
for i in range(0, len(result), wrapcol)]
355+
if adobe:
356+
if len(chunks[-1]) + 2 > wrapcol:
357+
chunks.append(b'')
358+
result = b'\n'.join(chunks)
359+
if adobe:
360+
result += _A85END
361+
362+
return result
312363

313364
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
314365
"""Decode the Ascii85 encoded bytes-like object or ASCII string b.
@@ -327,36 +378,148 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
327378
The result is returned as a bytes object.
328379
"""
329380
b = _bytes_from_decode_data(b)
330-
return binascii.a2b_ascii85(b, fold_spaces=foldspaces,
331-
wrap=adobe, ignore=ignorechars)
381+
if adobe:
382+
if not b.endswith(_A85END):
383+
raise ValueError(
384+
"Ascii85 encoded byte sequences must end "
385+
"with {!r}".format(_A85END)
386+
)
387+
if b.startswith(_A85START):
388+
b = b[2:-2] # Strip off start/end markers
389+
else:
390+
b = b[:-2]
391+
#
392+
# We have to go through this stepwise, so as to ignore spaces and handle
393+
# special short sequences
394+
#
395+
packI = struct.Struct('!I').pack
396+
decoded = []
397+
decoded_append = decoded.append
398+
curr = []
399+
curr_append = curr.append
400+
curr_clear = curr.clear
401+
for x in b + b'u' * 4:
402+
if b'!'[0] <= x <= b'u'[0]:
403+
curr_append(x)
404+
if len(curr) == 5:
405+
acc = 0
406+
for x in curr:
407+
acc = 85 * acc + (x - 33)
408+
try:
409+
decoded_append(packI(acc))
410+
except struct.error:
411+
raise ValueError('Ascii85 overflow') from None
412+
curr_clear()
413+
elif x == b'z'[0]:
414+
if curr:
415+
raise ValueError('z inside Ascii85 5-tuple')
416+
decoded_append(b'\0\0\0\0')
417+
elif foldspaces and x == b'y'[0]:
418+
if curr:
419+
raise ValueError('y inside Ascii85 5-tuple')
420+
decoded_append(b'\x20\x20\x20\x20')
421+
elif x in ignorechars:
422+
# Skip whitespace
423+
continue
424+
else:
425+
raise ValueError('Non-Ascii85 digit found: %c' % x)
426+
427+
result = b''.join(decoded)
428+
padding = 4 - len(curr)
429+
if padding:
430+
# Throw away the extra padding
431+
result = result[:-padding]
432+
return result
433+
434+
# The following code is originally taken (with permission) from Mercurial
435+
436+
# The 85 symbols of the b85 alphabet, in digit order.
_b85alphabet = (b"0123456789"
                b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz"
                b"!#$%&()*+-;<=>?@^_`{|}~")
# Encode/decode lookup tables; b85encode() and b85decode() fill them in on
# first use.
_b85chars = _b85chars2 = _b85dec = None
332441

333442
def b85encode(b, pad=False):
    """Return the base85 encoding of the bytes-like object b as bytes.

    When pad is true the input is extended with b'\\0' bytes to a multiple
    of 4 bytes before it is encoded.
    """
    global _b85chars, _b85chars2
    # The symbol tables are only built once somebody actually encodes, so an
    # unused function costs no memory.
    if _b85chars2 is None:
        _b85chars = [bytes([code]) for code in _b85alphabet]
        _b85chars2 = [first + second
                      for first in _b85chars
                      for second in _b85chars]
    return _85encode(b, _b85chars, _b85chars2, pad)
340455

341456
def b85decode(b):
    """Decode base85 data given as a bytes-like object or ASCII string b.

    Returns the decoded data as a bytes object.  Raises ValueError when a
    byte outside the base85 alphabet is found or when a 5-symbol group
    encodes a value that does not fit in 32 bits.
    """
    global _b85dec
    # The reverse lookup table is only built once somebody actually decodes.
    if _b85dec is None:
        _b85dec = [None] * 256
        for digit, code in enumerate(_b85alphabet):
            _b85dec[code] = digit

    b = _bytes_from_decode_data(b)
    # Complete the final group with the highest-valued symbol; the bytes
    # produced only by this padding are cut off again at the end.
    padding = -len(b) % 5
    b = b + b'~' * padding
    pack_word = struct.Struct('!I').pack
    pieces = []
    for start in range(0, len(b), 5):
        group = b[start:start + 5]
        acc = 0
        try:
            for c in group:
                acc = acc * 85 + _b85dec[c]
        except TypeError:
            # A None table entry was hit: locate the offending byte so the
            # error can report its position.
            for offset, c in enumerate(group):
                if _b85dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (start + offset)) from None
            raise
        try:
            pieces.append(pack_word(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % start) from None

    result = b''.join(pieces)
    return result[:-padding] if padding else result
496+
497+
# The 85 symbols of the z85 alphabet, listed in the same digit order as
# _b85alphabet so that the two can be mapped onto each other bytewise.
_z85alphabet = (b'0123456789'
                b'abcdefghijklmnopqrstuvwxyz'
                b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                b'.-:+=^!/*?&<>()[]{}@%$#')
# Symbols that are valid b85 but not valid z85.  They have to be mapped to
# b'\x00' before decoding, otherwise the b85 decoder would accept them.
_z85_b85_decode_diff = b';_`|~'
_z85_decode_translation = bytes.maketrans(
    _z85alphabet + _z85_b85_decode_diff,
    _b85alphabet + bytes(len(_z85_b85_decode_diff)),
)
_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet)
348507

349508
def z85encode(s):
    """Return the z85 encoding of the bytes-like object s as bytes."""
    # Encode as b85 first, then swap every symbol for its z85 counterpart.
    b85_encoded = b85encode(s)
    return b85_encoded.translate(_z85_encode_translation)
352511

353512
def z85decode(s):
    """Decode z85 data given as a bytes-like object or ASCII string s.

    Returns the decoded data as a bytes object.  Raises ValueError for
    malformed input.
    """
    # Map the z85 symbols onto the b85 alphabet and reuse the b85 decoder.
    translated = _bytes_from_decode_data(s).translate(_z85_decode_translation)
    try:
        return b85decode(translated)
    except ValueError as e:
        # Re-word the b85 decoder's message so that it names this format.
        raise ValueError(e.args[0].replace('base85', 'z85')) from None
360523

361524
# Legacy interface. This code could be cleaned up since I don't believe
362525
# binascii has any line length limitations. It just doesn't seem worth it

0 commit comments

Comments
 (0)