Skip to content

Commit be7ebb5

Browse files
committed
Add Z85 support to binascii as well
This is as simple as adding two lookup tables and a function parameter. Once again, the speedup is quite substantial.
1 parent dec042a commit be7ebb5

File tree

9 files changed

+124
-61
lines changed

9 files changed

+124
-61
lines changed

Doc/library/binascii.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,21 +120,24 @@ The :mod:`binascii` module defines the following functions:
120120
If *pad* is true, the input is padded to a multiple of 4 before encoding.
121121

122122

123-
.. function:: a2b_base85(string, /, *, strict_mode=False)
123+
.. function:: a2b_base85(string, /, *, strict_mode=False, z85=False)
124124

125125
Convert base85 data back to binary and return the binary data.
126126
More than one line may be passed at a time.
127127

128128
If *strict_mode* is true, only valid base85 data will be converted.
129129
Invalid base85 data will raise :exc:`binascii.Error`.
130130

131+
If *z85* is true, the base85 data uses the Z85 alphabet.
132+
See the `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information.
133+
131134
Valid base85 data contains characters from the base85 alphabet in groups
132135
of five (except for the final group, which may have from two to five
133136
characters). Each group encodes 32 bits of binary data in the range from
134137
``0`` to ``2 ** 32 - 1``, inclusive.
135138

136139

137-
.. function:: b2a_base85(data, /, *, pad=False, newline=True)
140+
.. function:: b2a_base85(data, /, *, pad=False, newline=True, z85=False)
138141

139142
Convert binary data to a line of ASCII characters in base85 coding.
140143
The return value is the converted line.
@@ -143,6 +146,9 @@ The :mod:`binascii` module defines the following functions:
143146

144147
If *newline* is true, a newline char is appended to the result.
145148

149+
If *z85* is true, the Z85 alphabet is used for conversion.
150+
See the `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information.
151+
146152

147153
.. function:: a2b_qp(data, header=False)
148154

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,7 @@ struct _Py_global_strings {
780780
STRUCT_FOR_ID(write_through)
781781
STRUCT_FOR_ID(x)
782782
STRUCT_FOR_ID(year)
783+
STRUCT_FOR_ID(z85)
783784
STRUCT_FOR_ID(zdict)
784785
} identifiers;
785786
struct {

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/base64.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -349,34 +349,17 @@ def b85decode(s):
349349
s = _bytes_from_decode_data(s)
350350
return binascii.a2b_base85(s, strict_mode=True)
351351

352-
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
353-
b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
354-
_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz'
355-
b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#')
356-
# Translating b85 valid but z85 invalid chars to b'\x00' is required
357-
# to prevent them from being decoded as b85 valid chars.
358-
_z85_b85_decode_diff = b';_`|~'
359-
_z85_decode_translation = bytes.maketrans(
360-
_z85alphabet + _z85_b85_decode_diff,
361-
_b85alphabet + b'\x00' * len(_z85_b85_decode_diff)
362-
)
363-
_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet)
364-
365352
def z85encode(s):
366353
"""Encode the bytes-like object s using z85 and return a bytes object."""
367-
return b85encode(s).translate(_z85_encode_translation)
354+
return binascii.b2a_base85(s, newline=False, z85=True)
368355

369356
def z85decode(s):
370357
"""Decode the z85-encoded bytes-like object or ASCII string s.
371358
372359
The result is returned as a bytes object.
373360
"""
374361
s = _bytes_from_decode_data(s)
375-
s = s.translate(_z85_decode_translation)
376-
try:
377-
return b85decode(s)
378-
except ValueError as e:
379-
raise ValueError(e.args[0].replace('base85', 'z85')) from None
362+
return binascii.a2b_base85(s, strict_mode=True, z85=True)
380363

381364
# Legacy interface. This code could be cleaned up since I don't believe
382365
# binascii has any line length limitations. It just doesn't seem worth it

Lib/test/test_binascii.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,13 @@ def test_base85_newline(self):
518518
self.assertEqual(binascii.b2a_base85(b, newline=True), b"bTe}aAO\n")
519519
self.assertEqual(binascii.b2a_base85(b, newline=False), b"bTe}aAO")
520520

521+
def test_base85_z85(self):
522+
# Test base85 z85 parameter
523+
b = self.type2test(b"t3s\t ")
524+
a = self.type2test(b"BtE$Aao\n")
525+
self.assertEqual(binascii.b2a_base85(b, z85=True), b"BtE$Aao\n")
526+
self.assertEqual(binascii.a2b_base85(a, z85=True), b"t3s\t ")
527+
521528
def test_uu(self):
522529
MAX_UU = 45
523530
for backtick in (True, False):

Modules/binascii.c

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,26 @@ static const unsigned char table_a2b_base85_a85[] = {
145145
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
146146
};
147147

148+
static const unsigned char table_a2b_base85_z85[] = {
149+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
150+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
151+
-1,68,-1,84, 83,82,72,-1, 75,76,70,65, -1,63,62,69,
152+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,64,-1, 73,66,74,71,
153+
81,36,37,38, 39,40,41,42, 43,44,45,46, 47,48,49,50,
154+
51,52,53,54, 55,56,57,58, 59,60,61,77, -1,78,67,-1,
155+
-1,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24,
156+
25,26,27,28, 29,30,31,32, 33,34,35,79, -1,80,-1,-1,
157+
158+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
159+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
160+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
161+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
162+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
163+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
164+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
165+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
166+
};
167+
148168
static const unsigned char table_b2a_base85[] =
149169
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
150170
"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
@@ -153,6 +173,10 @@ static const unsigned char table_b2a_base85_a85[] =
153173
"!\"#$%&\'()*+,-./0123456789:;<=>?@" \
154174
"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu";
155175

176+
static const unsigned char table_b2a_base85_z85[] =
177+
"0123456789abcdefghijklmnopqrstuvwxyz" \
178+
"ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/\x2a?&<>()[]{}@%$#"; /* clinic doesn't like '/' followed by '*' */
179+
156180
#define BASE85_A85_PREFIX '<'
157181
#define BASE85_A85_AFFIX '~'
158182
#define BASE85_A85_SUFFIX '>'
@@ -911,17 +935,21 @@ binascii.a2b_base85
911935
/
912936
*
913937
strict_mode: bool = False
914-
When set to True, bytes that are not part of the base85 standard
915-
are not allowed.
938+
When set to True, bytes that are not in the base85 alphabet
939+
(or the Z85 alphabet, if z85 is True) are not allowed.
940+
z85: bool = False
941+
When set to True, the Z85 alphabet is used instead of the standard
942+
base85 alphabet.
916943
917944
Decode a line of base85 data.
918945
[clinic start generated code]*/
919946

920947
static PyObject *
921-
binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode)
922-
/*[clinic end generated code: output=337b9418636f30f4 input=a7555d0e33783562]*/
948+
binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode,
949+
int z85)
950+
/*[clinic end generated code: output=c5b9118ffe77f1cb input=65c2a532ad64ebd5]*/
923951
{
924-
const unsigned char *ascii_data;
952+
const unsigned char *ascii_data, *table_a2b;
925953
unsigned char *bin_data;
926954
int group_pos = 0;
927955
unsigned char this_ch, this_digit;
@@ -930,6 +958,7 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode)
930958
_PyBytesWriter writer;
931959
binascii_state *state;
932960

961+
table_a2b = z85 ? table_a2b_base85_z85 : table_a2b_base85;
933962
ascii_data = data->buf;
934963
ascii_len = data->len;
935964

@@ -948,7 +977,7 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode)
948977
/* Shift (in radix-85) data or padding into our buffer. */
949978
if (ascii_len > 0) {
950979
this_ch = *ascii_data;
951-
this_digit = table_a2b_base85[this_ch];
980+
this_digit = table_a2b[this_ch];
952981
} else {
953982
/* Pad with largest radix-85 digit when decoding. */
954983
this_digit = 84;
@@ -960,7 +989,8 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode)
960989
if (state == NULL) {
961990
goto error_end;
962991
}
963-
PyErr_SetString(state->Error, "base85 overflow");
992+
PyErr_SetString(state->Error,
993+
z85 ? "z85 overflow" : "base85 overflow");
964994
goto error_end;
965995
}
966996
leftchar += this_digit;
@@ -970,7 +1000,8 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode)
9701000
if (state == NULL) {
9711001
goto error_end;
9721002
}
973-
PyErr_Format(state->Error, "'%c' invalid in base85", this_ch);
1003+
PyErr_Format(state->Error, "'%c' %s", this_ch,
1004+
z85 ? "invalid in z85" : "invalid in base85");
9741005
goto error_end;
9751006
}
9761007

@@ -985,7 +1016,8 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode)
9851016
if (state == NULL) {
9861017
goto error_end;
9871018
}
988-
PyErr_SetString(state->Error, "base85 data has invalid length");
1019+
PyErr_Format(state->Error, "%s data has invalid length",
1020+
z85 ? "z85" : "base85");
9891021
goto error_end;
9901022
}
9911023

@@ -1016,21 +1048,24 @@ binascii.b2a_base85
10161048
Pad input to a multiple of 4 before encoding.
10171049
newline: bool = True
10181050
Append a newline to the result.
1051+
z85: bool = False
1052+
Use Z85 alphabet instead of standard base85 alphabet.
10191053
10201054
Base85-code line of data.
10211055
[clinic start generated code]*/
10221056

10231057
static PyObject *
10241058
binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad,
1025-
int newline)
1026-
/*[clinic end generated code: output=56936eb231e15dc0 input=3899d4f5c3a589a0]*/
1059+
int newline, int z85)
1060+
/*[clinic end generated code: output=d3740e9a20c8e071 input=e4e07591f7a11ae4]*/
10271061
{
10281062
unsigned char *ascii_data;
1029-
const unsigned char *bin_data;
1063+
const unsigned char *bin_data, *table_b2a;
10301064
uint32_t leftchar = 0;
10311065
Py_ssize_t bin_len, group_len, out_len;
10321066
_PyBytesWriter writer;
10331067

1068+
table_b2a = z85 ? table_b2a_base85_z85 : table_b2a_base85;
10341069
bin_data = data->buf;
10351070
bin_len = data->len;
10361071

@@ -1052,15 +1087,15 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad,
10521087
leftchar = (bin_data[0] << 24) | (bin_data[1] << 16) |
10531088
(bin_data[2] << 8) | bin_data[3];
10541089

1055-
ascii_data[4] = table_b2a_base85[leftchar % 85];
1090+
ascii_data[4] = table_b2a[leftchar % 85];
10561091
leftchar /= 85;
1057-
ascii_data[3] = table_b2a_base85[leftchar % 85];
1092+
ascii_data[3] = table_b2a[leftchar % 85];
10581093
leftchar /= 85;
1059-
ascii_data[2] = table_b2a_base85[leftchar % 85];
1094+
ascii_data[2] = table_b2a[leftchar % 85];
10601095
leftchar /= 85;
1061-
ascii_data[1] = table_b2a_base85[leftchar % 85];
1096+
ascii_data[1] = table_b2a[leftchar % 85];
10621097
leftchar /= 85;
1063-
ascii_data[0] = table_b2a_base85[leftchar];
1098+
ascii_data[0] = table_b2a[leftchar];
10641099

10651100
ascii_data += 5;
10661101
}
@@ -1076,7 +1111,7 @@ binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad,
10761111
group_len = pad ? 5 : bin_len + 1;
10771112
for (Py_ssize_t i = 4; i >= 0; i--) {
10781113
if (i < group_len) {
1079-
ascii_data[i] = table_b2a_base85[leftchar % 85];
1114+
ascii_data[i] = table_b2a[leftchar % 85];
10801115
}
10811116
leftchar /= 85;
10821117
}

0 commit comments

Comments
 (0)