Skip to content

Commit 6b2a196

Browse files
gh-95382: Use cache for indentations in the JSON encoder (GH-118636)
1 parent 91f4908 commit 6b2a196

File tree

1 file changed

+118
-64
lines changed

1 file changed

+118
-64
lines changed

Modules/_json.c

Lines changed: 118 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self);
8686
static int
8787
encoder_clear(PyEncoderObject *self);
8888
static int
89-
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent);
89+
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level, PyObject *indent_cache);
9090
static int
91-
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent);
91+
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level, PyObject *indent_cache);
9292
static int
93-
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent);
93+
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level, PyObject *indent_cache);
9494
static PyObject *
9595
_encoded_const(PyObject *obj);
9696
static void
@@ -1252,17 +1252,92 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12521252
return (PyObject *)s;
12531253
}
12541254

1255+
1256+
/* indent_cache is a list that contains intermixed values at even and odd
1257+
* positions:
1258+
*
1259+
* 2*k : '\n' + indent * (k + initial_indent_level)
1260+
* strings written after opening and before closing brackets
1261+
* 2*k-1 : item_separator + '\n' + indent * (k + initial_indent_level)
1262+
* strings written between items
1263+
*
1264+
* Its size is always an odd number.
1265+
*/
12551266
static PyObject *
1256-
_create_newline_indent(PyObject *indent, Py_ssize_t indent_level)
1267+
create_indent_cache(PyEncoderObject *s, Py_ssize_t indent_level)
12571268
{
12581269
PyObject *newline_indent = PyUnicode_FromOrdinal('\n');
12591270
if (newline_indent != NULL && indent_level) {
12601271
PyUnicode_AppendAndDel(&newline_indent,
1261-
PySequence_Repeat(indent, indent_level));
1272+
PySequence_Repeat(s->indent, indent_level));
1273+
}
1274+
if (newline_indent == NULL) {
1275+
return NULL;
1276+
}
1277+
PyObject *indent_cache = PyList_New(1);
1278+
if (indent_cache == NULL) {
1279+
Py_DECREF(newline_indent);
1280+
return NULL;
12621281
}
1263-
return newline_indent;
1282+
PyList_SET_ITEM(indent_cache, 0, newline_indent);
1283+
return indent_cache;
1284+
}
1285+
1286+
/* Extend indent_cache by adding values for the next level.
1287+
* It should have values for the indent_level-1 level before the call.
1288+
*/
1289+
static int
1290+
update_indent_cache(PyEncoderObject *s,
1291+
Py_ssize_t indent_level, PyObject *indent_cache)
1292+
{
1293+
assert(indent_level * 2 == PyList_GET_SIZE(indent_cache) + 1);
1294+
assert(indent_level > 0);
1295+
PyObject *newline_indent = PyList_GET_ITEM(indent_cache, (indent_level - 1)*2);
1296+
newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1297+
if (newline_indent == NULL) {
1298+
return -1;
1299+
}
1300+
PyObject *separator_indent = PyUnicode_Concat(s->item_separator, newline_indent);
1301+
if (separator_indent == NULL) {
1302+
Py_DECREF(newline_indent);
1303+
return -1;
1304+
}
1305+
1306+
if (PyList_Append(indent_cache, separator_indent) < 0 ||
1307+
PyList_Append(indent_cache, newline_indent) < 0)
1308+
{
1309+
Py_DECREF(separator_indent);
1310+
Py_DECREF(newline_indent);
1311+
return -1;
1312+
}
1313+
Py_DECREF(separator_indent);
1314+
Py_DECREF(newline_indent);
1315+
return 0;
12641316
}
12651317

1318+
static PyObject *
1319+
get_item_separator(PyEncoderObject *s,
1320+
Py_ssize_t indent_level, PyObject *indent_cache)
1321+
{
1322+
assert(indent_level > 0);
1323+
if (indent_level * 2 > PyList_GET_SIZE(indent_cache)) {
1324+
if (update_indent_cache(s, indent_level, indent_cache) < 0) {
1325+
return NULL;
1326+
}
1327+
}
1328+
assert(indent_level * 2 < PyList_GET_SIZE(indent_cache));
1329+
return PyList_GET_ITEM(indent_cache, indent_level * 2 - 1);
1330+
}
1331+
1332+
static int
1333+
write_newline_indent(PyUnicodeWriter *writer,
1334+
Py_ssize_t indent_level, PyObject *indent_cache)
1335+
{
1336+
PyObject *newline_indent = PyList_GET_ITEM(indent_cache, indent_level * 2);
1337+
return PyUnicodeWriter_WriteStr(writer, newline_indent);
1338+
}
1339+
1340+
12661341
static PyObject *
12671342
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
12681343
{
@@ -1280,20 +1355,20 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
12801355
return NULL;
12811356
}
12821357

1283-
PyObject *newline_indent = NULL;
1358+
PyObject *indent_cache = NULL;
12841359
if (self->indent != Py_None) {
1285-
newline_indent = _create_newline_indent(self->indent, indent_level);
1286-
if (newline_indent == NULL) {
1360+
indent_cache = create_indent_cache(self, indent_level);
1361+
if (indent_cache == NULL) {
12871362
PyUnicodeWriter_Discard(writer);
12881363
return NULL;
12891364
}
12901365
}
1291-
if (encoder_listencode_obj(self, writer, obj, newline_indent)) {
1366+
if (encoder_listencode_obj(self, writer, obj, indent_level, indent_cache)) {
12921367
PyUnicodeWriter_Discard(writer);
1293-
Py_XDECREF(newline_indent);
1368+
Py_XDECREF(indent_cache);
12941369
return NULL;
12951370
}
1296-
Py_XDECREF(newline_indent);
1371+
Py_XDECREF(indent_cache);
12971372

12981373
PyObject *str = PyUnicodeWriter_Finish(writer);
12991374
if (str == NULL) {
@@ -1381,7 +1456,8 @@ _steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen)
13811456

13821457
static int
13831458
encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
1384-
PyObject *obj, PyObject *newline_indent)
1459+
PyObject *obj,
1460+
Py_ssize_t indent_level, PyObject *indent_cache)
13851461
{
13861462
/* Encode Python object obj to a JSON term */
13871463
PyObject *newobj;
@@ -1421,14 +1497,14 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14211497
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
14221498
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
14231499
return -1;
1424-
rv = encoder_listencode_list(s, writer, obj, newline_indent);
1500+
rv = encoder_listencode_list(s, writer, obj, indent_level, indent_cache);
14251501
_Py_LeaveRecursiveCall();
14261502
return rv;
14271503
}
14281504
else if (PyDict_Check(obj)) {
14291505
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
14301506
return -1;
1431-
rv = encoder_listencode_dict(s, writer, obj, newline_indent);
1507+
rv = encoder_listencode_dict(s, writer, obj, indent_level, indent_cache);
14321508
_Py_LeaveRecursiveCall();
14331509
return rv;
14341510
}
@@ -1462,7 +1538,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14621538
Py_XDECREF(ident);
14631539
return -1;
14641540
}
1465-
rv = encoder_listencode_obj(s, writer, newobj, newline_indent);
1541+
rv = encoder_listencode_obj(s, writer, newobj, indent_level, indent_cache);
14661542
_Py_LeaveRecursiveCall();
14671543

14681544
Py_DECREF(newobj);
@@ -1485,7 +1561,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
14851561
static int
14861562
encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first,
14871563
PyObject *dct, PyObject *key, PyObject *value,
1488-
PyObject *newline_indent,
1564+
Py_ssize_t indent_level, PyObject *indent_cache,
14891565
PyObject *item_separator)
14901566
{
14911567
PyObject *keystr = NULL;
@@ -1541,7 +1617,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
15411617
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
15421618
return -1;
15431619
}
1544-
if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) {
1620+
if (encoder_listencode_obj(s, writer, value, indent_level, indent_cache) < 0) {
15451621
_PyErr_FormatNote("when serializing %T item %R", dct, key);
15461622
return -1;
15471623
}
@@ -1550,15 +1626,14 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
15501626

15511627
static int
15521628
encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
1553-
PyObject *dct, PyObject *newline_indent)
1629+
PyObject *dct,
1630+
Py_ssize_t indent_level, PyObject *indent_cache)
15541631
{
15551632
/* Encode Python dict dct a JSON term */
15561633
PyObject *ident = NULL;
15571634
PyObject *items = NULL;
15581635
PyObject *key, *value;
15591636
bool first = true;
1560-
PyObject *new_newline_indent = NULL;
1561-
PyObject *separator_indent = NULL;
15621637

15631638
if (PyDict_GET_SIZE(dct) == 0) {
15641639
/* Fast path */
@@ -1585,19 +1660,13 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
15851660
goto bail;
15861661
}
15871662

1588-
PyObject *current_item_separator = s->item_separator; // borrowed reference
1663+
PyObject *separator = s->item_separator; // borrowed reference
15891664
if (s->indent != Py_None) {
1590-
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1591-
if (new_newline_indent == NULL) {
1592-
goto bail;
1593-
}
1594-
separator_indent = PyUnicode_Concat(current_item_separator, new_newline_indent);
1595-
if (separator_indent == NULL) {
1596-
goto bail;
1597-
}
1598-
// update item separator with a borrowed reference
1599-
current_item_separator = separator_indent;
1600-
if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1665+
indent_level++;
1666+
separator = get_item_separator(s, indent_level, indent_cache);
1667+
if (separator == NULL ||
1668+
write_newline_indent(writer, indent_level, indent_cache) < 0)
1669+
{
16011670
goto bail;
16021671
}
16031672
}
@@ -1618,8 +1687,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16181687
key = PyTuple_GET_ITEM(item, 0);
16191688
value = PyTuple_GET_ITEM(item, 1);
16201689
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
1621-
new_newline_indent,
1622-
current_item_separator) < 0)
1690+
indent_level, indent_cache,
1691+
separator) < 0)
16231692
goto bail;
16241693
}
16251694
Py_CLEAR(items);
@@ -1628,8 +1697,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16281697
Py_ssize_t pos = 0;
16291698
while (PyDict_Next(dct, &pos, &key, &value)) {
16301699
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
1631-
new_newline_indent,
1632-
current_item_separator) < 0)
1700+
indent_level, indent_cache,
1701+
separator) < 0)
16331702
goto bail;
16341703
}
16351704
}
@@ -1640,10 +1709,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16401709
Py_CLEAR(ident);
16411710
}
16421711
if (s->indent != Py_None) {
1643-
Py_CLEAR(new_newline_indent);
1644-
Py_CLEAR(separator_indent);
1645-
1646-
if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1712+
indent_level--;
1713+
if (write_newline_indent(writer, indent_level, indent_cache) < 0) {
16471714
goto bail;
16481715
}
16491716
}
@@ -1656,20 +1723,17 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
16561723
bail:
16571724
Py_XDECREF(items);
16581725
Py_XDECREF(ident);
1659-
Py_XDECREF(separator_indent);
1660-
Py_XDECREF(new_newline_indent);
16611726
return -1;
16621727
}
16631728

16641729
static int
16651730
encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
1666-
PyObject *seq, PyObject *newline_indent)
1731+
PyObject *seq,
1732+
Py_ssize_t indent_level, PyObject *indent_cache)
16671733
{
16681734
PyObject *ident = NULL;
16691735
PyObject *s_fast = NULL;
16701736
Py_ssize_t i;
1671-
PyObject *new_newline_indent = NULL;
1672-
PyObject *separator_indent = NULL;
16731737

16741738
ident = NULL;
16751739
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
@@ -1702,28 +1766,21 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17021766

17031767
PyObject *separator = s->item_separator; // borrowed reference
17041768
if (s->indent != Py_None) {
1705-
new_newline_indent = PyUnicode_Concat(newline_indent, s->indent);
1706-
if (new_newline_indent == NULL) {
1707-
goto bail;
1708-
}
1709-
1710-
if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) {
1711-
goto bail;
1712-
}
1713-
1714-
separator_indent = PyUnicode_Concat(separator, new_newline_indent);
1715-
if (separator_indent == NULL) {
1769+
indent_level++;
1770+
separator = get_item_separator(s, indent_level, indent_cache);
1771+
if (separator == NULL ||
1772+
write_newline_indent(writer, indent_level, indent_cache) < 0)
1773+
{
17161774
goto bail;
17171775
}
1718-
separator = separator_indent; // assign separator with borrowed reference
17191776
}
17201777
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
17211778
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
17221779
if (i) {
17231780
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
17241781
goto bail;
17251782
}
1726-
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
1783+
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
17271784
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
17281785
goto bail;
17291786
}
@@ -1735,9 +1792,8 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17351792
}
17361793

17371794
if (s->indent != Py_None) {
1738-
Py_CLEAR(new_newline_indent);
1739-
Py_CLEAR(separator_indent);
1740-
if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) {
1795+
indent_level--;
1796+
if (write_newline_indent(writer, indent_level, indent_cache) < 0) {
17411797
goto bail;
17421798
}
17431799
}
@@ -1751,8 +1807,6 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
17511807
bail:
17521808
Py_XDECREF(ident);
17531809
Py_DECREF(s_fast);
1754-
Py_XDECREF(separator_indent);
1755-
Py_XDECREF(new_newline_indent);
17561810
return -1;
17571811
}
17581812

0 commit comments

Comments
 (0)