Skip to content

Commit 7ac0868

Browse files
gh-135573: Make pickled lists, sets and dicts a tiny bit smaller (GH-144162)
Ensure that APPENDS and SETITEMS are never used for a batch of size 1, and that ADDITEMS and SETITEMS are never used for a batch of size 0. This harmonizes the C implementation with the Python implementation, which already guarantees this, and makes a pickle a tiny bit smaller in rare cases (about 0.1% of the time). It saves 1 byte for lists and dicts of size 1001, 2001, ..., and 2 bytes for sets and dicts of size 1000, 2000, ...
1 parent 9e8fa2d commit 7ac0868

File tree

1 file changed

+39
-40
lines changed

1 file changed

+39
-40
lines changed

Modules/_pickle.c

Lines changed: 39 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3066,11 +3066,6 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter, PyObject *or
30663066

30673067
assert(iter != NULL);
30683068

3069-
/* XXX: I think this function could be made faster by avoiding the
3070-
iterator interface and fetching objects directly from list using
3071-
PyList_GET_ITEM.
3072-
*/
3073-
30743069
if (self->proto == 0) {
30753070
/* APPENDS isn't available; do one at a time. */
30763071
for (;; total++) {
@@ -3192,24 +3187,24 @@ batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj)
31923187
assert(obj != NULL);
31933188
assert(self->proto > 0);
31943189
assert(PyList_CheckExact(obj));
3195-
3196-
if (PyList_GET_SIZE(obj) == 1) {
3197-
item = PyList_GET_ITEM(obj, 0);
3198-
Py_INCREF(item);
3199-
int err = save(state, self, item, 0);
3200-
Py_DECREF(item);
3201-
if (err < 0) {
3202-
_PyErr_FormatNote("when serializing %T item 0", obj);
3203-
return -1;
3204-
}
3205-
if (_Pickler_Write(self, &append_op, 1) < 0)
3206-
return -1;
3207-
return 0;
3208-
}
3190+
assert(PyList_GET_SIZE(obj));
32093191

32103192
/* Write in batches of BATCHSIZE. */
32113193
total = 0;
32123194
do {
3195+
if (PyList_GET_SIZE(obj) - total == 1) {
3196+
item = PyList_GET_ITEM(obj, total);
3197+
Py_INCREF(item);
3198+
int err = save(state, self, item, 0);
3199+
Py_DECREF(item);
3200+
if (err < 0) {
3201+
_PyErr_FormatNote("when serializing %T item %zd", obj, total);
3202+
return -1;
3203+
}
3204+
if (_Pickler_Write(self, &append_op, 1) < 0)
3205+
return -1;
3206+
return 0;
3207+
}
32133208
this_batch = 0;
32143209
if (_Pickler_Write(self, &mark_op, 1) < 0)
32153210
return -1;
@@ -3470,28 +3465,29 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
34703465
assert(self->proto > 0);
34713466

34723467
dict_size = PyDict_GET_SIZE(obj);
3473-
3474-
/* Special-case len(d) == 1 to save space. */
3475-
if (dict_size == 1) {
3476-
PyDict_Next(obj, &ppos, &key, &value);
3477-
Py_INCREF(key);
3478-
Py_INCREF(value);
3479-
if (save(state, self, key, 0) < 0) {
3480-
goto error;
3481-
}
3482-
if (save(state, self, value, 0) < 0) {
3483-
_PyErr_FormatNote("when serializing %T item %R", obj, key);
3484-
goto error;
3485-
}
3486-
Py_CLEAR(key);
3487-
Py_CLEAR(value);
3488-
if (_Pickler_Write(self, &setitem_op, 1) < 0)
3489-
return -1;
3490-
return 0;
3491-
}
3468+
assert(dict_size);
34923469

34933470
/* Write in batches of BATCHSIZE. */
3471+
Py_ssize_t total = 0;
34943472
do {
3473+
if (dict_size - total == 1) {
3474+
PyDict_Next(obj, &ppos, &key, &value);
3475+
Py_INCREF(key);
3476+
Py_INCREF(value);
3477+
if (save(state, self, key, 0) < 0) {
3478+
goto error;
3479+
}
3480+
if (save(state, self, value, 0) < 0) {
3481+
_PyErr_FormatNote("when serializing %T item %R", obj, key);
3482+
goto error;
3483+
}
3484+
Py_CLEAR(key);
3485+
Py_CLEAR(value);
3486+
if (_Pickler_Write(self, &setitem_op, 1) < 0)
3487+
return -1;
3488+
return 0;
3489+
}
3490+
34953491
i = 0;
34963492
if (_Pickler_Write(self, &mark_op, 1) < 0)
34973493
return -1;
@@ -3507,6 +3503,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
35073503
}
35083504
Py_CLEAR(key);
35093505
Py_CLEAR(value);
3506+
total++;
35103507
if (++i == BATCHSIZE)
35113508
break;
35123509
}
@@ -3519,7 +3516,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
35193516
return -1;
35203517
}
35213518

3522-
} while (i == BATCHSIZE);
3519+
} while (total < dict_size);
35233520
return 0;
35243521
error:
35253522
Py_XDECREF(key);
@@ -3637,6 +3634,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
36373634
return 0; /* nothing to do */
36383635

36393636
/* Write in batches of BATCHSIZE. */
3637+
Py_ssize_t total = 0;
36403638
do {
36413639
i = 0;
36423640
if (_Pickler_Write(self, &mark_op, 1) < 0)
@@ -3651,6 +3649,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
36513649
_PyErr_FormatNote("when serializing %T element", obj);
36523650
break;
36533651
}
3652+
total++;
36543653
if (++i == BATCHSIZE)
36553654
break;
36563655
}
@@ -3666,7 +3665,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
36663665
"set changed size during iteration");
36673666
return -1;
36683667
}
3669-
} while (i == BATCHSIZE);
3668+
} while (total < set_size);
36703669

36713670
return 0;
36723671
}

0 commit comments

Comments
 (0)