Skip to content

Commit db4c09a

Browse files
committed
gh-139871: Optimize bytearray unique bytes iconcat
If the bytearray is empty and a uniquely referenced bytes object is being concatenated (e.g. one just received from read), just use its storage as the backing for the bytearray rather than copying it.

build_bytes_unique: Mean +- std dev: [base] 383 ns +- 11 ns -> [iconcat_opt] 342 ns +- 5 ns: 1.12x faster
build_bytearray: Mean +- std dev: [base] 496 ns +- 8 ns -> [iconcat_opt] 471 ns +- 13 ns: 1.05x faster
encode: Mean +- std dev: [base] 482 us +- 2 us -> [iconcat_opt] 13.8 us +- 0.1 us: 34.78x faster

Benchmark hidden because not significant (1): build_bytes

Geometric mean: 2.53x faster

note: Performance of build_bytes is expected to stay constant.

```python
import pyperf

runner = pyperf.Runner()

count1 = 1_000
count2 = 100
count3 = 10_000

CHUNK_A = b'a' * count1
CHUNK_B = b'b' * count2
CHUNK_C = b'c' * count3

def build_bytes():
    # Bytes not uniquely referenced.
    ba = bytearray()
    ba += CHUNK_A
    ba += CHUNK_B
    ba += CHUNK_C

def build_bytes_unique():
    ba = bytearray()
    # Repeat inline results in uniquely referenced bytes.
    ba += b'a' * count1
    ba += b'b' * count2
    ba += b'c' * count3

def build_bytearray():
    # Each bytearray appended is uniquely referenced.
    ba = bytearray()
    ba += bytearray(CHUNK_A)
    ba += bytearray(CHUNK_B)
    ba += bytearray(CHUNK_C)

runner.bench_func('build_bytes', build_bytes)
runner.bench_func('build_bytes_unique', build_bytes_unique)
runner.bench_func('build_bytearray', build_bytearray)
runner.timeit(
    name="encode",
    setup="a = 'a' * 1_000_000",
    stmt="bytearray(a, encoding='utf8')")
```
1 parent 227b9d3 commit db4c09a

File tree

1 file changed

+42
-13
lines changed

1 file changed

+42
-13
lines changed

Objects/bytearrayobject.c

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,43 @@ bytearray_iconcat_lock_held(PyObject *op, PyObject *other)
333333
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
334334
PyByteArrayObject *self = _PyByteArray_CAST(op);
335335

336+
// optimization: Avoid copying the bytes coming in when possible.
337+
if (self->ob_alloc == 0 && _PyObject_IsUniquelyReferenced(other)) {
338+
// note: ob_bytes_object is always the immortal empty bytes here.
339+
if (!_canresize(self)) {
340+
return NULL;
341+
}
342+
343+
/* Get the bytes out of the temporary bytearray.
344+
345+
Just returning other doesn't work as __init__ calls this and can't
346+
change self. */
347+
if (PyByteArray_CheckExact(other)) {
348+
PyObject *taken = PyObject_CallMethodNoArgs(other,
349+
&_Py_ID(take_bytes));
350+
if (taken == NULL) {
351+
return NULL;
352+
}
353+
// Avoid Py_INCREF needed for argument case.
354+
Py_ssize_t size = Py_SIZE(taken);
355+
self->ob_bytes_object = taken;
356+
bytearray_reinit_from_bytes(self, size, size);
357+
return Py_NewRef(self);
358+
}
359+
360+
if (PyBytes_CheckExact(other)) {
361+
Py_ssize_t size = Py_SIZE(other);
362+
self->ob_bytes_object = other;
363+
bytearray_reinit_from_bytes(self, size, size);
364+
Py_INCREF(self->ob_bytes_object);
365+
366+
// Caller has a reference still and its decref will return
367+
// bytes to be uniquely referenced.
368+
assert(Py_REFCNT(self->ob_bytes_object) == 2);
369+
return Py_NewRef(self);
370+
}
371+
}
372+
336373
Py_buffer vo;
337374
if (PyObject_GetBuffer(other, &vo, PyBUF_SIMPLE) != 0) {
338375
PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
@@ -977,22 +1014,14 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
9771014
}
9781015
}
9791016

980-
/* Use the buffer API */
1017+
/* Use the buffer API. Defer to iconcat which optimizes. */
9811018
if (PyObject_CheckBuffer(arg)) {
982-
Py_ssize_t size;
983-
Py_buffer view;
984-
if (PyObject_GetBuffer(arg, &view, PyBUF_FULL_RO) < 0)
1019+
PyObject *new = bytearray_iconcat((PyObject *)self, arg);
1020+
if (new == NULL) {
9851021
return -1;
986-
size = view.len;
987-
if (PyByteArray_Resize((PyObject *)self, size) < 0) goto fail;
988-
if (PyBuffer_ToContiguous(PyByteArray_AS_STRING(self),
989-
&view, size, 'C') < 0)
990-
goto fail;
991-
PyBuffer_Release(&view);
1022+
}
1023+
Py_DECREF(new);
9921024
return 0;
993-
fail:
994-
PyBuffer_Release(&view);
995-
return -1;
9961025
}
9971026

9981027
if (PyList_CheckExact(arg) || PyTuple_CheckExact(arg)) {

0 commit comments

Comments
 (0)