diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 3f569e6baee676..0ffedd195b5017 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -617,7 +617,7 @@ def test_issue10254(self): self.assertEqual(self.db.normalize('NFC', a), b) def test_long_combining_mark_run(self): - # GH-XXXXX: avoid quadratic canonical ordering. + # gh-149079: avoid quadratic canonical ordering. payload = "a" + ("\u0300\u0327" * 32) nfd = "a" + ("\u0327" * 32) + ("\u0300" * 32) nfc = "\u00e0" + ("\u0327" * 32) + ("\u0300" * 31) @@ -628,7 +628,7 @@ def test_long_combining_mark_run(self): self.assertEqual(self.db.normalize("NFKC", payload), nfc) def test_combining_mark_run_fast_paths(self): - # GH-XXXXX: cover short runs and already-sorted long runs. + # gh-149079: cover short runs and already-sorted long runs. short_payload = "a" + ("\u0300\u0327" * 9) + "\u0300" short_nfd = "a" + ("\u0327" * 9) + ("\u0300" * 10) short_nfc = "\u00e0" + ("\u0327" * 9) + ("\u0300" * 9) diff --git a/Misc/NEWS.d/next/Security/2026-04-27-16-36-11.gh-issue-149079.vKl-LM.rst b/Misc/NEWS.d/next/Security/2026-04-27-16-36-11.gh-issue-149079.vKl-LM.rst index 72e8374acffb1f..4ed22b58f7405f 100644 --- a/Misc/NEWS.d/next/Security/2026-04-27-16-36-11.gh-issue-149079.vKl-LM.rst +++ b/Misc/NEWS.d/next/Security/2026-04-27-16-36-11.gh-issue-149079.vKl-LM.rst @@ -1,5 +1,5 @@ Fix a potential denial of service in :func:`unicodedata.normalize`. The -canonical ordering step of Unicode normalization used an O(n²) insertion +canonical ordering step of Unicode normalization used a quadratic-time insertion sort for reordering combining characters, which could be exploited with crafted input containing many combining characters in non-canonical order. Replaced with a linear-time counting sort for long runs. 
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 9ac2faffb40bfb..d132e77030bf29 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -586,22 +586,14 @@ canonical_ordering_sort_counting(int kind, void *data, Py_ssize_t counts[256] = {0}; Py_ssize_t run_length = end - start; Py_ssize_t total = 0; - unsigned char min_combining = 255; - unsigned char max_combining = 0; for (Py_ssize_t i = start; i < end; i++) { Py_UCS4 code = PyUnicode_READ(kind, data, i); unsigned char combining = _getrecord_ex(code)->combining; counts[combining]++; - if (combining < min_combining) { - min_combining = combining; - } - if (combining > max_combining) { - max_combining = combining; - } } - for (Py_ssize_t i = min_combining; i <= max_combining; i++) { + for (size_t i = 0; i < Py_ARRAY_LENGTH(counts); i++) { Py_ssize_t count = counts[i]; counts[i] = total; total += count; @@ -629,7 +621,7 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) void *result_data; /* Longest decomposition in Unicode 3.2: U+FDFA */ Py_UCS4 stack[20]; - Py_ssize_t space, isize, length; + Py_ssize_t space, isize; int index, prefix, count, stackptr; unsigned char prev, cur; Py_UCS4 *sortbuf = NULL; @@ -727,25 +719,24 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) result_kind = PyUnicode_KIND(result); result_data = PyUnicode_DATA(result); - length = PyUnicode_GET_LENGTH(result); /* Sort each consecutive combining-character run canonically. 
*/ i = 0; - while (i < length) { + while (i < o) { Py_ssize_t run_length, run_start; int needs_sort = 0; - prev = _getrecord_ex( - PyUnicode_READ(result_kind, result_data, i))->combining; + Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i); + prev = _getrecord_ex(ch)->combining; if (prev == 0) { i++; continue; } run_start = i++; - while (i < length) { - cur = _getrecord_ex( - PyUnicode_READ(result_kind, result_data, i))->combining; + while (i < o) { + Py_UCS4 ch = PyUnicode_READ(result_kind, result_data, i); + cur = _getrecord_ex(ch)->combining; if (cur == 0) { break; } @@ -767,8 +758,9 @@ nfd_nfkd(PyObject *self, PyObject *input, int k) } if (run_length > sortbuflen) { - Py_UCS4 *new_sortbuf = PyMem_Realloc(sortbuf, - run_length * sizeof(Py_UCS4)); + Py_UCS4 *new_sortbuf = sortbuf; + /* PyMem_Resize() overwrites its first argument, even on failure; resize a copy so sortbuf can still be freed. */ + PyMem_Resize(new_sortbuf, Py_UCS4, run_length); if (new_sortbuf == NULL) { PyErr_NoMemory(); PyMem_Free(sortbuf);