From b1bc868fba29846b0b27e4c78cb04eae423d9eb0 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Thu, 29 Jan 2026 12:26:11 +0000 Subject: [PATCH 1/7] gh-144319: Add huge pages support for pymalloc (#144320) --- Doc/c-api/memory.rst | 6 ++- Doc/using/configure.rst | 15 ++++++ Doc/whatsnew/3.15.rst | 6 +++ Include/internal/pycore_obmalloc.h | 22 +++++++-- ...-01-29-01-42-14.gh-issue-144319._7EtdB.rst | 1 + Objects/obmalloc.c | 20 ++++++++ PCbuild/build.bat | 3 ++ PCbuild/pyproject.props | 3 +- PCbuild/readme.txt | 5 ++ configure | 47 +++++++++++++++++++ configure.ac | 23 +++++++++ pyconfig.h.in | 3 ++ 12 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-01-29-01-42-14.gh-issue-144319._7EtdB.rst diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index a3be75a2a76d60..58f0de5d0fc541 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -677,7 +677,11 @@ The pymalloc allocator Python has a *pymalloc* allocator optimized for small objects (smaller or equal to 512 bytes) with a short lifetime. It uses memory mappings called "arenas" with a fixed size of either 256 KiB on 32-bit platforms or 1 MiB on 64-bit -platforms. It falls back to :c:func:`PyMem_RawMalloc` and +platforms. When Python is configured with :option:`--with-pymalloc-hugepages`, +the arena size on 64-bit platforms is increased to 2 MiB to match the huge page +size, and arena allocation will attempt to use huge pages (``MAP_HUGETLB`` on +Linux, ``MEM_LARGE_PAGES`` on Windows) with automatic fallback to regular pages. +It falls back to :c:func:`PyMem_RawMalloc` and :c:func:`PyMem_RawRealloc` for allocations larger than 512 bytes. *pymalloc* is the :ref:`default allocator ` of the diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index af055d35290429..c455272af72715 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -783,6 +783,21 @@ also be used to improve performance. 
See also :envvar:`PYTHONMALLOC` environment variable. +.. option:: --with-pymalloc-hugepages + + Enable huge page support for :ref:`pymalloc <pymalloc>` arenas (disabled by + default). When enabled, the arena size on 64-bit platforms is increased to + 2 MiB and arena allocation uses ``MAP_HUGETLB`` (Linux) or + ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages. + + The configure script checks that the platform supports ``MAP_HUGETLB`` + and emits a warning if it is not available. + + On Windows, use the ``--pymalloc-hugepages`` flag with ``build.bat`` or + set the ``UsePymallocHugepages`` MSBuild property. + + .. versionadded:: 3.15 + .. option:: --without-doc-strings Disable static documentation strings to reduce the memory footprint (enabled diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 22d8e2493241c5..68c491f8a8cbb6 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1477,6 +1477,12 @@ Build changes modules that are missing or packaged separately. (Contributed by Stan Ulbrych and Petr Viktorin in :gh:`139707`.) +* The new configure option :option:`--with-pymalloc-hugepages` enables huge + page support for :ref:`pymalloc <pymalloc>` arenas. When enabled, arena size + increases to 2 MiB and allocation uses ``MAP_HUGETLB`` (Linux) or + ``MEM_LARGE_PAGES`` (Windows) with automatic fallback to regular pages. + On Windows, use ``build.bat --pymalloc-hugepages``. + * Annotating anonymous mmap usage is now supported if Linux kernel supports :manpage:`PR_SET_VMA_ANON_NAME <PR_SET_VMA(2const)>` (Linux 5.17 or newer). Annotations are visible in ``/proc/<pid>/maps`` if the kernel supports the feature diff --git a/Include/internal/pycore_obmalloc.h b/Include/internal/pycore_obmalloc.h index a7ba8f340737aa..0b23bb48dd5c1b 100644 --- a/Include/internal/pycore_obmalloc.h +++ b/Include/internal/pycore_obmalloc.h @@ -208,7 +208,11 @@ typedef unsigned int pymem_uint; /* assuming >= 16 bits */ * mappings to reduce heap fragmentation.
*/ #ifdef USE_LARGE_ARENAS -#define ARENA_BITS 20 /* 1 MiB */ +# ifdef PYMALLOC_USE_HUGEPAGES +# define ARENA_BITS 21 /* 2 MiB */ +# else +# define ARENA_BITS 20 /* 1 MiB */ +# endif #else #define ARENA_BITS 18 /* 256 KiB */ #endif @@ -469,7 +473,7 @@ nfp free pools in usable_arenas. */ /* How many arena_objects do we initially allocate? - * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the + * 16 = can allocate 16 arenas = 16 * ARENA_SIZE before growing the * `arenas` vector. */ #define INITIAL_ARENA_OBJECTS 16 @@ -512,7 +516,11 @@ struct _obmalloc_mgmt { memory address bit allocation for keys - 64-bit pointers, IGNORE_BITS=0 and 2^20 arena size: + ARENA_BITS is configurable: 20 (1 MiB) by default on 64-bit, or + 21 (2 MiB) when PYMALLOC_USE_HUGEPAGES is enabled. All bit widths + below are derived from ARENA_BITS automatically. + + 64-bit pointers, IGNORE_BITS=0 and 2^20 arena size (default): 15 -> MAP_TOP_BITS 15 -> MAP_MID_BITS 14 -> MAP_BOT_BITS @@ -520,6 +528,14 @@ struct _obmalloc_mgmt { ---- 64 + 64-bit pointers, IGNORE_BITS=0 and 2^21 arena size (hugepages): + 15 -> MAP_TOP_BITS + 15 -> MAP_MID_BITS + 13 -> MAP_BOT_BITS + 21 -> ideal aligned arena + ---- + 64 + 64-bit pointers, IGNORE_BITS=16, and 2^20 arena size: 16 -> IGNORE_BITS 10 -> MAP_TOP_BITS diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-29-01-42-14.gh-issue-144319._7EtdB.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-29-01-42-14.gh-issue-144319._7EtdB.rst new file mode 100644 index 00000000000000..805ba6067edd87 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-29-01-42-14.gh-issue-144319._7EtdB.rst @@ -0,0 +1 @@ +Add huge pages support for the pymalloc allocator. 
Patch by Pablo Galindo diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index b24723f16cf43d..71dc4bf0d0461c 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -496,10 +496,30 @@ void * _PyMem_ArenaAlloc(void *Py_UNUSED(ctx), size_t size) { #ifdef MS_WINDOWS +# ifdef PYMALLOC_USE_HUGEPAGES + void *ptr = VirtualAlloc(NULL, size, + MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, + PAGE_READWRITE); + if (ptr != NULL) + return ptr; + /* Fall back to regular pages */ +# endif return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); #elif defined(ARENAS_USE_MMAP) void *ptr; +# ifdef PYMALLOC_USE_HUGEPAGES +# ifdef MAP_HUGETLB + ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, -1, 0); + if (ptr != MAP_FAILED) { + assert(ptr != NULL); + (void)_PyAnnotateMemoryMap(ptr, size, "cpython:pymalloc:hugepage"); + return ptr; + } + /* Fall back to regular pages */ +# endif +# endif ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (ptr == MAP_FAILED) diff --git a/PCbuild/build.bat b/PCbuild/build.bat index e4de9a80d76a9e..8c24309be262ba 100644 --- a/PCbuild/build.bat +++ b/PCbuild/build.bat @@ -42,6 +42,7 @@ echo. --experimental-jit-interpreter Enable the experimental Tier 2 interprete echo. --pystats Enable PyStats collection. echo. --tail-call-interp Enable tail-calling interpreter (requires LLVM 19 or higher). echo. --enable-stackref-debug Enable stackref debugging mode. +echo. --pymalloc-hugepages Enable huge page support for pymalloc arenas. echo. echo.Available flags to avoid building certain modules. 
echo.These flags have no effect if '-e' is not given: @@ -100,6 +101,7 @@ if "%~1"=="--without-remote-debug" (set DisableRemoteDebug=true) & shift & goto if "%~1"=="--pystats" (set PyStats=1) & shift & goto CheckOpts if "%~1"=="--tail-call-interp" (set UseTailCallInterp=true) & shift & goto CheckOpts if "%~1"=="--enable-stackref-debug" (set StackRefDebug=true) & shift & goto CheckOpts +if "%~1"=="--pymalloc-hugepages" (set UsePymallocHugepages=true) & shift & goto CheckOpts rem These use the actual property names used by MSBuild. We could just let rem them in through the environment, but we specify them on the command line rem anyway for visibility so set defaults after this @@ -205,6 +207,7 @@ echo on /p:UseTailCallInterp=%UseTailCallInterp%^ /p:DisableRemoteDebug=%DisableRemoteDebug%^ /p:StackRefDebug=%StackRefDebug%^ + /p:UsePymallocHugepages=%UsePymallocHugepages%^ %1 %2 %3 %4 %5 %6 %7 %8 %9 @echo off diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 53bfe5e3ea95cc..94ae718d58c4ba 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -50,11 +50,12 @@ <_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64' and $(PlatformToolset) != 'ClangCL'">_M_X64;$(_PlatformPreprocessorDefinition) <_Py3NamePreprocessorDefinition>PY3_DLLNAME=L"$(Py3DllName)$(PyDebugExt)"; <_FreeThreadedPreprocessorDefinition Condition="$(DisableGil) == 'true'">Py_GIL_DISABLED=1; + <_PymallocHugepagesPreprocessorDefinition Condition="$(UsePymallocHugepages) == 'true'">PYMALLOC_USE_HUGEPAGES=1; $(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(PySourcePath)PC;%(AdditionalIncludeDirectories) - WIN32;$(_Py3NamePreprocessorDefinition)$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)$(_FreeThreadedPreprocessorDefinition)%(PreprocessorDefinitions) + 
WIN32;$(_Py3NamePreprocessorDefinition)$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)$(_FreeThreadedPreprocessorDefinition)$(_PymallocHugepagesPreprocessorDefinition)%(PreprocessorDefinitions) _Py_USING_PGO=1;%(PreprocessorDefinitions) MaxSpeed diff --git a/PCbuild/readme.txt b/PCbuild/readme.txt index 313982ed28a5dc..c5d38296070e02 100644 --- a/PCbuild/readme.txt +++ b/PCbuild/readme.txt @@ -359,6 +359,11 @@ Supported flags are: * WITH_COMPUTED_GOTOS: build the interpreter using "computed gotos". Currently only supported by clang-cl. +* UsePymallocHugepages: enable huge page support for pymalloc arenas. + When enabled, the arena size on 64-bit platforms is increased to 2 MiB + and arena allocation uses MEM_LARGE_PAGES with automatic fallback to + regular pages. Can also be enabled via `--pymalloc-hugepages` flag. + Static library -------------- diff --git a/configure b/configure index c826a1bb85667b..30e35a0f55292a 100755 --- a/configure +++ b/configure @@ -1128,6 +1128,7 @@ enable_ipv6 with_doc_strings with_mimalloc with_pymalloc +with_pymalloc_hugepages with_c_locale_coercion with_valgrind with_dtrace @@ -1935,6 +1936,9 @@ Optional Packages: --with-mimalloc build with mimalloc memory allocator (default is yes if C11 stdatomic.h is available.) --with-pymalloc enable specialized mallocs (default is yes) + --with-pymalloc-hugepages + enable huge page support for pymalloc arenas + (default is no) --with-c-locale-coercion enable C locale coercion to a UTF-8 based locale (default is yes) @@ -18949,6 +18953,49 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_pymalloc" >&5 printf "%s\n" "$with_pymalloc" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-pymalloc-hugepages" >&5 +printf %s "checking for --with-pymalloc-hugepages... " >&6; } + +# Check whether --with-pymalloc-hugepages was given. 
+if test ${with_pymalloc_hugepages+y} +then : + withval=$with_pymalloc_hugepages; +fi + +if test "$with_pymalloc_hugepages" = "yes" +then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include <sys/mman.h> + +int +main (void) +{ + +int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; +(void)flags; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + +printf "%s\n" "#define PYMALLOC_USE_HUGEPAGES 1" >>confdefs.h + +else case e in #( + e) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: --with-pymalloc-hugepages requested but MAP_HUGETLB not found" >&5 +printf "%s\n" "$as_me: WARNING: --with-pymalloc-hugepages requested but MAP_HUGETLB not found" >&2;} + with_pymalloc_hugepages=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${with_pymalloc_hugepages:-no}" >&5 +printf "%s\n" "${with_pymalloc_hugepages:-no}" >&6; } + # Check for --with-c-locale-coercion { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-c-locale-coercion" >&5 printf %s "checking for --with-c-locale-coercion... " >&6; } diff --git a/configure.ac b/configure.ac index 322d33dd0e3c99..bc63d651f58065 100644 --- a/configure.ac +++ b/configure.ac @@ -5061,6 +5061,29 @@ then fi AC_MSG_RESULT([$with_pymalloc]) +AC_MSG_CHECKING([for --with-pymalloc-hugepages]) +AC_ARG_WITH( + [pymalloc-hugepages], + [AS_HELP_STRING([--with-pymalloc-hugepages], + [enable huge page support for pymalloc arenas (default is no)])]) +if test "$with_pymalloc_hugepages" = "yes" +then + dnl configure only runs on Unix-like systems; Windows uses MEM_LARGE_PAGES + dnl via VirtualAlloc but does not use configure. Only check MAP_HUGETLB here.
+ AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[ +#include <sys/mman.h> + ]], [[ +int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; +(void)flags; + ]])], + [AC_DEFINE([PYMALLOC_USE_HUGEPAGES], [1], + [Define to use huge pages for pymalloc arenas])], + [AC_MSG_WARN([--with-pymalloc-hugepages requested but MAP_HUGETLB not found]) + with_pymalloc_hugepages=no]) +fi +AC_MSG_RESULT([${with_pymalloc_hugepages:-no}]) + # Check for --with-c-locale-coercion AC_MSG_CHECKING([for --with-c-locale-coercion]) AC_ARG_WITH( diff --git a/pyconfig.h.in b/pyconfig.h.in index 4ae2abeabf1d41..3d901e01fe03c8 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1739,6 +1739,9 @@ /* Define as the preferred size in bits of long digits */ #undef PYLONG_BITS_IN_DIGIT +/* Define to use huge pages for pymalloc arenas */ +#undef PYMALLOC_USE_HUGEPAGES + /* enabled builtin hash modules */ #undef PY_BUILTIN_HASHLIB_HASHES From 927eb448aad6436d794ec9f55780013e25609600 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 29 Jan 2026 13:50:07 +0100 Subject: [PATCH 2/7] gh-144309: Build Python with POSIX 2024 (#144310) On FreeBSD, the ppoll() function is only visible if the POSIX version is 2024 or newer. --- .../Build/2026-01-28-19-04-12.gh-issue-144309.3sMFOh.rst | 1 + configure | 6 +++--- configure.ac | 8 ++++---- pyconfig.h.in | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2026-01-28-19-04-12.gh-issue-144309.3sMFOh.rst diff --git a/Misc/NEWS.d/next/Build/2026-01-28-19-04-12.gh-issue-144309.3sMFOh.rst b/Misc/NEWS.d/next/Build/2026-01-28-19-04-12.gh-issue-144309.3sMFOh.rst new file mode 100644 index 00000000000000..c64ef494d27380 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2026-01-28-19-04-12.gh-issue-144309.3sMFOh.rst @@ -0,0 +1 @@ +Build Python with POSIX 2024, instead of POSIX 2008. Patch by Victor Stinner.
diff --git a/configure b/configure index 30e35a0f55292a..cd8983683333cd 100755 --- a/configure +++ b/configure @@ -4757,9 +4757,9 @@ esac if test $define_xopen_source = yes then - # X/Open 7, incorporating POSIX.1-2008 + # X/Open 8, incorporating POSIX.1-2024 -printf "%s\n" "#define _XOPEN_SOURCE 700" >>confdefs.h +printf "%s\n" "#define _XOPEN_SOURCE 800" >>confdefs.h # On Tru64 Unix 4.0F, defining _XOPEN_SOURCE also requires @@ -4771,7 +4771,7 @@ printf "%s\n" "#define _XOPEN_SOURCE_EXTENDED 1" >>confdefs.h -printf "%s\n" "#define _POSIX_C_SOURCE 200809L" >>confdefs.h +printf "%s\n" "#define _POSIX_C_SOURCE 202405L" >>confdefs.h fi diff --git a/configure.ac b/configure.ac index bc63d651f58065..e9b45d459fee2a 100644 --- a/configure.ac +++ b/configure.ac @@ -916,8 +916,8 @@ esac if test $define_xopen_source = yes then - # X/Open 7, incorporating POSIX.1-2008 - AC_DEFINE([_XOPEN_SOURCE], [700], + # X/Open 8, incorporating POSIX.1-2024 + AC_DEFINE([_XOPEN_SOURCE], [800], [Define to the level of X/Open that your system supports]) # On Tru64 Unix 4.0F, defining _XOPEN_SOURCE also requires @@ -927,8 +927,8 @@ then AC_DEFINE([_XOPEN_SOURCE_EXTENDED], [1], [Define to activate Unix95-and-earlier features]) - AC_DEFINE([_POSIX_C_SOURCE], [200809L], - [Define to activate features from IEEE Stds 1003.1-2008]) + AC_DEFINE([_POSIX_C_SOURCE], [202405L], + [Define to activate features from IEEE Std 1003.1-2024]) fi # On HP-UX mbstate_t requires _INCLUDE__STDC_A1_SOURCE diff --git a/pyconfig.h.in b/pyconfig.h.in index 3d901e01fe03c8..e2009b2d9ee57e 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -2042,7 +2042,7 @@ /* Define on NetBSD to activate all library features */ #undef _NETBSD_SOURCE -/* Define to activate features from IEEE Stds 1003.1-2008 */ +/* Define to activate features from IEEE Std 1003.1-2024 */ #undef _POSIX_C_SOURCE /* Define if you have POSIX threads, and your system does not define that. 
*/ From a7cec565c17facf7fc2419f655fc43a621ae7dbc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 29 Jan 2026 15:04:37 +0200 Subject: [PATCH 3/7] gh-144206: Improve error messages for buffer overflow in fcntl.fcntl() and fcntl.ioctl() (GH-144273) --- ...-01-27-14-23-10.gh-issue-144206.l0un4U.rst | 2 ++ Modules/fcntlmodule.c | 31 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-01-27-14-23-10.gh-issue-144206.l0un4U.rst diff --git a/Misc/NEWS.d/next/Library/2026-01-27-14-23-10.gh-issue-144206.l0un4U.rst b/Misc/NEWS.d/next/Library/2026-01-27-14-23-10.gh-issue-144206.l0un4U.rst new file mode 100644 index 00000000000000..1e16d28a756296 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-27-14-23-10.gh-issue-144206.l0un4U.rst @@ -0,0 +1,2 @@ +Improve error messages for buffer overflow in :func:`fcntl.fcntl` and +:func:`fcntl.ioctl`. diff --git a/Modules/fcntlmodule.c b/Modules/fcntlmodule.c index e373bf368813ac..ce636c574ed5ff 100644 --- a/Modules/fcntlmodule.c +++ b/Modules/fcntlmodule.c @@ -111,7 +111,11 @@ fcntl_fcntl_impl(PyObject *module, int fd, int code, PyObject *arg) return !async_err ? PyErr_SetFromErrno(PyExc_OSError) : NULL; } if (memcmp(buf + len, guard, GUARDSZ) != 0) { - PyErr_SetString(PyExc_SystemError, "buffer overflow"); + PyErr_SetString(PyExc_SystemError, + "Possible stack corruption in fcntl() due to " + "buffer overflow. " + "Provide an argument of sufficient size as " + "determined by the operation."); return NULL; } return PyBytes_FromStringAndSize(buf, len); @@ -139,7 +143,11 @@ fcntl_fcntl_impl(PyObject *module, int fd, int code, PyObject *arg) return NULL; } if (ptr[len] != '\0') { - PyErr_SetString(PyExc_SystemError, "buffer overflow"); + PyErr_SetString(PyExc_SystemError, + "Memory corruption in fcntl() due to " + "buffer overflow. 
" + "Provide an argument of sufficient size as " + "determined by the operation."); PyBytesWriter_Discard(writer); return NULL; } @@ -264,7 +272,12 @@ fcntl_ioctl_impl(PyObject *module, int fd, unsigned long code, PyObject *arg, } PyBuffer_Release(&view); if (ptr == buf && memcmp(buf + len, guard, GUARDSZ) != 0) { - PyErr_SetString(PyExc_SystemError, "buffer overflow"); + PyErr_SetString(PyExc_SystemError, + "Possible stack corruption in ioctl() due to " + "buffer overflow. " + "Provide a writable buffer argument of " + "sufficient size as determined by " + "the operation."); return NULL; } return PyLong_FromLong(ret); @@ -293,7 +306,11 @@ fcntl_ioctl_impl(PyObject *module, int fd, unsigned long code, PyObject *arg, return !async_err ? PyErr_SetFromErrno(PyExc_OSError) : NULL; } if (memcmp(buf + len, guard, GUARDSZ) != 0) { - PyErr_SetString(PyExc_SystemError, "buffer overflow"); + PyErr_SetString(PyExc_SystemError, + "Possible stack corruption in ioctl() due to " + "buffer overflow. " + "Provide an argument of sufficient size as " + "determined by the operation."); return NULL; } return PyBytes_FromStringAndSize(buf, len); @@ -321,7 +338,11 @@ fcntl_ioctl_impl(PyObject *module, int fd, unsigned long code, PyObject *arg, return NULL; } if (ptr[len] != '\0') { - PyErr_SetString(PyExc_SystemError, "buffer overflow"); + PyErr_SetString(PyExc_SystemError, + "Memory corruption in ioctl() due to " + "buffer overflow. 
" + "Provide an argument of sufficient size as " + "determined by the operation."); PyBytesWriter_Discard(writer); return NULL; } From 0fa1fc69f3ff0deb778e3c9f37cdc0c702bc8ad2 Mon Sep 17 00:00:00 2001 From: cui Date: Thu, 29 Jan 2026 21:26:38 +0800 Subject: [PATCH 4/7] gh-144322: typo This errors to These errors (#144323) --- Parser/tokenizer/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parser/tokenizer/helpers.c b/Parser/tokenizer/helpers.c index a03531a744136d..fda8216a3005b9 100644 --- a/Parser/tokenizer/helpers.c +++ b/Parser/tokenizer/helpers.c @@ -65,7 +65,7 @@ _syntaxerror_range(struct tok_state *tok, const char *format, int _PyTokenizer_syntaxerror(struct tok_state *tok, const char *format, ...) { - // This errors are cleaned on startup. Todo: Fix it. + // These errors are cleaned on startup. Todo: Fix it. va_list vargs; va_start(vargs, format); int ret = _syntaxerror_range(tok, format, -1, -1, vargs); From 92c0ec2b007757287a5c4791437a8d5a6173ce58 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 29 Jan 2026 17:33:10 +0200 Subject: [PATCH 5/7] gh-144264: Speed up Base64 decoding of data containing ignored characters (GH-144265) Try the fast path again after decoding a quad the slow path. Use a bitmap cache for the ignorechars argument. 
--- Lib/test/test_binascii.py | 11 +++++ ...-01-27-10-02-04.gh-issue-144264.Wmzbol.rst | 3 ++ Modules/binascii.c | 42 ++++++++++++++----- 3 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-01-27-10-02-04.gh-issue-144264.Wmzbol.rst diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 4cfc332e89bea8..49accb08b62e40 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -202,6 +202,17 @@ def assertNonBase64Data(data, expected, ignorechars): assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n')) assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n')) + # Same cell in the cache: '\r' >> 3 == '\n' >> 3. + data = self.type2test(b'\r\n') + with self.assertRaises(binascii.Error): + binascii.a2b_base64(data, ignorechars=b'\r') + self.assertEqual(binascii.a2b_base64(data, ignorechars=b'\r\n'), b'') + # Same bit mask in the cache: '*' & 7 == '\n' & 7. + data = self.type2test(b'*\n') + with self.assertRaises(binascii.Error): + binascii.a2b_base64(data, ignorechars=b'*') + self.assertEqual(binascii.a2b_base64(data, ignorechars=b'*\n'), b'') + data = self.type2test(b'a\nb==') with self.assertRaises(TypeError): binascii.a2b_base64(data, ignorechars='') diff --git a/Misc/NEWS.d/next/Library/2026-01-27-10-02-04.gh-issue-144264.Wmzbol.rst b/Misc/NEWS.d/next/Library/2026-01-27-10-02-04.gh-issue-144264.Wmzbol.rst new file mode 100644 index 00000000000000..11e3fdeb4355cf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-27-10-02-04.gh-issue-144264.Wmzbol.rst @@ -0,0 +1,3 @@ +Speed up Base64 decoding of data containing ignored characters (both in +non-strict mode and with an explicit *ignorechars* argument). +It is now up to 2 times faster for multiline Base64 data.
diff --git a/Modules/binascii.c b/Modules/binascii.c index 593b27ac5ede65..201e7798bb7a8c 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -469,12 +469,23 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick) return PyBytesWriter_FinishWithPointer(writer, ascii_data); } +typedef unsigned char ignorecache_t[32]; static int -ignorechar(unsigned char c, Py_buffer *ignorechars) +ignorechar(unsigned char c, const Py_buffer *ignorechars, + ignorecache_t ignorecache) { - return (ignorechars->buf != NULL && - memchr(ignorechars->buf, c, ignorechars->len)); + if (ignorechars == NULL) { + return 0; + } + if (ignorecache[c >> 3] & (1 << (c & 7))) { + return 1; + } + if (memchr(ignorechars->buf, c, ignorechars->len)) { + ignorecache[c >> 3] |= 1 << (c & 7); + return 1; + } + return 0; } /*[clinic input] @@ -508,6 +519,13 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, if (strict_mode == -1) { strict_mode = (ignorechars->buf != NULL); } + if (!strict_mode || ignorechars->buf == NULL || ignorechars->len == 0) { + ignorechars = NULL; + } + ignorecache_t ignorecache; + if (ignorechars != NULL) { + memset(ignorecache, 0, sizeof(ignorecache)); + } /* Allocate the buffer */ Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */ @@ -517,8 +535,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, } unsigned char *bin_data = PyBytesWriter_GetData(writer); - size_t i = 0; /* Current position in input */ - +fastpath: /* Fast path: use optimized decoder for complete quads. * This works for both strict and non-strict mode for valid input. * The fast path stops at padding, invalid chars, or incomplete groups. 
@@ -527,7 +544,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, Py_ssize_t fast_chars = base64_decode_fast(ascii_data, (Py_ssize_t)ascii_len, bin_data, table_a2b_base64); if (fast_chars > 0) { - i = (size_t)fast_chars; + ascii_data += fast_chars; + ascii_len -= fast_chars; bin_data += (fast_chars / 4) * 3; } } @@ -536,8 +554,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int quad_pos = 0; unsigned char leftchar = 0; int pads = 0; - for (; i < ascii_len; i++) { - unsigned char this_ch = ascii_data[i]; + for (; ascii_len; ascii_data++, ascii_len--) { + unsigned char this_ch = *ascii_data; /* Check for pad sequences and ignore ** the invalid ones. @@ -549,7 +567,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, if (quad_pos == 0) { state = get_binascii_state(module); if (state) { - PyErr_SetString(state->Error, (i == 0) + PyErr_SetString(state->Error, (ascii_data == data->buf) ? "Leading padding not allowed" : "Excess padding not allowed"); } @@ -580,7 +598,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, unsigned char v = table_a2b_base64[this_ch]; if (v >= 64) { - if (strict_mode && !ignorechar(this_ch, ignorechars)) { + if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Only base64 data is allowed"); @@ -621,7 +639,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, quad_pos = 0; *bin_data++ = (leftchar << 6) | (v); leftchar = 0; - break; + ascii_data++; + ascii_len--; + goto fastpath; } } From 26996b59ab21fb0a5a4a585f386bbb05935c9bd9 Mon Sep 17 00:00:00 2001 From: Hai Zhu Date: Fri, 30 Jan 2026 00:58:01 +0800 Subject: [PATCH 6/7] gh-143946: Add more debug info in `optimize_uops` (GH-144262) --- Python/optimizer_analysis.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 
deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 35d72e851af667..039aacf23ae3a3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -52,8 +52,6 @@ #define DPRINTF(level, ...) \ if (get_lltrace() >= (level)) { printf(__VA_ARGS__); } - - static void dump_abstract_stack(_Py_UOpsAbstractFrame *frame, JitOptRef *stack_pointer) { @@ -83,8 +81,25 @@ dump_abstract_stack(_Py_UOpsAbstractFrame *frame, JitOptRef *stack_pointer) fflush(stdout); } +static void +dump_uop(JitOptContext *ctx, const char *label, int index, + const _PyUOpInstruction *instr, JitOptRef *stack_pointer) +{ + if (get_lltrace() >= 3) { + printf("%4d %s: ", index, label); + _PyUOpPrint(instr); + printf("\n"); + if (get_lltrace() >= 5 && ctx->frame->code != ((PyCodeObject *)&_Py_InitCleanup)) { + dump_abstract_stack(ctx->frame, stack_pointer); + } + } +} + +#define DUMP_UOP dump_uop + #else #define DPRINTF(level, ...) + #define DUMP_UOP(ctx, label, index, instr, stack_pointer) #endif static int @@ -347,19 +362,19 @@ get_code_with_logging(_PyUOpInstruction *op) uint64_t push_operand = op->operand0; if (push_operand & 1) { co = (PyCodeObject *)(push_operand & ~1); - DPRINTF(3, "code=%p ", co); + DPRINTF(3, " code=%p\n", co); assert(PyCode_Check(co)); } else { PyFunctionObject *func = (PyFunctionObject *)push_operand; - DPRINTF(3, "func=%p ", func); + DPRINTF(3, " func=%p ", func); if (func == NULL) { DPRINTF(3, "\n"); DPRINTF(1, "Missing function\n"); return NULL; } co = (PyCodeObject *)func->func_code; - DPRINTF(3, "code=%p ", co); + DPRINTF(3, "code=%p\n", co); } return co; } @@ -493,16 +508,7 @@ optimize_uops( stack_pointer = ctx->frame->stack_pointer; } -#ifdef Py_DEBUG - if (get_lltrace() >= 3) { - printf("%4d abs: ", (int)(this_instr - trace)); - _PyUOpPrint(this_instr); - printf(" \n"); - if (get_lltrace() >= 5 && !CURRENT_FRAME_IS_INIT_SHIM()) { - dump_abstract_stack(ctx->frame, stack_pointer); - } - } -#endif + DUMP_UOP(ctx, "abs", 
this_instr - trace, this_instr, stack_pointer); _PyUOpInstruction *out_ptr = ctx->out_buffer.next; @@ -519,6 +525,7 @@ optimize_uops( *(ctx->out_buffer.next++) = *this_instr; } assert(ctx->frame != NULL); + DUMP_UOP(ctx, "out", uop_buffer_length(&ctx->out_buffer) - 1, out_ptr, stack_pointer); if (!CURRENT_FRAME_IS_INIT_SHIM() && !ctx->done) { DPRINTF(3, " stack_level %d\n", STACK_LEVEL()); ctx->frame->stack_pointer = stack_pointer; From 14c5339a389a1161da4521d4c41236acb448baad Mon Sep 17 00:00:00 2001 From: Yongtao Huang Date: Fri, 30 Jan 2026 01:08:14 +0800 Subject: [PATCH 7/7] Test: fix stale uops usage in `test_capi/test_opt.py` (GH-144239) Signed-off-by: Yongtao Huang --- Lib/test/test_capi/test_opt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 7808700f6a2346..a379d1be2f9bd3 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -3750,6 +3750,7 @@ def test_is_none(n): res, ex = self._run_with_optimizer(test_is_none, TIER2_THRESHOLD) self.assertEqual(res, True) self.assertIsNotNone(ex) + uops = get_opnames(ex) self.assertIn("_IS_OP", uops) self.assertIn("_POP_TOP_NOP", uops)