From 5aea6ec85bf047365eb6c233cf174d0606ea1b31 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Wed, 29 Apr 2026 10:37:43 +0300 Subject: [PATCH 1/5] [mypyc] Make separate=True work for mypy self-compile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six fixes on top of #21299 — all required to compile mypy itself or to install a separate=True wheel via pip. - mypyc/build.py: pip invokes setup.py twice when building a wheel. On the second invocation mypy's incremental cache is fully warm, so we generate no new C source for any group; the resulting extensions ship without their entry points and import as stubs. Fix: when a group emits no C source, reuse the .c file from the previous pass. - mypyc/codegen/{emit,emitfunc}.py: when code in one compiled group reads an attribute on an object whose class lives in another group, the generated cast depends on that other group's struct definitions. We weren't recording the dependency, so the C compiler couldn't see the layout and the build failed. Fix: register the dependency at the cast site. - mypyc/codegen/emitmodule.py + mypyc/build.py: when mypy compiles itself, a generated shim file can share a basename with a runtime C file. The C compiler resolves the runtime include relative to the shim's directory and picks up the shim instead. Fix: emit those includes with the <> form so the search uses -I paths only. The `get_header_deps` regex was tightened to match both quote styles (otherwise headers in <> form drop out of Extension.depends and incremental rebuilds miss layout changes). - mypyc/lib-rt/misc_ops.c: each compiled module gets its own shared library next to it in the package tree. The runtime was computing the module's file path as if a single shared library sat above the whole package, which doubled the package prefix and broke submodule lookups. Fix: detect the per-module case and use only the module's leaf name. 
- mypyc/irbuild/prepare.py: traits and builtin-derived classes don't get a real C constructor emitted. A clean build sidesteps that, but a fully cached rebuild was taking the direct-call path and producing C that referenced a constructor that doesn't exist. Fix: skip the registration the same way a clean build does. - mypyc/build.py: on every build_ext, setuptools rewrites every compiled .so in the source tree even when nothing changed. On macOS this invalidates the OS signature cache, so every import on the next run pays a re-verification cost. Fix: skip the copy when source and destination already match — takes a 1-line edit rebuild from ~72s to ~6s. setup.py also gets a MYPYC_SEPARATE env knob so CI can exercise the codegen path against mypy itself. --- mypyc/build.py | 67 +++++++++++++++++++++++++++++++++++-- mypyc/codegen/emit.py | 12 +++++++ mypyc/codegen/emitfunc.py | 5 +++ mypyc/codegen/emitmodule.py | 11 +++--- mypyc/irbuild/prepare.py | 7 +++- mypyc/lib-rt/misc_ops.c | 52 ++++++++++++++++++++++------ setup.py | 2 ++ 7 files changed, 139 insertions(+), 17 deletions(-) diff --git a/mypyc/build.py b/mypyc/build.py index 439734e39b9ec..28b7717379b80 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -449,6 +449,52 @@ def write_file(path: str, contents: str) -> None: os.utime(path, times=(new_mtime, new_mtime)) +_setuptools_patch_applied = False + + +def _patch_setuptools_copy_extensions_to_source() -> None: + """Skip redundant `.so` copies in --inplace builds. + + setuptools' copy_extensions_to_source rewrites every `.so` in the + source tree on every build_ext, even when nothing changed. On macOS + this invalidates AMFI's signature cache (~100 ms re-verification per + `.so` on the next import), eating most of the separate=True + incremental speedup. We patch it to skip the copy when src and dst + already match. Idempotent; applied from mypycify(). 
+ """ + global _setuptools_patch_applied + if _setuptools_patch_applied: + return + _setuptools_patch_applied = True + + from setuptools.command.build_ext import build_ext as _build_ext + + def _files_match(a: str, b: str) -> bool: + try: + sa = os.stat(a) + sb = os.stat(b) + except OSError: + return False + # Compare size + whole-second mtime. distutils' copy_file + # propagates the source mtime, but macOS drops sub-second + # precision on write so the float values never match verbatim. + return sa.st_size == sb.st_size and int(sa.st_mtime) == int(sb.st_mtime) + + def patched(self: Any) -> None: + build_py = self.get_finalized_command("build_py") + for ext in self.extensions: + inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext) + if _files_match(regular_file, inplace_file): + continue + if os.path.exists(regular_file) or not ext.optional: + self.copy_file(regular_file, inplace_file, level=self.verbose) + if ext._needs_stub: + inplace_stub = self._get_equivalent_stub(ext, inplace_file) + self._write_stub_file(inplace_stub, ext, compile=True) + + _build_ext.copy_extensions_to_source = patched # type: ignore[method-assign] + + def construct_groups( sources: list[BuildSource], separate: bool | list[tuple[list[str], str | None]], @@ -508,7 +554,7 @@ def get_header_deps(cfiles: list[tuple[str, str]]) -> list[str]: """ headers: set[str] = set() for _, contents in cfiles: - headers.update(re.findall(r'#include "(.*)"', contents)) + headers.update(re.findall(r'#include [<"]([^>"]+)[>"]', contents)) return sorted(headers) @@ -564,7 +610,7 @@ def mypyc_build( # Write out the generated C and collect the files for each group # Should this be here?? 
group_cfilenames: list[tuple[list[str], list[str]]] = [] - for cfiles in group_cfiles: + for (group_sources, group_name), cfiles in zip(groups, group_cfiles): cfilenames = [] for cfile, ctext in cfiles: cfile = os.path.join(compiler_options.target_dir, cfile) @@ -573,6 +619,20 @@ def mypyc_build( if os.path.splitext(cfile)[1] == ".c": cfilenames.append(cfile) + # Fully-cached SCC (e.g. pip's second setup.py invoke for the + # wheel phase): mypyc returns empty ctext but the previous run's + # .c file is still on disk. Reuse it so we don't link with + # sources=[]. + if not cfilenames and group_name is not None: + from mypyc.codegen.emitmodule import group_dir as _group_dir + + short_suffix = "_" + exported_name(group_name.split(".")[-1]) + existing = os.path.join( + compiler_options.target_dir, _group_dir(group_name), f"__native{short_suffix}.c" + ) + if os.path.exists(existing): + cfilenames.append(existing) + deps = [os.path.join(compiler_options.target_dir, dep) for dep in get_header_deps(cfiles)] group_cfilenames.append((cfilenames, deps)) @@ -747,6 +807,9 @@ def mypycify( have no backward compatibility guarantees! """ + # Skip redundant inplace .so copies on every build_ext invocation. + _patch_setuptools_copy_extensions_to_source() + # Figure out our configuration compiler_options = CompilerOptions( strip_asserts=strip_asserts, diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 54e77836a76ca..957559423869a 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -314,6 +314,18 @@ def get_group_prefix(self, obj: ClassIR | FuncDecl) -> str: # See docs above return self.get_module_group_prefix(obj.module_name) + def register_group_dep(self, cl: ClassIR) -> None: + """Record `cl`'s defining group as a cross-group dep, if any. + + Call this when emitting code that refers to `cl`'s struct + layout: the .c file consuming that layout needs the defining + group's `__native_*.h` included, and group_deps drives which + headers get pulled in. 
+ """ + target_group = self.context.group_map.get(cl.module_name) + if target_group and target_group != self.context.group_name: + self.context.group_deps.add(target_group) + def static_name(self, id: str, module: str | None, prefix: str = STATIC_PREFIX) -> str: """Create name of a C static variable. diff --git a/mypyc/codegen/emitfunc.py b/mypyc/codegen/emitfunc.py index 737b002b4201b..2f00fe165c664 100644 --- a/mypyc/codegen/emitfunc.py +++ b/mypyc/codegen/emitfunc.py @@ -348,6 +348,11 @@ def get_attr_expr(self, obj: str, op: GetAttr | SetAttr, decl_cl: ClassIR) -> st classes, and *(obj + attr_offset) for attributes defined by traits. We also insert all necessary C casts here. """ + # The struct cast below needs the defining group's __native.h + # included by the consuming .c file. Record both the receiver + # and declaring classes as cross-group deps. + self.emitter.register_group_dep(op.class_type.class_ir) + self.emitter.register_group_dep(decl_cl) cast = f"({op.class_type.struct_name(self.emitter.names)} *)" if decl_cl.is_trait and op.class_type.class_ir.is_trait: # For pure trait access find the offset first, offsets diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index 043a8929cbd92..7dd08897b11bd 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -614,16 +614,19 @@ def generate_c_for_modules(self) -> list[tuple[str, str]]: base_emitter = Emitter(self.context) # Optionally just include the runtime library c files to - # reduce the number of compiler invocations needed + # reduce the number of compiler invocations needed. + # Use <> form (only -I paths) so a shim file with the same + # basename as a runtime file can't shadow it. Triggered by + # mypyc/lower/int_ops.py vs lib-rt/int_ops.c on mypy self-compile. 
if self.compiler_options.include_runtime_files: for name in RUNTIME_C_FILES: - base_emitter.emit_line(f'#include "{name}"') + base_emitter.emit_line(f"#include <{name}>") # Include conditional source files source_deps = collect_source_dependencies(self.modules) for source_dep in sorted(source_deps, key=lambda d: d.path): - base_emitter.emit_line(f'#include "{source_dep.path}"') + base_emitter.emit_line(f"#include <{source_dep.path}>") if self.compiler_options.depends_on_librt_internal: - base_emitter.emit_line('#include "internal/librt_internal_api.c"') + base_emitter.emit_line("#include <internal/librt_internal_api.c>") base_emitter.emit_line(f'#include "__native{self.short_group_suffix}.h"') base_emitter.emit_line(f'#include "__native_internal{self.short_group_suffix}.h"') emitter = base_emitter diff --git a/mypyc/irbuild/prepare.py b/mypyc/irbuild/prepare.py index 09bfc8339b404..861c67f105257 100644 --- a/mypyc/irbuild/prepare.py +++ b/mypyc/irbuild/prepare.py @@ -182,7 +182,12 @@ def load_type_map(mapper: Mapper, modules: list[MypyFile], deser_ctx: DeserMaps) continue mapper.type_to_ir[node.node] = ir mapper.symbol_fullnames.add(node.node.fullname) - mapper.func_to_decl[node.node] = ir.ctor + # Trait/builtin-base classes have an ir.ctor FuncDecl + # but no emitted CPyDef_, so a cross-group direct + # call would hit an undefined symbol. Mirror the same + # skip in prepare_ext_class_def. + if not ir.is_trait and not ir.builtin_base: + mapper.func_to_decl[node.node] = ir.ctor for module in modules: for func in get_module_func_defs(module): diff --git a/mypyc/lib-rt/misc_ops.c b/mypyc/lib-rt/misc_ops.c index 2aaadb2ac47d2..235a5c4196459 100644 --- a/mypyc/lib-rt/misc_ops.c +++ b/mypyc/lib-rt/misc_ops.c @@ -1281,12 +1281,17 @@ static int CPyImport_SetModuleFile(PyObject *modobj, PyObject *module_name, Py_DECREF(file); return 0; } - // Derive __file__ from the shared library's __file__ (for its - // directory), the module name (dots -> path separators), and the - // extension suffix. E.g. 
for module "a.b.c", shared lib - // "/path/to/group__mypyc.cpython-312-x86_64-linux-gnu.so", - // suffix ".cpython-312-x86_64-linux-gnu.so": - // => "/path/to/a/b/c.cpython-312-x86_64-linux-gnu.so" + // Derive __file__ from the shared lib's directory, the module + // name, and the extension suffix. Two layouts: + // + // Monolithic: one shared lib above the package tree holds many + // modules, so append the full dotted module path. + // separate=True: each module has its own "<name>__mypyc.so" + // next to the module, so dirname(shared_lib) is already inside + // the parent package. Append only the last segment. + // + // Detect the separate=True case by matching the shared lib's + // basename against "<last_segment>__mypyc<ext_suffix>". PyObject *derived_file = NULL; if (shared_lib_file != NULL && shared_lib_file != Py_None && PyUnicode_Check(shared_lib_file)) { @@ -1314,30 +1319,57 @@ static int CPyImport_SetModuleFile(PyObject *modobj, PyObject *module_name, if (module_path == NULL) { return -1; } + + // Compute the module's last dotted segment for the separate=True check. + Py_ssize_t name_len = PyUnicode_GetLength(module_name); + Py_ssize_t last_dot = PyUnicode_FindChar(module_name, '.', 0, name_len, -1); + PyObject *last_segment = last_dot >= 0 + ? PyUnicode_Substring(module_name, last_dot + 1, name_len) + : (Py_INCREF(module_name), module_name); + if (last_segment == NULL) { + Py_DECREF(module_path); + return -1; + } + // Compare shared_lib_file basename against "<last_segment>__mypyc<ext_suffix>". + PyObject *expected_basename = PyUnicode_FromFormat( + "%U__mypyc%U", last_segment, ext_suffix); + PyObject *actual_basename = sep >= 0 + ? 
PyUnicode_Substring(shared_lib_file, sep + 1, sf_len) + : (Py_INCREF(shared_lib_file), shared_lib_file); + int is_per_module_lib = 0; + if (expected_basename != NULL && actual_basename != NULL) { + is_per_module_lib = + (PyUnicode_Compare(expected_basename, actual_basename) == 0); + } + Py_XDECREF(expected_basename); + Py_XDECREF(actual_basename); + // For packages, __file__ should point to __init__, // e.g. "a/b/__init__.cpython-312-x86_64-linux-gnu.so". + PyObject *file_path = is_per_module_lib ? last_segment : module_path; if (sep >= 0) { PyObject *dir = PyUnicode_Substring(shared_lib_file, 0, sep); if (dir != NULL) { if (is_package) { derived_file = PyUnicode_FromFormat( "%U%c%U%c__init__%U", dir, (int)sep_char, - module_path, (int)sep_char, ext_suffix); + file_path, (int)sep_char, ext_suffix); } else { derived_file = PyUnicode_FromFormat( "%U%c%U%U", dir, (int)sep_char, - module_path, ext_suffix); + file_path, ext_suffix); } Py_DECREF(dir); } } else { if (is_package) { derived_file = PyUnicode_FromFormat( - "%U%c__init__%U", module_path, (int)SEP[0], ext_suffix); + "%U%c__init__%U", file_path, (int)SEP[0], ext_suffix); } else { - derived_file = PyUnicode_FromFormat("%U%U", module_path, ext_suffix); + derived_file = PyUnicode_FromFormat("%U%U", file_path, ext_suffix); } } + Py_DECREF(last_segment); Py_DECREF(module_path); } if (derived_file == NULL && !PyErr_Occurred()) { diff --git a/setup.py b/setup.py index d36a6bfa2c2dc..1879f6892ba8f 100644 --- a/setup.py +++ b/setup.py @@ -153,6 +153,7 @@ def run(self) -> None: debug_level = os.getenv("MYPYC_DEBUG_LEVEL", "1") force_multifile = os.getenv("MYPYC_MULTI_FILE", "") == "1" log_trace = bool(int(os.getenv("MYPYC_LOG_TRACE", "0"))) + separate = os.getenv("MYPYC_SEPARATE", "") == "1" ext_modules = mypycify( mypyc_targets + ["--config-file=mypy_bootstrap.ini"], opt_level=opt_level, @@ -161,6 +162,7 @@ def run(self) -> None: # our Appveyor builds run out of memory sometimes. 
multi_file=sys.platform == "win32" or force_multifile, log_trace=log_trace, + separate=separate, # Mypy itself is allowed to use native_internal extension. depends_on_librt_internal=True, ) From 9a6b559885215f98fe82c2eccdcd5170cc214ab5 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Mon, 4 May 2026 14:21:41 +0300 Subject: [PATCH 2/5] [mypyc] Address review feedback on separate=True self-compile fixes - mypyc/codegen/emitmodule.py + mypyc/build.py: drop the path reconstruction in mypyc_build; pull the file list straight from the IR cache's src_hashes when a group is fully cached. Covers multi_file mode and group_name=None for free. - mypyc/build.py: drop deps from get_header_deps that don't exist under target_dir. The widened regex picks up system headers like <Python.h>; feeding non-existent paths into Extension.depends forces a full rebuild on every run. - mypyc/lib-rt/misc_ops.c: split the two ternaries with INCREF side effects in CPyImport_SetModuleFile into if/else. - mypyc/irbuild/prepare.py: fix stale comment reference (prepare_ext_class_def -> prepare_init_method). --- mypyc/build.py | 29 ++++++++++++----------------- mypyc/codegen/emitmodule.py | 33 ++++++++++++++++++++++++++++++++- mypyc/irbuild/prepare.py | 4 ++-- mypyc/lib-rt/misc_ops.c | 20 ++++++++++++++------ 4 files changed, 60 insertions(+), 26 deletions(-) diff --git a/mypyc/build.py b/mypyc/build.py index 28b7717379b80..b8382a02fa2dd 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -610,30 +610,25 @@ def mypyc_build( # Write out the generated C and collect the files for each group # Should this be here?? 
group_cfilenames: list[tuple[list[str], list[str]]] = [] - for (group_sources, group_name), cfiles in zip(groups, group_cfiles): + for cfiles in group_cfiles: cfilenames = [] for cfile, ctext in cfiles: cfile = os.path.join(compiler_options.target_dir, cfile) - if not options.mypyc_skip_c_generation: + # Empty contents marks a file the previous run already wrote + # (fully-cached group): skip the rewrite and just reuse it. + if ctext and not options.mypyc_skip_c_generation: write_file(cfile, ctext) if os.path.splitext(cfile)[1] == ".c": cfilenames.append(cfile) - # Fully-cached SCC (e.g. pip's second setup.py invoke for the - # wheel phase): mypyc returns empty ctext but the previous run's - # .c file is still on disk. Reuse it so we don't link with - # sources=[]. - if not cfilenames and group_name is not None: - from mypyc.codegen.emitmodule import group_dir as _group_dir - - short_suffix = "_" + exported_name(group_name.split(".")[-1]) - existing = os.path.join( - compiler_options.target_dir, _group_dir(group_name), f"__native{short_suffix}.c" - ) - if os.path.exists(existing): - cfilenames.append(existing) - - deps = [os.path.join(compiler_options.target_dir, dep) for dep in get_header_deps(cfiles)] + # The header regex matches both quote styles, so the result can + # include system headers like `<Python.h>` that don't live under + # target_dir. Joining those produces non-existent paths which + # would force a full rebuild on every run via Extension.depends. 
+ candidate_deps = ( + os.path.join(compiler_options.target_dir, dep) for dep in get_header_deps(cfiles) + ) + deps = [d for d in candidate_deps if os.path.exists(d)] group_cfilenames.append((cfilenames, deps)) return groups, group_cfilenames, source_deps diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index 7dd08897b11bd..d435f3d273322 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -362,7 +362,12 @@ def compile_ir_to_c( if source.module in modules } if not group_modules: - ctext[group_name] = [] + # Fully-cached group (e.g. pip's second setup.py invoke for + # the wheel phase): no fresh IR was produced. Reuse the file + # list recorded in any module's IR cache so the linker still + # sees the previous run's outputs; empty content is a "do + # not rewrite" sentinel for mypyc_build. + ctext[group_name] = _load_cached_group_files(group_sources, result) continue generator = GroupGenerator( group_modules, source_paths, group_name, mapper.group_map, names, compiler_options @@ -372,6 +377,32 @@ def compile_ir_to_c( return ctext +def _load_cached_group_files( + group_sources: list[BuildSource], result: BuildResult +) -> list[tuple[str, str]]: + """Read the .c/.h paths recorded for this group on the previous run. + + All modules in a group share the same src_hashes map, so the first + readable IR cache is sufficient. Returns paths paired with empty + content so callers can distinguish "reuse on disk" from "newly + generated". 
+ """ + for source in group_sources: + state = result.graph.get(source.module) + if state is None: + continue + try: + ir_json = result.manager.metastore.read(get_state_ir_cache_name(state)) + except (FileNotFoundError, OSError): + continue + try: + ir_data = json.loads(ir_json) + except json.JSONDecodeError: + continue + return [(path, "") for path in ir_data.get("src_hashes", {})] + return [] + + def get_ir_cache_name(id: str, path: str, options: Options) -> str: meta_path, _, _ = get_cache_names(id, path, options) # Mypyc uses JSON cache even with --fixed-format-cache (for now). diff --git a/mypyc/irbuild/prepare.py b/mypyc/irbuild/prepare.py index 861c67f105257..f143ce1b44025 100644 --- a/mypyc/irbuild/prepare.py +++ b/mypyc/irbuild/prepare.py @@ -184,8 +184,8 @@ def load_type_map(mapper: Mapper, modules: list[MypyFile], deser_ctx: DeserMaps) mapper.symbol_fullnames.add(node.node.fullname) # Trait/builtin-base classes have an ir.ctor FuncDecl # but no emitted CPyDef_, so a cross-group direct - # call would hit an undefined symbol. Mirror the same - # skip in prepare_ext_class_def. + # call would hit an undefined symbol. Mirror the skip + # in prepare_init_method. if not ir.is_trait and not ir.builtin_base: mapper.func_to_decl[node.node] = ir.ctor diff --git a/mypyc/lib-rt/misc_ops.c b/mypyc/lib-rt/misc_ops.c index 235a5c4196459..392dba0deca4c 100644 --- a/mypyc/lib-rt/misc_ops.c +++ b/mypyc/lib-rt/misc_ops.c @@ -1323,9 +1323,13 @@ static int CPyImport_SetModuleFile(PyObject *modobj, PyObject *module_name, // Compute the module's last dotted segment for the separate=True check. Py_ssize_t name_len = PyUnicode_GetLength(module_name); Py_ssize_t last_dot = PyUnicode_FindChar(module_name, '.', 0, name_len, -1); - PyObject *last_segment = last_dot >= 0 - ? 
PyUnicode_Substring(module_name, last_dot + 1, name_len) - : (Py_INCREF(module_name), module_name); + PyObject *last_segment; + if (last_dot >= 0) { + last_segment = PyUnicode_Substring(module_name, last_dot + 1, name_len); + } else { + last_segment = module_name; + Py_INCREF(last_segment); + } if (last_segment == NULL) { Py_DECREF(module_path); return -1; @@ -1333,9 +1337,13 @@ static int CPyImport_SetModuleFile(PyObject *modobj, PyObject *module_name, // Compare shared_lib_file basename against "<last_segment>__mypyc<ext_suffix>". PyObject *expected_basename = PyUnicode_FromFormat( "%U__mypyc%U", last_segment, ext_suffix); - PyObject *actual_basename = sep >= 0 - ? PyUnicode_Substring(shared_lib_file, sep + 1, sf_len) - : (Py_INCREF(shared_lib_file), shared_lib_file); + PyObject *actual_basename; + if (sep >= 0) { + actual_basename = PyUnicode_Substring(shared_lib_file, sep + 1, sf_len); + } else { + actual_basename = shared_lib_file; + Py_INCREF(actual_basename); + } int is_per_module_lib = 0; if (expected_basename != NULL && actual_basename != NULL) { is_per_module_lib = From f9df01388e9d6ae5d9d2c4b525ac917688484142 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Wed, 6 May 2026 10:17:52 +0300 Subject: [PATCH 3/5] [mypyc] Scope build_ext patch to mypyc-generated extensions The copy_extensions_to_source patch runs during setup()'s build_ext command, after mypycify() has returned, so it can't be unwound with a context manager. Instead, tag every extension mypycify produces and gate the skip on that marker so: - Extensions from other setuptools consumers in the same setup.py get the unmodified upstream behavior (including stub writes). - For our extensions, skipping the stub write alongside the copy is safe because mypyc never sets _needs_stub. 
--- mypyc/build.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/mypyc/build.py b/mypyc/build.py index b8382a02fa2dd..106801fc5d60f 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -449,18 +449,25 @@ def write_file(path: str, contents: str) -> None: os.utime(path, times=(new_mtime, new_mtime)) +_MYPYC_EXTENSION_MARKER = "_mypyc_skip_redundant_inplace_copy" _setuptools_patch_applied = False def _patch_setuptools_copy_extensions_to_source() -> None: - """Skip redundant `.so` copies in --inplace builds. + """Skip redundant `.so` copies for extensions we generated. setuptools' copy_extensions_to_source rewrites every `.so` in the source tree on every build_ext, even when nothing changed. On macOS this invalidates AMFI's signature cache (~100 ms re-verification per `.so` on the next import), eating most of the separate=True - incremental speedup. We patch it to skip the copy when src and dst - already match. Idempotent; applied from mypycify(). + incremental speedup. + + The patch is global because copy_extensions_to_source runs during + setup()'s build_ext command, after mypycify() has already returned; + we can't scope a context manager around it. Instead the skip only + fires for extensions tagged by mypycify (via the marker attribute), + so other setuptools users in the same setup.py see the unmodified + upstream behavior, including stub writes. Idempotent. """ global _setuptools_patch_applied if _setuptools_patch_applied: @@ -484,7 +491,14 @@ def patched(self: Any) -> None: build_py = self.get_finalized_command("build_py") for ext in self.extensions: inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext) - if _files_match(regular_file, inplace_file): + # Only short-circuit for extensions mypycify produced. + # Skipping the copy also skips the stub write below, which + # is safe here because mypyc-generated extensions never set + # _needs_stub. 
For any other extension, fall through to the + # original setuptools behavior. + if getattr(ext, _MYPYC_EXTENSION_MARKER, False) and _files_match( + regular_file, inplace_file + ): continue if os.path.exists(regular_file) or not ext.optional: self.copy_file(regular_file, inplace_file, level=self.verbose) @@ -919,4 +933,9 @@ def mypycify( ) ) + # Tag every extension we own so the build_ext patch knows it's + # safe to skip the redundant inplace copy for these specifically. + for ext in extensions: + setattr(ext, _MYPYC_EXTENSION_MARKER, True) + return extensions From e86f2d3f6b00b1ec59d09631e738237f25bceb72 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Wed, 6 May 2026 13:44:03 +0300 Subject: [PATCH 4/5] [mypyc] Delegate non-skipped work to original copy_extensions_to_source Capture the upstream method at patch time and call it for everything we don't want to skip. The patched body now only filters out the mypyc extensions whose `.so` already matches the inplace destination and hands the rest to setuptools verbatim, so future changes in the upstream method body propagate without us having to mirror them. --- mypyc/build.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/mypyc/build.py b/mypyc/build.py index 106801fc5d60f..8f0fe4a517a5d 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -467,7 +467,7 @@ def _patch_setuptools_copy_extensions_to_source() -> None: we can't scope a context manager around it. Instead the skip only fires for extensions tagged by mypycify (via the marker attribute), so other setuptools users in the same setup.py see the unmodified - upstream behavior, including stub writes. Idempotent. + upstream behavior, including stub writes. 
""" global _setuptools_patch_applied if _setuptools_patch_applied: @@ -476,6 +476,8 @@ def _patch_setuptools_copy_extensions_to_source() -> None: from setuptools.command.build_ext import build_ext as _build_ext + original = _build_ext.copy_extensions_to_source + def _files_match(a: str, b: str) -> bool: try: sa = os.stat(a) @@ -488,23 +490,28 @@ def _files_match(a: str, b: str) -> bool: return sa.st_size == sb.st_size and int(sa.st_mtime) == int(sb.st_mtime) def patched(self: Any) -> None: + # Find mypyc-generated extensions whose .so already matches the + # inplace destination -- those are the ones to skip. Anything + # else (non-mypyc, or mypyc but stale) goes through the + # unmodified original method, so we don't have to keep its body + # in sync as setuptools evolves. build_py = self.get_finalized_command("build_py") + to_skip = [] for ext in self.extensions: - inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext) - # Only short-circuit for extensions mypycify produced. - # Skipping the copy also skips the stub write below, which - # is safe here because mypyc-generated extensions never set - # _needs_stub. For any other extension, fall through to the - # original setuptools behavior. 
- if getattr(ext, _MYPYC_EXTENSION_MARKER, False) and _files_match( - regular_file, inplace_file - ): + if not getattr(ext, _MYPYC_EXTENSION_MARKER, False): continue - if os.path.exists(regular_file) or not ext.optional: - self.copy_file(regular_file, inplace_file, level=self.verbose) - if ext._needs_stub: - inplace_stub = self._get_equivalent_stub(ext, inplace_file) - self._write_stub_file(inplace_stub, ext, compile=True) + inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext) + if _files_match(regular_file, inplace_file): + to_skip.append(ext) + if not to_skip: + original(self) + return + saved = self.extensions + self.extensions = [e for e in saved if e not in to_skip] + try: + original(self) + finally: + self.extensions = saved _build_ext.copy_extensions_to_source = patched # type: ignore[method-assign] From a2c896db0582f1187273f25868c9aa726c5043f9 Mon Sep 17 00:00:00 2001 From: vaggelisd Date: Wed, 6 May 2026 14:05:51 +0300 Subject: [PATCH 5/5] [mypyc] Simplify the copy_extensions_to_source patch Pull the per-extension predicate into is_redundant(), drop the explicit to_skip list and the early-return -- the list comprehension handles the no-skip case on its own. Rewrite the inline comment in plain English. --- mypyc/build.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/mypyc/build.py b/mypyc/build.py index 8f0fe4a517a5d..1e1240f3c68d7 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -490,24 +490,21 @@ def _files_match(a: str, b: str) -> bool: return sa.st_size == sb.st_size and int(sa.st_mtime) == int(sb.st_mtime) def patched(self: Any) -> None: - # Find mypyc-generated extensions whose .so already matches the - # inplace destination -- those are the ones to skip. Anything - # else (non-mypyc, or mypyc but stale) goes through the - # unmodified original method, so we don't have to keep its body - # in sync as setuptools evolves. 
build_py = self.get_finalized_command("build_py") - to_skip = [] - for ext in self.extensions: + + def is_redundant(ext: Any) -> bool: if not getattr(ext, _MYPYC_EXTENSION_MARKER, False): - continue + return False inplace_file, regular_file = self._get_inplace_equivalent(build_py, ext) - if _files_match(regular_file, inplace_file): - to_skip.append(ext) - if not to_skip: - original(self) - return + return _files_match(regular_file, inplace_file) + + # Hide our already-fresh extensions from setuptools' loop and + # let it handle whatever's left. Delegating instead of + # reimplementing the body means future setuptools changes carry + # over for free. self.extensions is restored before we return + # so anything that inspects it later sees the original list. saved = self.extensions - self.extensions = [e for e in saved if e not in to_skip] + self.extensions = [ext for ext in saved if not is_redundant(ext)] try: original(self) finally: