From 46093f8fbe6f457fa6df21d55f6c239f49484dba Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Thu, 6 Mar 2025 11:15:38 +0000 Subject: [PATCH 1/3] gh-130887: remove trailing jump in AArch64 JIT stencils In order to keep the alignment of the code body, the removed jump and the 0 padding at the end of AArch64 JIT stencils have been replaced with nop instructions. --- Tools/jit/_stencils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 4ddbe967438bd1..7febd04e4416d4 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -209,7 +209,15 @@ def pad(self, alignment: int) -> None: self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def remove_jump(self, *, alignment: int = 1) -> None: + def add_nop(self, alignment: int) -> None: + """Add a NOP if the offset is not aligned.""" + offset = len(self.body) + nop = b"\x1f\x20\x03\xD5" + if offset % alignment: + self.disassembly.append(f"{offset:x}: d503201f\t\t nop") + self.body.extend(nop) + + def remove_jump(self) -> None: """Remove a zero-length continuation jump, if it exists.""" hole = max(self.holes, key=lambda hole: hole.offset) match hole: @@ -244,8 +252,9 @@ def remove_jump(self, *, alignment: int = 1) -> None: jump = b"\x00\x00\x00\x14" case _: return - if self.body[offset:] == jump and offset % alignment == 0: + if self.body[offset:] == jump: self.body = self.body[:offset] + self.disassembly = self.disassembly[:-2] self.holes.remove(hole) @@ -289,8 +298,8 @@ def process_relocations( self._trampolines.add(ordinal) hole.addend = ordinal hole.symbol = None - self.code.remove_jump(alignment=alignment) - self.code.pad(alignment) + self.code.remove_jump() + self.code.add_nop(alignment=alignment) self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: From 460d0d3b1d218f1b8ca0f67ff300826994e422be Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 10 Mar 2025 12:09:01 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst new file mode 100644 index 00000000000000..daf9c8ea09f081 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-10-12-08-57.gh-issue-130887.f823Ih.rst @@ -0,0 +1 @@ +Optimize the AArch64 code generation for the JIT. Patch by Diego Russo From d77cf294107d4d26f671f68f64fcbbf4c9d801c4 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Thu, 20 Mar 2025 16:40:43 +0000 Subject: [PATCH 3/3] Address feedback --- Tools/jit/_stencils.py | 29 +++++++++++++++++------------ Tools/jit/_targets.py | 13 ++++++++++++- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 7febd04e4416d4..8faa9e8cac2d85 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -209,13 +209,22 @@ def pad(self, alignment: int) -> None: self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) - def add_nop(self, alignment: int) -> None: - """Add a NOP if the offset is not aligned.""" + def add_nops(self, nop: bytes, alignment: int) -> None: + """Add NOPs until there is alignment. Fail if it is not possible.""" offset = len(self.body) - nop = b"\x1f\x20\x03\xD5" - if offset % alignment: - self.disassembly.append(f"{offset:x}: d503201f\t\t nop") - self.body.extend(nop) + nop_size = len(nop) + + # Calculate the gap to the next multiple of alignment. + gap = -offset % alignment + if gap: + if gap % nop_size == 0: + count = gap // nop_size + self.body.extend(nop * count) + else: + raise ValueError( + f"Cannot add nops of size '{nop_size}' to a body with " + f"offset '{offset}' to align with '{alignment}'" + ) def remove_jump(self) -> None: """Remove a zero-length continuation jump, if it exists.""" @@ -254,7 +263,6 @@ def remove_jump(self) -> None: return if self.body[offset:] == jump: self.body = self.body[:offset] - self.disassembly = self.disassembly[:-2] self.holes.remove(hole) @@ -275,10 +283,7 @@ class StencilGroup: _trampolines: set[int] = dataclasses.field(default_factory=set, init=False) def process_relocations( - self, - known_symbols: dict[str, int], - *, - alignment: int = 1, + self, known_symbols: dict[str, int], *, alignment: int = 1, nop: bytes = b"" ) -> None: """Fix up all GOT and internal relocations for this stencil group.""" for hole in self.code.holes.copy(): @@ -299,7 +304,7 @@ def process_relocations( hole.addend = ordinal hole.symbol = None self.code.remove_jump() - self.code.add_nop(alignment=alignment) + self.code.add_nops(nop=nop, alignment=alignment) self.data.pad(8) for stencil in [self.code, self.data]: for hole in stencil.holes: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index aa2b56abf446b1..b5a839e07d4daf 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -44,6 +44,15 @@ class _Target(typing.Generic[_S, _R]): verbose: bool = False known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) + def _get_nop(self) -> bytes: + if re.fullmatch(r"aarch64-.*", self.triple): + nop = b"\x1f\x20\x03\xD5" + elif re.fullmatch(r"x86_64-.*|i686.*", self.triple): + nop = b"\x90" + else: + raise ValueError(f"NOP not defined for {self.triple}") + return nop + def _compute_digest(self, out: pathlib.Path) -> str: hasher = hashlib.sha256() hasher.update(self.triple.encode()) @@ -172,7 +181,9 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: stencil_groups = {task.get_name(): task.result() for task in tasks} for stencil_group in stencil_groups.values(): stencil_group.process_relocations( - known_symbols=self.known_symbols, alignment=self.alignment + known_symbols=self.known_symbols, + alignment=self.alignment, + nop=self._get_nop(), ) return stencil_groups