From 72427a982657f7443f4c29a68148e50d8a70e8c7 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 9 Jan 2026 10:59:56 +0000 Subject: [PATCH 1/6] [mypyc] Add inline primitives for bytes.__getitem__ --- mypyc/irbuild/specialize.py | 59 +++++++++++++++++++++++++++++- mypyc/lib-rt/bytes_extra_ops.h | 21 +++++++++++ mypyc/primitives/bytes_ops.py | 38 +++++++++++++++++++ mypyc/test-data/irbuild-bytes.test | 22 +++++++++++ 4 files changed, 139 insertions(+), 1 deletion(-) diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py index c0ed5f9f69da..e70d9bc192e9 100644 --- a/mypyc/irbuild/specialize.py +++ b/mypyc/irbuild/specialize.py @@ -94,7 +94,13 @@ join_formatted_strings, tokenizer_format_call, ) -from mypyc.primitives.bytes_ops import isinstance_bytearray, isinstance_bytes +from mypyc.primitives.bytes_ops import ( + bytes_adjust_index_op, + bytes_get_item_unsafe_op, + bytes_range_check_op, + isinstance_bytearray, + isinstance_bytes, +) from mypyc.primitives.dict_ops import ( dict_items_op, dict_keys_op, @@ -1299,3 +1305,54 @@ def translate_bytes_writer_set_item( ) return builder.none() + + +@specialize_dunder("__getitem__", bytes_rprimitive) +def translate_bytes_get_item( + builder: IRBuilder, base_expr: Expression, args: list[Expression], ctx_expr: Expression +) -> Value | None: + """Optimized bytes.__getitem__ implementation with bounds checking.""" + # Check that we have exactly one argument + if len(args) != 1: + return None + + # Get the bytes object + obj = builder.accept(base_expr) + + # Get the index argument + index = builder.accept(args[0]) + + # Only use the optimized version for i64 index (requires experimental mode) + if not is_int64_rprimitive(index.type): + return None + + # Adjust the index (handle negative indices) + adjusted_index = builder.primitive_op( + bytes_adjust_index_op, [obj, index], ctx_expr.line + ) + + # Check if the adjusted index is in valid range + range_check = builder.primitive_op( + bytes_range_check_op, [obj, adjusted_index], ctx_expr.line + ) + + # Create blocks for branching + valid_block = BasicBlock() + invalid_block = BasicBlock() + + builder.add_bool_branch(range_check, valid_block, invalid_block) + + # Handle invalid index - raise IndexError + builder.activate_block(invalid_block) + builder.add( + RaiseStandardError(RaiseStandardError.INDEX_ERROR, "index out of range", ctx_expr.line) + ) + builder.add(Unreachable()) + + # Handle valid index - get the item + builder.activate_block(valid_block) + result = builder.primitive_op( + bytes_get_item_unsafe_op, [obj, adjusted_index], ctx_expr.line + ) + + return result diff --git a/mypyc/lib-rt/bytes_extra_ops.h b/mypyc/lib-rt/bytes_extra_ops.h index eebb5a345438..0f2917764ba0 100644 --- a/mypyc/lib-rt/bytes_extra_ops.h +++ b/mypyc/lib-rt/bytes_extra_ops.h @@ -2,9 +2,30 @@ #define MYPYC_BYTES_EXTRA_OPS_H #include +#include #include "CPy.h" // Optimized bytes translate operation PyObject *CPyBytes_Translate(PyObject *bytes, PyObject *table); +// Optimized bytes.__getitem__ operations + +// If index is negative, convert to non-negative index (no range checking) +static inline int64_t CPyBytes_AdjustIndex(PyObject *obj, int64_t index) { + if (index < 0) { + return index + Py_SIZE(obj); + } + return index; +} + +// Check if index is in valid range [0, len) +static inline bool CPyBytes_RangeCheck(PyObject *obj, int64_t index) { + return index >= 0 && index < Py_SIZE(obj); +} + +// Get byte at index (no bounds checking) - returns as CPyTagged +static inline CPyTagged CPyBytes_GetItemUnsafe(PyObject *obj, int64_t index) { + return ((CPyTagged)(uint8_t)(PyBytes_AS_STRING(obj))[index]) << 1; +} + #endif diff --git a/mypyc/primitives/bytes_ops.py b/mypyc/primitives/bytes_ops.py index 2268acd80aa5..c39ccbc4fae3 100644 --- a/mypyc/primitives/bytes_ops.py +++ b/mypyc/primitives/bytes_ops.py @@ -12,15 +12,18 @@ c_int_rprimitive, c_pyssize_t_rprimitive, dict_rprimitive, + int64_rprimitive, int_rprimitive, list_rprimitive, object_rprimitive, str_rprimitive, + uint8_rprimitive, ) from mypyc.primitives.registry import ( ERR_NEG_INT, binary_op, custom_op, + custom_primitive_op, function_op, load_address_op, method_op, @@ -166,3 +169,38 @@ c_function_name="CPyBytes_Ord", error_kind=ERR_MAGIC, ) + +# Optimized bytes.__getitem__ operations + +# bytes index adjustment - convert negative index to positive +bytes_adjust_index_op = custom_primitive_op( + name="bytes_adjust_index", + arg_types=[bytes_rprimitive, int64_rprimitive], + return_type=int64_rprimitive, + c_function_name="CPyBytes_AdjustIndex", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[BYTES_EXTRA_OPS], +) + +# bytes range check - check if index is in valid range +bytes_range_check_op = custom_primitive_op( + name="bytes_range_check", + arg_types=[bytes_rprimitive, int64_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyBytes_RangeCheck", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[BYTES_EXTRA_OPS], +) + +# bytes.__getitem__() - get byte at index (no bounds checking) +bytes_get_item_unsafe_op = custom_primitive_op( + name="bytes_get_item_unsafe", + arg_types=[bytes_rprimitive, int64_rprimitive], + return_type=int_rprimitive, + c_function_name="CPyBytes_GetItemUnsafe", + error_kind=ERR_NEVER, + experimental=True, + dependencies=[BYTES_EXTRA_OPS], +) diff --git a/mypyc/test-data/irbuild-bytes.test b/mypyc/test-data/irbuild-bytes.test index 613ba4e46eee..269f9a28fa61 100644 --- a/mypyc/test-data/irbuild-bytes.test +++ b/mypyc/test-data/irbuild-bytes.test @@ -113,6 +113,28 @@ L0: r0 = CPyBytes_GetItem(a, i) return r0 +[case testBytesIndex_experimental_64bit] +from mypy_extensions import i64 + +def f(a: bytes, i: i64) -> int: + return a[i] +[out] +def f(a, i): + a :: bytes + i, r0 :: i64 + r1, r2 :: bool + r3 :: int +L0: + r0 = CPyBytes_AdjustIndex(a, i) + r1 = CPyBytes_RangeCheck(a, r0) + if r1 goto L2 else goto L1 :: bool +L1: + r2 = raise IndexError('index out of range') + unreachable +L2: + r3 = CPyBytes_GetItemUnsafe(a, r0) + return r3 + [case testBytesConcat] def f(a: bytes, b: bytes) -> bytes: return a + b From d2f9c63352ea82b4948469e50bfdeae899f796c3 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 9 Jan 2026 11:06:05 +0000 Subject: [PATCH 2/6] Refactor __getitem__ specializations --- mypyc/irbuild/specialize.py | 120 +++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 57 deletions(-) diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py index e70d9bc192e9..68965e12a5b3 100644 --- a/mypyc/irbuild/specialize.py +++ b/mypyc/irbuild/specialize.py @@ -1212,30 +1212,56 @@ def translate_object_setattr(builder: IRBuilder, expr: CallExpr, callee: RefExpr return builder.call_c(generic_setattr, [self_reg, name_reg, value], expr.line) -@specialize_dunder("__getitem__", bytes_writer_rprimitive) -def translate_bytes_writer_get_item( - builder: IRBuilder, base_expr: Expression, args: list[Expression], ctx_expr: Expression +def translate_getitem_with_bounds_check( + builder: IRBuilder, + base_expr: Expression, + args: list[Expression], + ctx_expr: Expression, + adjust_index_op: PrimitiveDescription, + range_check_op: PrimitiveDescription, + get_item_unsafe_op: PrimitiveDescription, + require_i64_index: bool = False, ) -> Value | None: - """Optimized BytesWriter.__getitem__ implementation with bounds checking.""" + """Shared helper for optimized __getitem__ with bounds checking. + + This implements the common pattern of: + 1. Adjusting negative indices + 2. Checking if index is in valid range + 3. Raising IndexError if out of range + 4. Getting the item if in range + + Args: + builder: The IR builder + base_expr: The base object expression + args: The arguments to __getitem__ (should be length 1) + ctx_expr: The context expression for line numbers + adjust_index_op: Primitive op to adjust negative indices + range_check_op: Primitive op to check if index is in valid range + get_item_unsafe_op: Primitive op to get item (no bounds checking) + require_i64_index: If True, only use optimization for i64 indices + + Returns: + The result value, or None if optimization doesn't apply + """ # Check that we have exactly one argument if len(args) != 1: return None - # Get the BytesWriter object + # Get the object obj = builder.accept(base_expr) # Get the index argument index = builder.accept(args[0]) + # If required, check that index is i64 (for experimental mode) + if require_i64_index and not is_int64_rprimitive(index.type): + return None + # Adjust the index (handle negative indices) - adjusted_index = builder.primitive_op( - bytes_writer_adjust_index_op, [obj, index], ctx_expr.line - ) + adjusted_index = builder.primitive_op(adjust_index_op, [obj, index], ctx_expr.line) # Check if the adjusted index is in valid range - range_check = builder.primitive_op( - bytes_writer_range_check_op, [obj, adjusted_index], ctx_expr.line - ) + range_check = builder.primitive_op(range_check_op, [obj, adjusted_index], ctx_expr.line) # Create blocks for branching valid_block = BasicBlock() @@ -1252,13 +1278,27 @@ def translate_bytes_writer_get_item( # Handle valid index - get the item builder.activate_block(valid_block) - result = builder.primitive_op( - bytes_writer_get_item_unsafe_op, [obj, adjusted_index], ctx_expr.line - ) + result = builder.primitive_op(get_item_unsafe_op, [obj, adjusted_index], ctx_expr.line) return result +@specialize_dunder("__getitem__", bytes_writer_rprimitive) +def translate_bytes_writer_get_item( + builder: IRBuilder, base_expr: Expression, args: list[Expression], ctx_expr: Expression +) -> Value | None: + """Optimized BytesWriter.__getitem__ implementation with bounds checking.""" + return translate_getitem_with_bounds_check( + builder, + base_expr, + args, + ctx_expr, + bytes_writer_adjust_index_op, + bytes_writer_range_check_op, + bytes_writer_get_item_unsafe_op, + ) + + @specialize_dunder("__setitem__", bytes_writer_rprimitive) def translate_bytes_writer_set_item( builder: IRBuilder, base_expr: Expression, args: list[Expression], ctx_expr: Expression @@ -1312,47 +1352,13 @@ def translate_bytes_get_item( builder: IRBuilder, base_expr: Expression, args: list[Expression], ctx_expr: Expression ) -> Value | None: """Optimized bytes.__getitem__ implementation with bounds checking.""" - # Check that we have exactly one argument - if len(args) != 1: - return None - - # Get the bytes object - obj = builder.accept(base_expr) - - # Get the index argument - index = builder.accept(args[0]) - - # Only use the optimized version for i64 index (requires experimental mode) - if not is_int64_rprimitive(index.type): - return None - - # Adjust the index (handle negative indices) - adjusted_index = builder.primitive_op( - bytes_adjust_index_op, [obj, index], ctx_expr.line - ) - - # Check if the adjusted index is in valid range - range_check = builder.primitive_op( - bytes_range_check_op, [obj, adjusted_index], ctx_expr.line - ) - - # Create blocks for branching - valid_block = BasicBlock() - invalid_block = BasicBlock() - - builder.add_bool_branch(range_check, valid_block, invalid_block) - - # Handle invalid index - raise IndexError - builder.activate_block(invalid_block) - builder.add( - RaiseStandardError(RaiseStandardError.INDEX_ERROR, "index out of range", ctx_expr.line) - ) - builder.add(Unreachable()) - - # Handle valid index - get the item - builder.activate_block(valid_block) - result = builder.primitive_op( - bytes_get_item_unsafe_op, [obj, adjusted_index], ctx_expr.line + return translate_getitem_with_bounds_check( + builder, + base_expr, + args, + ctx_expr, + bytes_adjust_index_op, + bytes_range_check_op, + bytes_get_item_unsafe_op, + require_i64_index=True, ) - - return result From d0e671801a2bd016acadc5af8303475acf6d54be Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 9 Jan 2026 11:06:42 +0000 Subject: [PATCH 3/6] Simplify --- mypyc/irbuild/specialize.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py index 68965e12a5b3..05c492c57265 100644 --- a/mypyc/irbuild/specialize.py +++ b/mypyc/irbuild/specialize.py @@ -1220,7 +1220,6 @@ def translate_getitem_with_bounds_check( adjust_index_op: PrimitiveDescription, range_check_op: PrimitiveDescription, get_item_unsafe_op: PrimitiveDescription, - require_i64_index: bool = False, ) -> Value | None: """Shared helper for optimized __getitem__ with bounds checking. @@ -1253,10 +1252,6 @@ def translate_getitem_with_bounds_check( # Get the index argument index = builder.accept(args[0]) - # If required, check that index is i64 (for experimental mode) - if require_i64_index and not is_int64_rprimitive(index.type): - return None - # Adjust the index (handle negative indices) adjusted_index = builder.primitive_op(adjust_index_op, [obj, index], ctx_expr.line) @@ -1360,5 +1355,4 @@ def translate_bytes_get_item( bytes_adjust_index_op, bytes_range_check_op, bytes_get_item_unsafe_op, - require_i64_index=True, ) From 961118b92ca0e559be8f4ca0a3d50ed1918f3cae Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 9 Jan 2026 11:10:30 +0000 Subject: [PATCH 4/6] Fix tests --- mypyc/test-data/irbuild-bytes.test | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/mypyc/test-data/irbuild-bytes.test b/mypyc/test-data/irbuild-bytes.test index 269f9a28fa61..fdaa77c9ad2e 100644 --- a/mypyc/test-data/irbuild-bytes.test +++ b/mypyc/test-data/irbuild-bytes.test @@ -103,17 +103,6 @@ L0: return r0 [case testBytesIndex] -def f(a: bytes, i: int) -> int: - return a[i] -[out] -def f(a, i): - a :: bytes - i, r0 :: int -L0: - r0 = CPyBytes_GetItem(a, i) - return r0 - -[case testBytesIndex_experimental_64bit] from mypy_extensions import i64 def f(a: bytes, i: i64) -> int: From 7a933dc0cfa2e86fce41592a64e8f943a42f9aa3 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 9 Jan 2026 11:45:39 +0000 Subject: [PATCH 5/6] Update docstring --- mypyc/irbuild/specialize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py index 05c492c57265..cb9510bc890b 100644 --- a/mypyc/irbuild/specialize.py +++ b/mypyc/irbuild/specialize.py @@ -1237,7 +1237,6 @@ def translate_getitem_with_bounds_check( adjust_index_op: Primitive op to adjust negative indices range_check_op: Primitive op to check if index is in valid range get_item_unsafe_op: Primitive op to get item (no bounds checking) - require_i64_index: If True, only use optimization for i64 indices Returns: The result value, or None if optimization doesn't apply From e0b3e7fe799e1969e2fd22f1566511363aa51553 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 9 Jan 2026 13:15:14 +0000 Subject: [PATCH 6/6] Lint --- mypyc/primitives/bytes_ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mypyc/primitives/bytes_ops.py b/mypyc/primitives/bytes_ops.py index c39ccbc4fae3..22223279d43a 100644 --- a/mypyc/primitives/bytes_ops.py +++ b/mypyc/primitives/bytes_ops.py @@ -17,7 +17,6 @@ list_rprimitive, object_rprimitive, str_rprimitive, - uint8_rprimitive, ) from mypyc.primitives.registry import ( ERR_NEG_INT,