Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changes/3797.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix an issue that prevented the correct parsing of special NumPy ``uint32`` dtypes that result,
for example, from bitwise operations on ``uint32`` arrays on Windows.
22 changes: 22 additions & 0 deletions src/zarr/core/dtype/npy/int.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,28 @@ class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness):
_zarr_v3_name: ClassVar[Literal["uint32"]] = "uint32"
_zarr_v2_names: ClassVar[tuple[Literal[">u4"], Literal["<u4"]]] = (">u4", "<u4")

@classmethod
def _check_native_dtype(
    cls: type[Self], dtype: TBaseDType
) -> TypeGuard[np.dtypes.UInt32DType]:
    """
    Check whether ``dtype`` is acceptable as the native dtype of this data type.

    The inherited check is consulted first. It is extended here because of a
    Windows-specific quirk: ``np.array([1], dtype=np.uint32) & 1`` produces an
    instance of ``np.dtypes.UIntDType`` rather than ``np.dtypes.UInt32DType``,
    even though both describe 32-bit unsigned integers. (Unlike
    ``np.dtype('i')``, ``np.dtype('u')`` raises an error.) To cover that case,
    a dtype that compares equal to ``np.dtypes.UInt32DType()`` is accepted too.

    Parameters
    ----------
    dtype : TBaseDType
        The dtype to check.

    Returns
    -------
    bool
        True if the dtype matches, False otherwise.
    """
    matches_class = super()._check_native_dtype(dtype)
    if matches_class:
        return matches_class
    # Fallback: accept dtypes of a different class that compare equal to uint32.
    return dtype == np.dtypes.UInt32DType()

@classmethod
def from_native_dtype(cls, dtype: TBaseDType) -> Self:
"""
Expand Down
11 changes: 10 additions & 1 deletion tests/test_dtype/test_npy/test_int.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,16 @@ class TestUInt16(BaseTestZDType):
class TestUInt32(BaseTestZDType):
test_cls = UInt32
scalar_type = np.uint32
valid_dtype = (np.dtype(">u4"), np.dtype("<u4"))

# On Windows, this creates a UIntDType (instead of a UInt32DType),
# similar to how np.dtype('i') creates an IntDType instead of an Int32DType.
# However, np.dtype('u') raises a TypeError.
uint_dtype = (np.array([1], dtype=np.uint32) & 1).dtype
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the & 1 for?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To create the special dtype. Without the `& 1`, it is `UInt32DType`; with the `& 1`, it is `UIntDType`.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, thanks for the clarification. Why is numpy doing this?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea. It took me a while to figure out how to create an array with such a dtype after I encountered this error in more complex code.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I can imagine how frustrating that must have been. If this happens again, we might need to consider relying on some kind of structural checks instead of trusting NumPy's class hierarchy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm looking for reports of this on the numpy issue tracker, this one seems related: numpy/numpy#17351


# The behavior of some tests associated with this class variable is
# order-dependent -- uint_dtype correctly fails certain tests only if it's not
# in the last position of the tuple. I have no idea how this is possible!
valid_dtype = (uint_dtype, np.dtype(">u4"), np.dtype("<u4"))
invalid_dtype = (
np.dtype(np.int8),
np.dtype(np.int16),
Expand Down