Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions arkouda/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,32 +1395,45 @@ def isnull(self) -> Series:
"""
Series.isnull is an alias for Series.isna.

Detect missing values.
Detect missing values in the Series.

Return a boolean same-sized object indicating if the values are NA. NA values,
such as numpy.NaN, get mapped to True values.
Everything else gets mapped to False values.
Characters such as empty strings '' are not considered NA values.
``Series.isnull`` is an alias for ``Series.isna`` and returns a boolean
Series indicating which elements are considered missing (null). The result
is indexed the same as the original Series.

Null detection follows pandas semantics:

* Numeric values equal to ``NaN`` are considered null.
* ``None`` values are considered null.
* For string data, ``None`` is null but the literal string ``"NaN"`` is not.
* Empty strings ``''`` are **not** considered null.

All other values are treated as non-null.

Returns
-------
Series
Mask of bool values for each element in Series
that indicates whether an element is an NA value.
A boolean Series of the same size, where ``True`` indicates a missing
(NA) value and ``False`` indicates a valid value.

See Also
--------
Series.notnull : Boolean inverse of ``isnull``.
Series.isna : Identical method; primary implementation.
pandas.Series.isnull : Reference behavior.

Examples
--------
>>> import arkouda as ak
>>> from arkouda import Series
>>> import numpy as np

>>> s = Series(ak.array([1, 2, np.nan]), index = ak.array([1, 2, 4]))
>>> s = Series(ak.array([1, 2, np.nan]), index=ak.array([1, 2, 4]))
>>> s.isnull()
1 False
2 False
4 True
dtype: bool

"""
return self.isna()

Expand Down
114 changes: 114 additions & 0 deletions tests/pandas/series_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,3 +802,117 @@ def test_iloc(self):
_s1.iloc[[True, False, True]]
with pytest.raises(IndexError):
s1.iloc[[True, False, True]]

def test_isnull_across_types(self):
    """
    Check ``Series.isnull`` against pandas across Arkouda-supported inputs.

    Inputs covered:
    - numeric with NaN
    - numeric with None (manually mapped to NaN before building the Series)
    - numeric with no nulls
    - numeric with only nulls
    - empty
    - strings containing '' ('' is NOT null; None is not used in the input)

    pandas is the oracle for every case, so the numeric and string branches
    are validated the same way.
    """
    numeric_cases = [
        [1, 0, np.nan, 3, np.nan],  # numeric with NaN
        [1, None, 3, None],  # numeric with None → must map to NaN manually
        [1, 2, 3],  # no nulls
        [np.nan, np.nan, np.nan],  # all nulls
        [],  # empty
    ]

    string_cases = [
        ["a", "", "c", ""],  # valid string array
    ]

    # ---- numeric cases ----
    for case in numeric_cases:
        # Convert None → np.nan (Arkouda requirement)
        cleaned = [np.nan if x is None else float(x) for x in case]

        expected = pd.Series(cleaned, dtype="float").isnull().to_list()
        actual = ak.Series(cleaned).isnull().values.to_ndarray().tolist()

        assert actual == expected, f"isnull mismatch for numeric case {case!r}"

    # ---- string cases ----
    for case in string_cases:
        # '' is a real (non-null) value; take the expectation from pandas
        # rather than hard-coding it, mirroring the numeric branch.
        expected = pd.Series(case, dtype="string").isnull().to_list()
        actual = ak.Series(case).isnull().values.to_ndarray().tolist()

        assert actual == expected, f"isnull mismatch for string case {case!r}"

def test_series_isnull_notnull_symmetry(self):
    """Verify that ``isnull`` and ``notnull`` are element-wise complements."""
    series = ak.Series([1, ak.nan, 3, ak.nan])

    null_mask = series.isnull().values
    valid_mask = series.notnull().values

    # Negating the "valid" mask must reproduce the "null" mask exactly.
    complement = ~valid_mask
    assert (null_mask == complement).all()

def test_isnull_series_and_dataframe(self):
    """
    Compare ``Series.isnull`` with pandas for standalone Series and for
    DataFrame columns wrapped as Series (column-wise null detection).
    """
    # ---- Series: numeric ----
    expected_num = pd.Series([1.0, np.nan, 3.0, np.nan]).isnull().values
    actual_num = ak.Series([1.0, ak.nan, 3.0, ak.nan]).isnull().values.to_ndarray()
    assert (actual_num == expected_num).all()

    # ---- Series: string ----
    words = ["a", "", "b", ""]
    expected_str = pd.Series(words).isnull().values
    actual_str = ak.Series(words).isnull().values.to_ndarray()
    assert (actual_str == expected_str).all()

    # ---- DataFrame: numeric (column-wise) ----
    pd_df = pd.DataFrame(
        {
            "a": [1.0, np.nan, 3.0],
            "b": [np.nan, 2.0, np.nan],
        }
    )
    ak_df = ak.DataFrame(pd_df)

    for name in pd_df.columns:
        expected_mask = pd_df[name].isnull().values

        # The column is wrapped in a Series so that Series.isnull is the
        # method under test.
        wrapped = ak.Series(ak_df[name])
        actual_mask = wrapped.isnull().values.to_ndarray()

        assert (actual_mask == expected_mask).all()

def test_isnull_large_random(self):
    """
    Stress test Series.isnull with many NaNs at random positions.

    The data comes from a generator with a fixed seed so that any failure
    can be reproduced exactly; the global NumPy random state is not touched.
    """
    size = 5000
    nan_count = 250

    rng = np.random.default_rng(seed=0)
    np_data = rng.random(size)
    # Distinct positions (replace=False), so exactly ``nan_count`` NaNs exist.
    np_data[rng.choice(size, size=nan_count, replace=False)] = np.nan

    # Pandas oracle
    pd_mask = pd.Series(np_data).isnull().values

    # Arkouda Series
    ak_mask = ak.Series(ak.array(np_data)).isnull().values

    assert (ak_mask.to_ndarray() == pd_mask).all()
    assert ak_mask.sum() == pd_mask.sum()
Loading