From c007d0c7434fea198f4b3cc29db5014cedf0ac9e Mon Sep 17 00:00:00 2001
From: jaketrookman
Date: Fri, 23 Jan 2026 16:09:41 -0500
Subject: [PATCH] update doc string and add new tests that pass

Expand the Series.isnull docstring (null-detection rules, See Also)
and add tests that compare Series.isnull against pandas for numeric,
string, empty, DataFrame-column, and large random inputs.
---
 arkouda/pandas/series.py    |  31 +++++++---
 tests/pandas/series_test.py | 114 ++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 9 deletions(-)

diff --git a/arkouda/pandas/series.py b/arkouda/pandas/series.py
index ac493fcf803..80a710af750 100644
--- a/arkouda/pandas/series.py
+++ b/arkouda/pandas/series.py
@@ -1395,18 +1395,32 @@ def isnull(self) -> Series:
         """
         Series.isnull is an alias for Series.isna.
 
-        Detect missing values.
+        Detect missing values in the Series.
 
-        Return a boolean same-sized object indicating if the values are NA. NA values,
-        such as numpy.NaN, gets mapped to True values.
-        Everything else gets mapped to False values.
-        Characters such as empty strings '' are not considered NA values.
+        ``Series.isnull`` is an alias for ``Series.isna`` and returns a boolean
+        Series indicating which elements are considered missing (null). The result
+        is indexed the same as the original Series.
+
+        Null detection follows pandas semantics:
+
+        * Numeric values equal to ``NaN`` are considered null.
+        * ``None`` cannot be stored; convert it to ``NaN`` before building the Series.
+        * For string data, the literal string ``"NaN"`` is not null.
+        * Empty strings ``''`` are **not** considered null.
+
+        All other values are treated as non-null.
 
         Returns
         -------
         Series
-            Mask of bool values for each element in Series
-            that indicates whether an element is an NA value.
+            A boolean Series of the same size, where ``True`` indicates a missing
+            (NA) value and ``False`` indicates a valid value.
+
+        See Also
+        --------
+        Series.notnull : Boolean inverse of ``isnull``.
+        Series.isna : Identical method; primary implementation.
+        pandas.Series.isnull : Reference behavior.
 
         Examples
         --------
@@ -1414,13 +1428,12 @@ def isnull(self) -> Series:
         >>> from arkouda import Series
         >>> import numpy as np
 
-        >>> s = Series(ak.array([1, 2, np.nan]), index = ak.array([1, 2, 4]))
+        >>> s = Series(ak.array([1, 2, np.nan]), index=ak.array([1, 2, 4]))
         >>> s.isnull()
         1    False
         2    False
         4     True
         dtype: bool
-
         """
         return self.isna()
 
diff --git a/tests/pandas/series_test.py b/tests/pandas/series_test.py
index 1ac6dace3d9..8ccb3fda06a 100644
--- a/tests/pandas/series_test.py
+++ b/tests/pandas/series_test.py
@@ -802,3 +802,117 @@ def test_iloc(self):
             _s1.iloc[[True, False, True]]
         with pytest.raises(IndexError):
             s1.iloc[[True, False, True]]
+
+    def test_isnull_across_types(self):
+        """
+        Condensed set of Series.isnull tests covering Arkouda-supported types:
+        - numeric with NaN
+        - numeric with None (manually mapped to NaN)
+        - strings ('' is NOT null and None is not allowed)
+        - no-null arrays
+        - all-null arrays (numeric)
+        - empty arrays
+        """
+
+        numeric_cases = [
+            [1, 0, np.nan, 3, np.nan],  # numeric with NaN
+            [1, None, 3, None],  # numeric with None → must map to NaN manually
+            [1, 2, 3],  # no nulls
+            [np.nan, np.nan, np.nan],  # all nulls
+            [],  # empty
+        ]
+
+        string_cases = [
+            ["a", "", "c", ""],  # valid string array
+        ]
+
+        # ---- numeric cases ----
+        for case in numeric_cases:
+            # Convert None → np.nan (Arkouda requirement)
+            cleaned = [float(x) if x is not None else np.nan for x in case]
+
+            pd_case = pd.Series(cleaned, dtype="float")
+            expected = pd_case.isnull().to_list()
+
+            ak_case = ak.Series(cleaned)
+            result = ak_case.isnull()
+
+            ak_list = result.values.to_ndarray().tolist()
+            assert ak_list == expected
+
+        # ---- string cases ----
+        for case in string_cases:
+            pd_case = pd.Series(case, dtype="string")
+            # Arkouda never treats '' as null; pandas agrees, so use it as the oracle
+            expected = pd_case.isnull().to_list()
+
+            ak_case = ak.Series(case)
+            result = ak_case.isnull()
+            ak_list = result.values.to_ndarray().tolist()
+
+            assert ak_list == expected
+
+    def test_series_isnull_notnull_symmetry(self):
+        """Test that notnull is always the logical negation of isnull."""
+        data = ak.Series([1, ak.nan, 3, ak.nan])
+
+        isnull = data.isnull().values
+        notnull = data.notnull().values
+
+        assert (isnull == ~notnull).all()
+
+    def test_isnull_series_and_dataframe(self):
+        """
+        Test Series.isnull behavior and DataFrame null handling
+        via column-wise Series.isnull(), matching pandas semantics
+        for Arkouda-supported dtypes.
+        """
+
+        # ---- Series: numeric ----
+        pd_series_num = pd.Series([1.0, np.nan, 3.0, np.nan])
+        ak_series_num = ak.Series([1.0, ak.nan, 3.0, ak.nan])
+
+        assert (ak_series_num.isnull().values.to_ndarray()
+                == pd_series_num.isnull().values).all()
+
+        # ---- Series: string ----
+        pd_series_str = pd.Series(["a", "", "b", ""])
+        ak_series_str = ak.Series(["a", "", "b", ""])
+
+        assert (ak_series_str.isnull().values.to_ndarray()
+                == pd_series_str.isnull().values).all()
+
+        # ---- DataFrame: numeric (column-wise) ----
+        pd_df = pd.DataFrame(
+            {
+                "a": [1.0, np.nan, 3.0],
+                "b": [np.nan, 2.0, np.nan],
+            }
+        )
+        ak_df = ak.DataFrame(pd_df)
+
+        for col in pd_df.columns:
+            pd_mask = pd_df[col].isnull().values
+
+            # 🔑 Wrap column pdarray as Series
+            ak_col = ak.Series(ak_df[col])
+            ak_mask = ak_col.isnull().values.to_ndarray()
+
+            assert (ak_mask == pd_mask).all()
+
+    def test_isnull_large_random(self):
+        """Stress test Series.isnull with many random NaNs."""
+        rng = np.random.default_rng(0)  # fixed seed keeps failures reproducible
+        np_data = rng.random(5000)
+        np_data[rng.choice(5000, size=250, replace=False)] = np.nan
+
+        # Pandas oracle
+        pd_mask = pd.Series(np_data).isnull().values
+
+        # Arkouda Series
+        ak_series = ak.Series(ak.array(np_data))
+        ak_mask = ak_series.isnull().values
+        ak_mask_np = ak_mask.to_ndarray()
+
+        assert (ak_mask_np == pd_mask).all()
+        assert ak_mask.sum() == pd_mask.sum()