Skip to content

Commit d66a960

Browse files
DOC: add storage and na_value to StringDtype reference page (#63104)
1 parent 1c08a93 commit d66a960

File tree

5 files changed

+51
-7
lines changed

5 files changed

+51
-7
lines changed

ci/code_checks.sh

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7979
-i "pandas.api.typing.DataFrameGroupBy.plot PR02" \
8080
-i "pandas.api.typing.SeriesGroupBy.plot PR02" \
8181
-i "pandas.api.typing.Resampler.quantile PR01,PR07" \
82+
-i "pandas.StringDtype.storage SA01" \
83+
-i "pandas.StringDtype.na_value SA01" \
8284
-i "pandas.tseries.offsets.BDay PR02,SA01" \
8385
-i "pandas.tseries.offsets.BHalfYearBegin.is_on_offset GL08" \
8486
-i "pandas.tseries.offsets.BHalfYearBegin.n GL08" \

doc/source/reference/arrays.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -637,6 +637,8 @@ with a bool :class:`numpy.ndarray`.
637637
DatetimeTZDtype.tz
638638
PeriodDtype.freq
639639
IntervalDtype.subtype
640+
StringDtype.storage
641+
StringDtype.na_value
640642

641643
*********
642644
Utilities

doc/source/user_guide/text.rst

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -753,7 +753,10 @@ Differences in behavior will be primarily due to the kind of NA value.
753753
The four :class:`StringDtype` variants
754754
======================================
755755

756-
There are four :class:`StringDtype` variants that are available to users.
756+
There are four :class:`StringDtype` variants that are available to users,
757+
controlled by the ``storage`` and ``na_value`` parameters of :class:`StringDtype`.
758+
At runtime, these can be checked via the :attr:`StringDtype.storage`
759+
and :attr:`StringDtype.na_value` attributes.
757760

758761
Python storage with ``np.nan`` values
759762
-------------------------------------

pandas/core/arrays/string_.py

Lines changed: 37 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,8 @@ class StringDtype(StorageExtensionDtype):
119119
120120
Attributes
121121
----------
122-
None
122+
storage
123+
na_value
123124
124125
Methods
125126
-------
@@ -149,8 +150,40 @@ def name(self) -> str: # type: ignore[override]
149150
# follows NumPy semantics, which uses nan.
150151
@property
151152
def na_value(self) -> libmissing.NAType | float: # type: ignore[override]
153+
"""
154+
The missing value representation for this dtype.
155+
156+
This value indicates which missing value semantics are used by this dtype.
157+
Returns ``np.nan`` for the default string dtype with NumPy semantics,
158+
and ``pd.NA`` for the opt-in string dtype with pandas NA semantics.
159+
160+
Examples
161+
--------
162+
>>> ser = pd.Series(["a", "b"])
163+
>>> ser.dtype
164+
<StringDtype(na_value=nan)>
165+
>>> ser.dtype.na_value
166+
nan
167+
"""
152168
return self._na_value
153169

170+
@property
171+
def storage(self) -> str:
172+
"""
173+
The storage backend for this dtype.
174+
175+
Can be either "pyarrow" or "python".
176+
177+
Examples
178+
--------
179+
>>> ser = pd.Series(["a", "b"])
180+
>>> ser.dtype
181+
<StringDtype(na_value=nan)>
182+
>>> ser.dtype.storage
183+
'pyarrow'
184+
"""
185+
return self._storage
186+
154187
_metadata = ("storage", "_na_value") # type: ignore[assignment]
155188

156189
def __init__(
@@ -185,7 +218,7 @@ def __init__(
185218
elif na_value is not libmissing.NA:
186219
raise ValueError(f"'na_value' must be np.nan or pd.NA, got {na_value}")
187220

188-
self.storage = cast(str, storage)
221+
self._storage = cast(str, storage)
189222
self._na_value = na_value
190223

191224
def __repr__(self) -> str:
@@ -211,7 +244,7 @@ def __eq__(self, other: object) -> bool:
211244

212245
def __setstate__(self, state: MutableMapping[str, Any]) -> None:
213246
# back-compat for pandas < 2.3, where na_value did not yet exist
214-
self.storage = state.pop("storage", "python")
247+
self._storage = state.pop("storage", "python")
215248
self._na_value = state.pop("_na_value", libmissing.NA)
216249

217250
def __hash__(self) -> int:
@@ -306,7 +339,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
306339
# if both python and pyarrow storage -> priority to pyarrow
307340
storage = "pyarrow"
308341
else:
309-
storage = next(iter(storages)) # type: ignore[assignment]
342+
storage = next(iter(storages))
310343

311344
na_value: libmissing.NAType | float
312345
if len(na_values) == 2:

pandas/core/dtypes/base.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -457,8 +457,8 @@ class StorageExtensionDtype(ExtensionDtype):
457457
name: str
458458
_metadata = ("storage",)
459459

460-
def __init__(self, storage: str | None = None) -> None:
461-
self.storage = storage
460+
def __init__(self, storage: str) -> None:
461+
self._storage = storage
462462

463463
def __repr__(self) -> str:
464464
return f"{self.name}[{self.storage}]"
@@ -479,6 +479,10 @@ def __hash__(self) -> int:
479479
def na_value(self) -> libmissing.NAType:
480480
return libmissing.NA
481481

482+
@property
483+
def storage(self) -> str:
484+
return self._storage
485+
482486

483487
@set_module("pandas.api.extensions")
484488
def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:

0 commit comments

Comments
 (0)