From 9bf8f7fa854d331f73f1f69c6225049636fc59fa Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 18:36:55 +0800 Subject: [PATCH 01/10] =?UTF-8?q?replaced=20all=20Appender=E3=80=81Substit?= =?UTF-8?q?ution=E3=80=81doc=20decorators=20in=20pandas/pandas/core/frame.?= =?UTF-8?q?py.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/core/frame.py | 7252 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 6723 insertions(+), 529 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9d6af3c7b9917..d5506a382b343 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -66,10 +66,7 @@ _chained_assignment_msg, ) from pandas.util._decorators import ( - Appender, - Substitution, deprecate_nonkeyword_arguments, - doc, set_module, ) from pandas.util._exceptions import ( @@ -153,7 +150,6 @@ ) from pandas.core.generic import ( NDFrame, - make_doc, ) from pandas.core.indexers import check_key_length from pandas.core.indexes.api import ( @@ -200,9 +196,7 @@ format as fmt, ) from pandas.io.formats.info import ( - INFO_DOCSTRING, DataFrameInfo, - frame_sub_kwargs, ) import pandas.plotting @@ -1302,17 +1296,6 @@ def to_string( encoding: str | None = ..., ) -> None: ... - @Substitution( - header_type="bool or list of str", - header="Write out the column names. If a list of columns " - "is given, it is assumed to be aliases for the " - "column names", - col_space_type="int, list or dict of int", - col_space="The minimum width of each column. If a list of ints is given " - "every integers corresponds with one column. 
If a dict is given, the key " - "references the column, while the value defines the space to use.", - ) - @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -1338,7 +1321,65 @@ def to_string( ) -> str | None: """ Render a DataFrame to a console-friendly tabular output. - %(shared_params)s + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : array-like, optional, default None + The subset of columns to write. Writes all columns by default. + col_space : int, list or dict of int, optional + The minimum width of each column. + If a list of ints is given every integers corresponds with one column. + If a dict is given, the key references the column, + while the value defines the space to use. + header : bool or list of str, optional + Write out the column names. If a list of columns is given, + it is assumed to be aliases for the column names. + index : bool, optional, default True + Whether to print index (row) labels. + na_rep : str, optional, default 'NaN' + String representation of ``NaN`` to use. + formatters : list, tuple or dict of one-param. functions, optional + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format : one-parameter function, optional, default None + Formatter function to apply to columns' elements if they are + floats. This function must return a unicode string and will be + applied only to the non-``NaN`` elements, with ``NaN`` being + handled by ``na_rep``. + sparsify : bool, optional, default True + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names : bool, optional, default True + Prints the names of the indexes. 
+ justify : str, default None + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset. + max_rows : int, optional + Maximum number of rows to display in the console. + max_cols : int, optional + Maximum number of columns to display in the console. + show_dimensions : bool, default False + Display DataFrame dimensions (number of rows by number of columns). + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + line_width : int, optional Width to wrap a line in characters. min_rows : int, optional @@ -1348,7 +1389,13 @@ def to_string( Max width to truncate each column in characters. By default, no limit. encoding : str, default "utf-8" Set character encoding. - %(returns)s + + Returns + ------- + str or None + If buf is None, returns the result as a string. Otherwise returns + None. + See Also -------- to_html : Convert DataFrame to HTML. @@ -1358,7 +1405,7 @@ def to_string( >>> d = {"col1": [1, 2, 3], "col2": [4, 5, 6]} >>> df = pd.DataFrame(d) >>> print(df.to_string()) - col1 col2 + col1 col2 0 1 4 1 2 5 2 3 6 @@ -2655,10 +2702,6 @@ def _from_arrays( ) return cls._from_mgr(mgr, axes=mgr.axes) - @doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "path", - ) def to_stata( self, path: FilePath | WriteBuffer[bytes], @@ -2706,7 +2749,7 @@ def to_stata( variable_labels : dict Dictionary containing columns as keys and variable labels as values. Each label must be 80 characters or smaller. - version : {{114, 117, 118, 119, None}}, default 114 + version : {114, 117, 118, 119, None}, default 114 Version to use in the output dta file. 
Set to None to let pandas decide between 118 or 119 formats depending on the number of columns in the frame. Version 114 can be read by Stata 10 and @@ -2728,9 +2771,34 @@ def to_stata( format. Only available if version is 117. Storing strings in the StrL format can produce smaller dta files if strings have more than 8 characters and values are repeated. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and 'path' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. + Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, + ``'xz'``, ``'tar'``} and other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression + and to create a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. + + .. versionadded:: 1.5.0 + Added support for `.tar` files. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. value_labels : dict of dicts Dictionary containing columns as keys and dictionaries of column value @@ -2991,7 +3059,6 @@ def to_parquet( **kwargs, ) -> None: ... 
- @doc(storage_options=_shared_docs["storage_options"]) def to_parquet( self, path: FilePath | WriteBuffer[bytes] | None = None, @@ -3019,7 +3086,7 @@ def to_parquet( object implementing a binary ``write()`` function. If None, the result is returned as bytes. If a string or path, it will be used as Root Directory path when writing a partitioned dataset. - engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' Parquet library to use. If 'auto', then the option ``io.parquet.engine`` is used. The default ``io.parquet.engine`` behavior is to try 'pyarrow', falling back to 'fastparquet' if @@ -3039,7 +3106,15 @@ def to_parquet( Column names by which to partition the dataset. Columns are partitioned in the order they are given. Must be None if path is not a string. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. filesystem : fsspec or pyarrow filesystem, default None Filesystem object to use when reading the parquet file. Only implemented @@ -3081,7 +3156,7 @@ def to_parquet( Examples -------- - >>> df = pd.DataFrame(data={{"col1": [1, 2], "col2": [3, 4]}}) + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) >>> df.to_parquet("df.parquet.gzip", compression="gzip") # doctest: +SKIP >>> pd.read_parquet("df.parquet.gzip") # doctest: +SKIP col1 col2 @@ -3289,14 +3364,6 @@ def to_html( encoding: str | None = ..., ) -> str: ... 
- @Substitution( - header_type="bool", - header="Whether to print column labels, default True", - col_space_type="str or int, list or dict of int or str", - col_space="The minimum width of each column in CSS length " - "units. An int is assumed to be px units.", - ) - @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) def to_html( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -3326,7 +3393,62 @@ def to_html( ) -> str | None: """ Render a DataFrame as an HTML table. - %(shared_params)s + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : array-like, optional, default None + The subset of columns to write. Writes all columns by default. + col_space : str or int, list or dict of int or str, optional + The minimum width of each column in CSS length units. + An int is assumed to be px units. + header : bool, optional + Whether to print column labels, default True. + index : bool, optional, default True + Whether to print index (row) labels. + na_rep : str, optional, default 'NaN' + String representation of ``NaN`` to use. + formatters : list, tuple or dict of one-param. functions, optional + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format : one-parameter function, optional, default None + Formatter function to apply to columns' elements if they are + floats. This function must return a unicode string and will be + applied only to the non-``NaN`` elements, with ``NaN`` being + handled by ``na_rep``. + sparsify : bool, optional, default True + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names : bool, optional, default True + Prints the names of the indexes. 
+ justify : str, default None + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset. + max_rows : int, optional + Maximum number of rows to display in the console. + max_cols : int, optional + Maximum number of columns to display in the console. + show_dimensions : bool, default False + Display DataFrame dimensions (number of rows by number of columns). + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + bold_rows : bool, default True Make the row labels bold in the output. classes : str or list or tuple, default None @@ -3348,79 +3470,85 @@ def to_html( Convert URLs to HTML links. encoding : str, default "utf-8" Set character encoding. - %(returns)s - See Also - -------- - to_string : Convert DataFrame to a string. - Examples - -------- - >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) - >>> html_string = df.to_html() - >>> print(html_string) - - - - - - - - - - - - - - - - - - - - -
col1col2
014
123
- - HTML output - - +----+-----+-----+ - | |col1 |col2 | - +====+=====+=====+ - |0 |1 |4 | - +----+-----+-----+ - |1 |2 |3 | - +----+-----+-----+ + Returns + ------- + str or None + If buf is None, returns the result as a string. Otherwise returns + None. - >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) - >>> html_string = df.to_html(index=False) - >>> print(html_string) - - - - - - - - - - - - - - - - - -
col1col2
14
23
- - HTML output - - +-----+-----+ - |col1 |col2 | - +=====+=====+ - |1 |4 | - +-----+-----+ - |2 |3 | - +-----+-----+ + See Also + -------- + to_string : Convert DataFrame to a string. + + Examples + -------- + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) + >>> html_string = df.to_html() + >>> print(html_string) + + + + + + + + + + + + + + + + + + + + +
col1col2
014
123
+ + HTML output + + +----+-----+-----+ + | |col1 |col2 | + +====+=====+=====+ + |0 |1 |4 | + +----+-----+-----+ + |1 |2 |3 | + +----+-----+-----+ + + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) + >>> html_string = df.to_html(index=False) + >>> print(html_string) + + + + + + + + + + + + + + + + + +
col1col2
14
23
+ + HTML output + + +-----+-----+ + |col1 |col2 | + +=====+=====+ + |1 |4 | + +-----+-----+ + |2 |3 | + +-----+-----+ """ if justify is not None and justify not in fmt.VALID_JUSTIFY_PARAMETERS: raise ValueError("Invalid value for justify parameter") @@ -3499,10 +3627,6 @@ def to_xml( storage_options: StorageOptions | None = ..., ) -> None: ... - @doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "path_or_buffer", - ) def to_xml( self, path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -3555,7 +3679,7 @@ def to_xml( Default namespaces should be given empty string key. For example, :: - namespaces = {{"": "https://example.com"}} + namespaces = {"": "https://example.com"} prefix : str, optional Namespace prefix to be used for every element and/or attribute @@ -3568,7 +3692,7 @@ def to_xml( pretty_print : bool, default True Whether output should be pretty printed with indentation and line breaks. - parser : {{'lxml','etree'}}, default 'lxml' + parser : {'lxml','etree'}, default 'lxml' Parser module to use for building of tree. Only 'lxml' and 'etree' are supported. With 'lxml', the ability to use XSLT stylesheet is supported. @@ -3578,9 +3702,35 @@ def to_xml( layout of elements and attributes from original output. This argument requires ``lxml`` to be installed. Only XSLT 1.0 scripts and not later versions is currently supported. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' + and 'path_or_buffer' is path-like, + then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. 
+ Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, + ``'tar'``} and other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster + compression and to create a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. + + .. versionadded:: 1.5.0 + Added support for `.tar` files. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. Returns ------- @@ -3634,7 +3784,7 @@ def to_xml( >>> df.to_xml( - ... namespaces={{"doc": "https://example.com"}}, prefix="doc" + ... namespaces={"doc": "https://example.com"}, prefix="doc" ... ) # doctest: +SKIP @@ -3658,7 +3808,6 @@ def to_xml( """ - from pandas.io.formats.xml import ( EtreeXMLFormatter, LxmlXMLFormatter, @@ -3760,7 +3909,6 @@ def to_iceberg( ) # ---------------------------------------------------------------------- - @doc(INFO_DOCSTRING, **frame_sub_kwargs) def info( self, verbose: bool | None = None, @@ -3769,6 +3917,149 @@ def info( memory_usage: bool | str | None = None, show_counts: bool | None = None, ) -> None: + """ + Print a concise summary of a DataFrame. + + This method prints information about a DataFrame including + the index dtype and columns, non-NA values and memory usage. + + Parameters + ---------- + verbose : bool, optional + Whether to print the full summary. 
By default, the setting in + ``pandas.options.display.max_info_columns`` is followed. + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output. + max_cols : int, optional + When to switch from the verbose to the truncated output. If the + DataFrame has more than `max_cols` columns, the truncated output + is used. By default, the setting in + ``pandas.options.display.max_info_columns`` is used. + memory_usage : bool, str, optional + Specifies whether total memory usage of the DataFrame + elements (including the index) should be displayed. By default, + this follows the ``pandas.options.display.memory_usage`` setting. + + True always show memory usage. False never shows memory usage. + A value of 'deep' is equivalent to "True with deep introspection". + Memory usage is shown in human-readable units (base-2 + representation). Without deep introspection a memory estimation is + made based in column dtype and number of rows assuming values + consume the same memory amount for corresponding dtypes. With deep + memory introspection, a real memory usage calculation is performed + at the cost of computational resources. See the + :ref:`Frequently Asked Questions ` for more + details. + show_counts : bool, optional + Whether to show the non-null counts. By default, this is shown + only if the DataFrame is smaller than + ``pandas.options.display.max_info_rows`` and + ``pandas.options.display.max_info_columns``. A value of True always + shows the counts, and False never shows the counts. + + Returns + ------- + None + This method prints a summary of a DataFrame and returns None. + + See Also + -------- + DataFrame.describe: Generate descriptive statistics of DataFrame + columns. + DataFrame.memory_usage: Memory usage of DataFrame columns. 
+ + Examples + -------- + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ["alpha", "beta", "gamma", "delta", "epsilon"] + >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + >>> df = pd.DataFrame( + ... { + ... "int_col": int_values, + ... "text_col": text_values, + ... "float_col": float_values, + ... } + ... ) + >>> df + int_col text_col float_col + 0 1 alpha 0.00 + 1 2 beta 0.25 + 2 3 gamma 0.50 + 3 4 delta 0.75 + 4 5 epsilon 1.00 + + Prints information of all columns: + + >>> df.info(verbose=True) + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Prints a summary of columns count and its dtypes but not per column + information: + + >>> df.info(verbose=False) + + RangeIndex: 5 entries, 0 to 4 + Columns: 3 entries, int_col to float_col + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Pipe output of DataFrame.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> df.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big DataFrames and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(["a", "b", "c"], 10**6) + >>> df = pd.DataFrame( + ... { + ... "column_1": np.random.choice(["a", "b", "c"], 10**6), + ... "column_2": np.random.choice(["a", "b", "c"], 10**6), + ... "column_3": np.random.choice(["a", "b", "c"], 10**6), + ... } + ... 
) + >>> df.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 22.9+ MB + + >>> df.info(memory_usage="deep") + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 165.9 MB + """ info = DataFrameInfo( data=self, memory_usage=memory_usage, @@ -5582,37 +5873,6 @@ def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame: fill_value=fill_value, ) - @Appender( - """ - Examples - -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - - Change the row labels. - - >>> df.set_axis(['a', 'b', 'c'], axis='index') - A B - a 1 4 - b 2 5 - c 3 6 - - Change the column labels. - - >>> df.set_axis(['I', 'II'], axis='columns') - I II - 0 1 4 - 1 2 5 - 2 3 6 - """ - ) - @Substitution( - klass=_shared_doc_kwargs["klass"], - axes_single_arg=_shared_doc_kwargs["axes_single_arg"], - extended_summary_sub=" column or", - axis_description_sub=", and 1 identifies the columns", - see_also_sub=" or columns", - ) - @Appender(NDFrame.set_axis.__doc__) def set_axis( self, labels, @@ -5620,13 +5880,65 @@ def set_axis( axis: Axis = 0, copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: + """ + Assign desired index to given axis. + + Indexes for column or row labels can be changed by assigning + a list-like or Index. + + Parameters + ---------- + labels : list-like, Index + The values for the new index. + + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to update. The value 0 identifies the rows. For `Series` + this parameter is unused and defaults to 0. 
+ + copy : bool, default False + This keyword is now ignored; changing its value will have no + impact on the method. + + .. deprecated:: 3.0.0 + + This keyword is ignored and will be removed in pandas 4.0. Since + pandas 3.0, this method always returns a new object using a lazy + copy mechanism that defers copies until necessary + (Copy-on-Write). See the `user guide on Copy-on-Write + `__ + for more details. + + Returns + ------- + DataFrame + An object of type DataFrame. + + See Also + -------- + DataFrame.rename_axis : Alter the name of the index or columns. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + Change the row labels. + + >>> df.set_axis(["a", "b", "c"], axis="index") + A B + a 1 4 + b 2 5 + c 3 6 + + Change the column labels. + + >>> df.set_axis(["I", "II"], axis="columns") + I II + 0 1 4 + 1 2 5 + 2 3 6 + """ return super().set_axis(labels, axis=axis, copy=copy) - @doc( - NDFrame.reindex, - klass=_shared_doc_kwargs["klass"], - optional_reindex=_shared_doc_kwargs["optional_reindex"], - ) def reindex( self, labels=None, @@ -5641,6 +5953,227 @@ def reindex( limit: int | None = None, tolerance=None, ) -> DataFrame: + """ + Conform DataFrame to new index with optional filling logic. + + Places NA/NaN in locations having no value in the previous index. A new object + is produced unless the new index is equivalent to the current one and + ``copy=False``. + + Parameters + ---------- + + labels : array-like, optional + New labels / index to conform the axis specified by 'axis' to. + index : array-like, optional + New labels for the index. Preferably an Index object to avoid + duplicating data. + columns : array-like, optional + New labels for the columns. Preferably an Index object to avoid + duplicating data. + axis : int or str, optional + Axis to target. Can be either the axis name ('index', 'columns') + or number (0, 1). 
+ method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: Propagate last valid observation forward to next + valid. + * backfill / bfill: Use next valid observation to fill gap. + * nearest: Use nearest valid observations to fill gap. + + copy : bool, default False + This keyword is now ignored; changing its value will have no + impact on the method. + + .. deprecated:: 3.0.0 + + This keyword is ignored and will be removed in pandas 4.0. Since + pandas 3.0, this method always returns a new object using a lazy + copy mechanism that defers copies until necessary + (Copy-on-Write). See the `user guide on Copy-on-Write + `__ + for more details. + + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : scalar, default np.nan + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value. + limit : int, default None + Maximum number of consecutive elements to forward or backward fill. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations most + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + DataFrame + DataFrame with changed index. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. 
+ + Examples + -------- + ``DataFrame.reindex`` supports two calling conventions + + * ``(index=index_labels, columns=column_labels, ...)`` + * ``(labels, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + Create a DataFrame with some fictional data. + + >>> index = ["Firefox", "Chrome", "Safari", "IE10", "Konqueror"] + >>> columns = ["http_status", "response_time"] + >>> df = pd.DataFrame( + ... [[200, 0.04], [200, 0.02], [404, 0.07], [404, 0.08], [301, 1.0]], + ... columns=columns, + ... index=index, + ... ) + >>> df + http_status response_time + Firefox 200 0.04 + Chrome 200 0.02 + Safari 404 0.07 + IE10 404 0.08 + Konqueror 301 1.00 + + Create a new index and reindex the DataFrame. By default + values in the new index that do not have corresponding + records in the DataFrame are assigned ``NaN``. + + >>> new_index = ["Safari", "Iceweasel", "Comodo Dragon", "IE10", "Chrome"] + >>> df.reindex(new_index) + http_status response_time + Safari 404.0 0.07 + Iceweasel NaN NaN + Comodo Dragon NaN NaN + IE10 404.0 0.08 + Chrome 200.0 0.02 + + We can fill in the missing values by passing a value to + the keyword ``fill_value``. Because the index is not monotonically + increasing or decreasing, we cannot use arguments to the keyword + ``method`` to fill the ``NaN`` values. + + >>> df.reindex(new_index, fill_value=0) + http_status response_time + Safari 404 0.07 + Iceweasel 0 0.00 + Comodo Dragon 0 0.00 + IE10 404 0.08 + Chrome 200 0.02 + + >>> df.reindex(new_index, fill_value="missing") + http_status response_time + Safari 404 0.07 + Iceweasel missing missing + Comodo Dragon missing missing + IE10 404 0.08 + Chrome 200 0.02 + + We can also reindex the columns. 
+ + >>> df.reindex(columns=["http_status", "user_agent"]) + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + Or we can use "axis-style" keyword arguments + + >>> df.reindex(["http_status", "user_agent"], axis="columns") + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + To further illustrate the filling functionality in + ``reindex``, we will create a DataFrame with a + monotonically increasing index (for example, a sequence + of dates). + + >>> date_index = pd.date_range("1/1/2010", periods=6, freq="D") + >>> df2 = pd.DataFrame( + ... {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index + ... ) + >>> df2 + prices + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + + Suppose we decide to expand the DataFrame to cover a wider + date range. + + >>> date_index2 = pd.date_range("12/29/2009", periods=10, freq="D") + >>> df2.reindex(date_index2) + prices + 2009-12-29 NaN + 2009-12-30 NaN + 2009-12-31 NaN + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + The index entries that did not have a value in the original data frame + (for example, '2009-12-29') are by default filled with ``NaN``. + If desired, we can fill in the missing values using one of several + options. + + For example, to back-propagate the last valid value to fill the ``NaN`` + values, pass ``bfill`` as an argument to the ``method`` keyword. 
+ + >>> df2.reindex(date_index2, method="bfill") + prices + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + Please note that the ``NaN`` value present in the original DataFrame + (at index value 2010-01-03) will not be filled by any of the + value propagation schemes. This is because filling while reindexing + does not look at DataFrame values, but only compares the original and + desired indexes. If you do want to fill in the ``NaN`` values present + in the original DataFrame, use the ``fillna()`` method. + + See the :ref:`user guide ` for more. + """ return super().reindex( labels=labels, index=index, @@ -6129,7 +6662,6 @@ def _replace_columnwise( return res if inplace else res.__finalize__(self) - @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift( self, periods: int | Sequence[int] = 1, @@ -6138,6 +6670,120 @@ def shift( fill_value: Hashable = lib.no_default, suffix: str | None = None, ) -> DataFrame: + """ + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. + If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. `freq` can be inferred + when specified as "infer" as long as either freq or inferred_freq + attribute is set in the index. + + Parameters + ---------- + periods : int or Sequence + Number of periods to shift. Can be positive or negative. + If an iterable of ints, the data will be shifted once by each int. + This is equivalent to shifting by one value at a time and + concatenating all resulting frames. The resulting columns will have + the shift suffixed to their column names. For multiple periods, + axis must not be 1. 
+ freq : DateOffset, tseries.offsets, timedelta, or str, optional + Offset to use from the tseries module or time rule (e.g. 'EOM'). + If `freq` is specified then the index values are shifted but the + data is not realigned. That is, use `freq` if you would like to + extend the index when shifting and preserve the original data. + If `freq` is specified as "infer" then it will be inferred from + the freq or inferred_freq attributes of the index. If neither of + those attributes exist, a ValueError is thrown. + axis : {0 or 'index', 1 or 'columns', None}, default None + Shift direction. For `Series` this parameter is unused and defaults to 0. + fill_value : object, optional + The scalar value to use for newly introduced missing values. + the default depends on the dtype of `self`. + For Boolean and numeric NumPy data types, ``np.nan`` is used. + For datetime, timedelta, or period data, etc. :attr:`NaT` is used. + For extension dtypes, ``self.dtype.na_value`` is used. + suffix : str, optional + If str and periods is an iterable, this is added after the column + name and before the shift value for each shifted column name. + For `Series` this parameter is unused and defaults to `None`. + + Returns + ------- + DataFrame + Copy of input object, shifted. + + See Also + -------- + Index.shift : Shift values of Index. + DatetimeIndex.shift : Shift values of DatetimeIndex. + PeriodIndex.shift : Shift values of PeriodIndex. + + Examples + -------- + >>> df = pd.DataFrame( + ... [[10, 13, 17], [20, 23, 27], [15, 18, 22], [30, 33, 37], [45, 48, 52]], + ... columns=["Col1", "Col2", "Col3"], + ... index=pd.date_range("2020-01-01", "2020-01-05"), + ... 
) + >>> df + Col1 Col2 Col3 + 2020-01-01 10 13 17 + 2020-01-02 20 23 27 + 2020-01-03 15 18 22 + 2020-01-04 30 33 37 + 2020-01-05 45 48 52 + + >>> df.shift(periods=3) + Col1 Col2 Col3 + 2020-01-01 NaN NaN NaN + 2020-01-02 NaN NaN NaN + 2020-01-03 NaN NaN NaN + 2020-01-04 10.0 13.0 17.0 + 2020-01-05 20.0 23.0 27.0 + + >>> df.shift(periods=1, axis="columns") + Col1 Col2 Col3 + 2020-01-01 NaN 10 13 + 2020-01-02 NaN 20 23 + 2020-01-03 NaN 15 18 + 2020-01-04 NaN 30 33 + 2020-01-05 NaN 45 48 + + >>> df.shift(periods=3, fill_value=0) + Col1 Col2 Col3 + 2020-01-01 0 0 0 + 2020-01-02 0 0 0 + 2020-01-03 0 0 0 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + + >>> df.shift(periods=3, freq="D") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + + >>> df.shift(periods=3, freq="infer") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + + >>> df["Col1"].shift(periods=[0, 1, 2]) + Col1_0 Col1_1 Col1_2 + 2020-01-01 10 NaN NaN + 2020-01-02 20 10.0 NaN + 2020-01-03 15 20.0 10.0 + 2020-01-04 30 15.0 20.0 + 2020-01-05 45 30.0 15.0 + """ if freq is not None and fill_value is not lib.no_default: # GH#53832 raise ValueError( @@ -6790,35 +7436,301 @@ class max type # ---------------------------------------------------------------------- # Reindex-based selection methods - @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame + that indicates whether an element is an NA value. 
+ + See Also + -------- + Series.isnull : Alias of isna. + DataFrame.isnull : Alias of isna. + Series.notna : Boolean inverse of isna. + DataFrame.notna : Boolean inverse of isna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool + """ res_mgr = self._mgr.isna(func=isna) result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes) return result.__finalize__(self, method="isna") - @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> DataFrame: """ DataFrame.isnull is an alias for DataFrame.isna. + + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame + that indicates whether an element is an NA value. + + See Also + -------- + Series.isnull : Alias of isna. + DataFrame.isnull : Alias of isna. + Series.notna : Boolean inverse of isna. 
+ DataFrame.notna : Boolean inverse of isna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool """ return self.isna() - @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notna(self) -> DataFrame: - return ~self.isna() - - @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) - def notnull(self) -> DataFrame: """ - DataFrame.notnull is an alias for DataFrame.notna. - """ - return ~self.isna() + Detect existing (non-missing) values. - @overload - def dropna( - self, - *, - axis: Axis = ..., + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame + that indicates whether an element is not an NA value. + + See Also + -------- + Series.notnull : Alias of notna. + DataFrame.notnull : Alias of notna. + Series.isna : Boolean inverse of notna. + DataFrame.isna : Boolean inverse of notna. 
+ Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ + return ~self.isna() + + def notnull(self) -> DataFrame: + """ + DataFrame.notnull is an alias for DataFrame.notna. + + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame + that indicates whether an element is not an NA value. + + See Also + -------- + Series.notnull : Alias of notna. + DataFrame.notnull : Alias of notna. + Series.isna : Boolean inverse of notna. + DataFrame.isna : Boolean inverse of notna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... 
pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ + return ~self.isna() + + @overload + def dropna( + self, + *, + axis: Axis = ..., how: AnyAll | lib.NoDefault = ..., thresh: int | lib.NoDefault = ..., subset: IndexLabel = ..., @@ -8743,217 +9655,3849 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data, other=other) - @Appender(ops.make_flex_doc("eq", "dataframe")) def eq(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) - - @Appender(ops.make_flex_doc("ne", "dataframe")) - def ne(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) - - @Appender(ops.make_flex_doc("le", "dataframe")) - def le(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.le, axis=axis, level=level) - - @Appender(ops.make_flex_doc("lt", "dataframe")) - def lt(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.lt, axis=axis, level=level) + """ + Get Not equal to of dataframe and other, element-wise (binary operator `eq`). 
- @Appender(ops.make_flex_doc("ge", "dataframe")) - def ge(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.ge, axis=axis, level=level) + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. - @Appender(ops.make_flex_doc("gt", "dataframe")) - def gt(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.gt, axis=axis, level=level) + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. - @Appender(ops.make_flex_doc("add", "dataframe")) - def add( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.add, level=level, fill_value=fill_value, axis=axis - ) + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. - @Appender(ops.make_flex_doc("radd", "dataframe")) - def radd( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, roperator.radd, level=level, fill_value=fill_value, axis=axis - ) + Returns + ------- + DataFrame of bool + Result of the comparison. - @Appender(ops.make_flex_doc("sub", "dataframe")) - def sub( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.sub, level=level, fill_value=fill_value, axis=axis - ) + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. 
+ DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. - subtract = sub + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). - @Appender(ops.make_flex_doc("rsub", "dataframe")) - def rsub( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, roperator.rsub, level=level, fill_value=fill_value, axis=axis - ) + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... ) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of 
different shape. - @Appender(ops.make_flex_doc("mul", "dataframe")) - def mul( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.mul, level=level, fill_value=fill_value, axis=axis - ) + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... ) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) - multiply = mul + def ne(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Not equal to of dataframe and other, element-wise (binary operator `ne`). - @Appender(ops.make_flex_doc("rmul", "dataframe")) - def rmul( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, roperator.rmul, level=level, fill_value=fill_value, axis=axis - ) + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. 
- @Appender(ops.make_flex_doc("truediv", "dataframe")) - def truediv( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.truediv, level=level, fill_value=fill_value, axis=axis - ) + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. - div = truediv - divide = truediv + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. - @Appender(ops.make_flex_doc("rtruediv", "dataframe")) - def rtruediv( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis - ) + Returns + ------- + DataFrame of bool + Result of the comparison. - rdiv = rtruediv + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. 
- @Appender(ops.make_flex_doc("floordiv", "dataframe")) - def floordiv( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.floordiv, level=level, fill_value=fill_value, axis=axis - ) + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). - @Appender(ops.make_flex_doc("rfloordiv", "dataframe")) - def rfloordiv( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis - ) + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... ) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. 
- @Appender(ops.make_flex_doc("mod", "dataframe")) - def mod( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.mod, level=level, fill_value=fill_value, axis=axis - ) + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... ) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) - @Appender(ops.make_flex_doc("rmod", "dataframe")) - def rmod( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, roperator.rmod, level=level, fill_value=fill_value, axis=axis - ) + def le(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Greater than or equal to of dataframe and other, + element-wise (binary operator `le`). - @Appender(ops.make_flex_doc("pow", "dataframe")) - def pow( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.pow, level=level, fill_value=fill_value, axis=axis - ) + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. 
- @Appender(ops.make_flex_doc("rpow", "dataframe")) - def rpow( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, roperator.rpow, level=level, fill_value=fill_value, axis=axis - ) + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. - # ---------------------------------------------------------------------- - # Combination-Related + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. - @doc( - _shared_docs["compare"], - dedent( - """ Returns ------- - DataFrame - DataFrame that shows the differences stacked side by side. - - The resulting index will be a MultiIndex with 'self' and 'other' - stacked alternately at the inner level. - - Raises - ------ - ValueError - When the two DataFrames don't have identical labels or shape. + DataFrame of bool + Result of the comparison. See Also -------- - Series.compare : Compare with another Series and show differences. - DataFrame.equals : Test whether two objects contain the same elements. + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. Notes ----- - Matching NaNs will not appear as a difference. 
- - Can only compare identically-labeled - (i.e. same shape, identical row and column labels) DataFrames + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). Examples -------- >>> df = pd.DataFrame( - ... {{ - ... "col1": ["a", "a", "b", "b", "a"], - ... "col2": [1.0, 2.0, 3.0, np.nan, 5.0], - ... "col3": [1.0, 2.0, 3.0, 4.0, 5.0] - ... }}, - ... columns=["col1", "col2", "col3"], + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], ... ) >>> df - col1 col2 col3 - 0 a 1.0 1.0 - 1 a 2.0 2.0 - 2 b 3.0 3.0 - 3 b NaN 4.0 - 4 a 5.0 5.0 - - >>> df2 = df.copy() - >>> df2.loc[0, 'col1'] = 'c' - >>> df2.loc[2, 'col3'] = 4.0 - >>> df2 - col1 col2 col3 - 0 c 1.0 1.0 - 1 a 2.0 2.0 - 2 b 3.0 4.0 - 3 b NaN 4.0 - 4 a 5.0 5.0 - - Align the differences on columns + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. 
- >>> df.compare(df2) - col1 col3 - self other self other + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... ) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.le, axis=axis, level=level) + + def lt(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Greater than of dataframe and other, element-wise (binary operator `lt`). + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. 
+ DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... ) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... 
) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... ) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.lt, axis=axis, level=level) + + def ge(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Greater than or equal to of dataframe and other, + element-wise (binary operator `ge`). + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. 
+ DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... ) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... 
"revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... ) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.ge, axis=axis, level=level) + + def gt(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Greater than of dataframe and other, element-wise (binary operator `gt`). + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). 
+ + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... ) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... 
) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.gt, axis=axis, level=level) + + def add( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Addition of dataframe and other, element-wise (binary operator `add`). + + Equivalent to ``dataframe + other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `radd`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). 
+ DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... 
) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.add, level=level, fill_value=fill_value, axis=axis + ) + + def radd( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Addition of dataframe and other, element-wise (binary operator `radd`). + + Equivalent to ``other + dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `add`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. 
+ level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... 
pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, roperator.radd, level=level, fill_value=fill_value, axis=axis + ) + + def sub( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Subtraction of dataframe and other, element-wise (binary operator `sub`). 
+ + Equivalent to ``dataframe - other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rsub`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. 
+ + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.sub, level=level, fill_value=fill_value, axis=axis + ) + + subtract = sub + + def rsub( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Subtraction of dataframe and other, + element-wise (binary operator `rsub`). + + Equivalent to ``other - dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `sub`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. 
+ DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... 
) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, roperator.rsub, level=level, fill_value=fill_value, axis=axis + ) + + def mul( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Multiplication of dataframe and other, + element-wise (binary operator `mul`). + + Equivalent to ``dataframe * other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rmul`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). 
For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.mul, level=level, fill_value=fill_value, axis=axis + ) + + multiply = mul + + def rmul( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Multiplication of dataframe and other, + element-wise (binary operator `rmul`). + + Equivalent to ``other * dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `mul`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. 
+ DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... 
) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, roperator.rmul, level=level, fill_value=fill_value, axis=axis + ) + + def truediv( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Floating division of dataframe and other, + element-wise (binary operator `truediv`). + + Equivalent to ``dataframe / other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rtruediv`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). 
For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.truediv, level=level, fill_value=fill_value, axis=axis + ) + + div = truediv + divide = truediv + + def rtruediv( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Floating division of dataframe and other, + element-wise (binary operator `rtruediv`). + + Equivalent to ``other / dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `truediv`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. 
+ DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis + ) + + rdiv = rtruediv + + def floordiv( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Integer division of dataframe and other, + element-wise (binary operator `floordiv`). + + Equivalent to ``dataframe // other``, + but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rfloordiv`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. 
+ axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.floordiv, level=level, fill_value=fill_value, axis=axis + ) + + def rfloordiv( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Integer division of dataframe and other, + element-wise (binary operator `rfloordiv`). + + Equivalent to ``other // dataframe``, + but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `floordiv`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. 
+ DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis + ) + + def mod( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Modulo of dataframe and other, element-wise (binary operator `mod`). + + Equivalent to ``dataframe % other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rmod`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. 
+ axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.mod, level=level, fill_value=fill_value, axis=axis + ) + + def rmod( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Modulo of dataframe and other, element-wise (binary operator `rmod`). + + Equivalent to ``other % dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `mod`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. 
+ DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... 
) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, roperator.rmod, level=level, fill_value=fill_value, axis=axis + ) + + def pow( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Exponential power of dataframe and other, + element-wise (binary operator `pow`). + + Equivalent to ``dataframe ** other``, + but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rpow`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). 
For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.pow, level=level, fill_value=fill_value, axis=axis + ) + + def rpow( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Get Exponential power of dataframe and other, + element-wise (binary operator `rpow`). + + Equivalent to ``other ** dataframe``, + but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `pow`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. 
+ DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... 
) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, roperator.rpow, level=level, fill_value=fill_value, axis=axis + ) + + # ---------------------------------------------------------------------- + # Combination-Related + + def compare( + self, + other: DataFrame, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: Suffixes = ("self", "other"), + ) -> DataFrame: + """ + Compare to another DataFrame and show the differences. + + Parameters + ---------- + other : DataFrame + Object to compare with. + + align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. 
+ + keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + + keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + + result_names : tuple, default ('self', 'other') + Set the dataframes names in the comparison. + + Returns + ------- + DataFrame + DataFrame that shows the differences stacked side by side. + + The resulting index will be a MultiIndex with 'self' and 'other' + stacked alternately at the inner level. + + Raises + ------ + ValueError + When the two DataFrames don't have identical labels or shape. + + See Also + -------- + Series.compare : Compare with another Series and show differences. + DataFrame.equals : Test whether two objects contain the same elements. + + Notes + ----- + Matching NaNs will not appear as a difference. + + Can only compare identically-labeled + (i.e. same shape, identical row and column labels) DataFrames + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "col1": ["a", "a", "b", "b", "a"], + ... "col2": [1.0, 2.0, 3.0, np.nan, 5.0], + ... "col3": [1.0, 2.0, 3.0, 4.0, 5.0], + ... }, + ... columns=["col1", "col2", "col3"], + ... 
) + >>> df + col1 col2 col3 + 0 a 1.0 1.0 + 1 a 2.0 2.0 + 2 b 3.0 3.0 + 3 b NaN 4.0 + 4 a 5.0 5.0 + + >>> df2 = df.copy() + >>> df2.loc[0, "col1"] = "c" + >>> df2.loc[2, "col3"] = 4.0 + >>> df2 + col1 col2 col3 + 0 c 1.0 1.0 + 1 a 2.0 2.0 + 2 b 3.0 4.0 + 3 b NaN 4.0 + 4 a 5.0 5.0 + + Align the differences on columns + + >>> df.compare(df2) + col1 col3 + self other self other 0 a c NaN NaN 2 NaN NaN 3.0 4.0 @@ -9004,17 +13548,6 @@ def rpow( 3 b b NaN NaN 4.0 4.0 4 a a 5.0 5.0 5.0 5.0 """ - ), - klass=_shared_doc_kwargs["klass"], - ) - def compare( - self, - other: DataFrame, - align_axis: Axis = 1, - keep_shape: bool = False, - keep_equal: bool = False, - result_names: Suffixes = ("self", "other"), - ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, @@ -9653,47 +14186,185 @@ def groupby( b 12.3 123.0 NaN 12.3 33.0 - When using ``.apply()``, use ``group_keys`` to include or exclude the - group keys. The ``group_keys`` argument defaults to ``True`` (include). + When using ``.apply()``, use ``group_keys`` to include or exclude the + group keys. The ``group_keys`` argument defaults to ``True`` (include). + + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"], + ... "Max Speed": [380.0, 370.0, 24.0, 26.0], + ... } + ... 
)
+ >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x)
+ Max Speed
+ Animal
+ Falcon 0 380.0
+ 1 370.0
+ Parrot 2 24.0
+ 3 26.0
+
+ >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x)
+ Max Speed
+ 0 380.0
+ 1 370.0
+ 2 24.0
+ 3 26.0
+ """
+ from pandas.core.groupby.generic import DataFrameGroupBy
+
+ if level is None and by is None:
+ raise TypeError("You have to supply one of 'by' and 'level'")
+
+ return DataFrameGroupBy(
+ obj=self,
+ keys=by,
+ level=level,
+ as_index=as_index,
+ sort=sort,
+ group_keys=group_keys,
+ observed=observed,
+ dropna=dropna,
+ )
+
+ _shared_docs["pivot"] = """
+ Return reshaped DataFrame organized by given index / column values.
+
+ Reshape data (produce a "pivot" table) based on column values. Uses
+ unique values from specified `index` / `columns` to form axes of the
+ resulting DataFrame. This function does not support data
+ aggregation, multiple values will result in a MultiIndex in the
+ columns. See the :ref:`User Guide <reshaping.pivot>` for more on reshaping.
+
+ Parameters
+ ----------%s
+ columns : Hashable or a sequence of the previous
+ Column to use to make new frame's columns.
+ index : Hashable or a sequence of the previous, optional
+ Column to use to make new frame's index. If not given, uses existing index.
+ values : Hashable or a sequence of the previous, optional
+ Column(s) to use for populating new frame's values. If not
+ specified, all remaining columns will be used and the result will
+ have hierarchically indexed columns.
+
+ Returns
+ -------
+ DataFrame
+ Returns reshaped DataFrame.
+
+ Raises
+ ------
+ ValueError:
+ When there are any `index`, `columns` combinations with multiple
+ values. `DataFrame.pivot_table` when you need to aggregate.
+
+ See Also
+ --------
+ DataFrame.pivot_table : Generalization of pivot that can handle
+ duplicate values for one index/column pair.
+ DataFrame.unstack : Pivot based on the index values instead of a
+ column.
+ wide_to_long : Wide panel to long format. Less flexible but more
+ user-friendly than melt.
+
+ Notes
+ -----
+ For finer-tuned control, see hierarchical indexing documentation along
+ with the related stack/unstack methods.
+
+ Reference :ref:`the user guide <reshaping.pivot>` for more examples.
+
+ Examples
+ --------
+ >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
+ ... 'two'],
+ ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+ ... 'baz': [1, 2, 3, 4, 5, 6],
+ ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
+ >>> df
+ foo bar baz zoo
+ 0 one A 1 x
+ 1 one B 2 y
+ 2 one C 3 z
+ 3 two A 4 q
+ 4 two B 5 w
+ 5 two C 6 t
+
+ >>> df.pivot(index='foo', columns='bar', values='baz')
+ bar A B C
+ foo
+ one 1 2 3
+ two 4 5 6
+
+ >>> df.pivot(index='foo', columns='bar')['baz']
+ bar A B C
+ foo
+ one 1 2 3
+ two 4 5 6
+
+ >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])
+ baz zoo
+ bar A B C A B C
+ foo
+ one 1 2 3 x y z
+ two 4 5 6 q w t
+
+ You could also assign a list of column names or a list of index names.
+
+ >>> df = pd.DataFrame({
+ ... "lev1": [1, 1, 1, 2, 2, 2],
+ ... "lev2": [1, 1, 2, 1, 1, 2],
+ ... "lev3": [1, 2, 1, 2, 1, 2],
+ ... "lev4": [1, 2, 3, 4, 5, 6],
+ ... "values": [0, 1, 2, 3, 4, 5]})
+ >>> df
+ lev1 lev2 lev3 lev4 values
+ 0 1 1 1 1 0
+ 1 1 1 2 2 1
+ 2 1 2 1 3 2
+ 3 2 1 2 4 3
+ 4 2 1 1 5 4
+ 5 2 2 2 6 5
+
+ >>> df.pivot(index="lev1", columns=["lev2", "lev3"], values="values")
+ lev2 1 2
+ lev3 1 2 1 2
+ lev1
+ 1 0.0 1.0 2.0 NaN
+ 2 4.0 3.0 NaN 5.0
+
+ >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"], values="values")
+ lev3 1 2
+ lev1 lev2
+ 1 1 0.0 1.0
+ 2 2.0 NaN
+ 2 1 4.0 3.0
+ 2 NaN 5.0
-
- >>> df = pd.DataFrame(
- ... {
- ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
- ... "Max Speed": [380.0, 370.0, 24.0, 26.0],
- ... }
- ...
) - >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x) - Max Speed - Animal - Falcon 0 380.0 - 1 370.0 - Parrot 2 24.0 - 3 26.0 + A ValueError is raised if there are any duplicates. - >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x) - Max Speed - 0 380.0 - 1 370.0 - 2 24.0 - 3 26.0 - """ - from pandas.core.groupby.generic import DataFrameGroupBy + >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], + ... "bar": ['A', 'A', 'B', 'C'], + ... "baz": [1, 2, 3, 4]}) + >>> df + foo bar baz + 0 one A 1 + 1 one A 2 + 2 two B 3 + 3 two C 4 - if level is None and by is None: - raise TypeError("You have to supply one of 'by' and 'level'") + Notice that the first two rows are the same for our `index` + and `columns` arguments. - return DataFrameGroupBy( - obj=self, - keys=by, - level=level, - as_index=as_index, - sort=sort, - group_keys=group_keys, - observed=observed, - dropna=dropna, - ) + >>> df.pivot(index='foo', columns='bar', values='baz') + Traceback (most recent call last): + ... + ValueError: Index contains duplicate entries, cannot reshape + """ - _shared_docs["pivot"] = """ + def pivot( + self, *, columns, index=lib.no_default, values=lib.no_default + ) -> DataFrame: + """ Return reshaped DataFrame organized by given index / column values. Reshape data (produce a "pivot" table) based on column values. Uses @@ -9703,7 +14374,7 @@ def groupby( columns. See the :ref:`User Guide ` for more on reshaping. Parameters - ----------%s + ---------- columns : Hashable or a sequence of the previous Column to use to make new frame's columns. index : Hashable or a sequence of the previous, optional @@ -9742,11 +14413,14 @@ def groupby( Examples -------- - >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', - ... 'two'], - ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], - ... 'baz': [1, 2, 3, 4, 5, 6], - ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + >>> df = pd.DataFrame( + ... { + ... 
"foo": ["one", "one", "one", "two", "two", "two"], + ... "bar": ["A", "B", "C", "A", "B", "C"], + ... "baz": [1, 2, 3, 4, 5, 6], + ... "zoo": ["x", "y", "z", "q", "w", "t"], + ... } + ... ) >>> df foo bar baz zoo 0 one A 1 x @@ -9756,19 +14430,19 @@ def groupby( 4 two B 5 w 5 two C 6 t - >>> df.pivot(index='foo', columns='bar', values='baz') + >>> df.pivot(index="foo", columns="bar", values="baz") bar A B C foo one 1 2 3 two 4 5 6 - >>> df.pivot(index='foo', columns='bar')['baz'] + >>> df.pivot(index="foo", columns="bar")["baz"] bar A B C foo one 1 2 3 two 4 5 6 - >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) + >>> df.pivot(index="foo", columns="bar", values=["baz", "zoo"]) baz zoo bar A B C A B C foo @@ -9777,12 +14451,15 @@ def groupby( You could also assign a list of column names or a list of index names. - >>> df = pd.DataFrame({ - ... "lev1": [1, 1, 1, 2, 2, 2], - ... "lev2": [1, 1, 2, 1, 1, 2], - ... "lev3": [1, 2, 1, 2, 1, 2], - ... "lev4": [1, 2, 3, 4, 5, 6], - ... "values": [0, 1, 2, 3, 4, 5]}) + >>> df = pd.DataFrame( + ... { + ... "lev1": [1, 1, 1, 2, 2, 2], + ... "lev2": [1, 1, 2, 1, 1, 2], + ... "lev3": [1, 2, 1, 2, 1, 2], + ... "lev4": [1, 2, 3, 4, 5, 6], + ... "values": [0, 1, 2, 3, 4, 5], + ... } + ... ) >>> df lev1 lev2 lev3 lev4 values 0 1 1 1 1 0 @@ -9809,9 +14486,13 @@ def groupby( A ValueError is raised if there are any duplicates. - >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], - ... "bar": ['A', 'A', 'B', 'C'], - ... "baz": [1, 2, 3, 4]}) + >>> df = pd.DataFrame( + ... { + ... "foo": ["one", "one", "two", "two"], + ... "bar": ["A", "A", "B", "C"], + ... "baz": [1, 2, 3, 4], + ... } + ... ) >>> df foo bar baz 0 one A 1 @@ -9822,29 +14503,190 @@ def groupby( Notice that the first two rows are the same for our `index` and `columns` arguments. - >>> df.pivot(index='foo', columns='bar', values='baz') + >>> df.pivot(index="foo", columns="bar", values="baz") Traceback (most recent call last): ... 
ValueError: Index contains duplicate entries, cannot reshape """ - - @Substitution("") - @Appender(_shared_docs["pivot"]) - def pivot( - self, *, columns, index=lib.no_default, values=lib.no_default - ) -> DataFrame: from pandas.core.reshape.pivot import pivot return pivot(self, index=index, columns=columns, values=values) - _shared_docs["pivot_table"] = """ + _shared_docs["pivot_table"] = """ + Create a spreadsheet-style pivot table as a DataFrame. + + The levels in the pivot table will be stored in MultiIndex objects + (hierarchical indexes) on the index and columns of the result DataFrame. + + Parameters + ----------%s + values : list-like or scalar, optional + Column or columns to aggregate. + index : column, Grouper, array, or sequence of the previous + Keys to group by on the pivot table index. If a list is passed, + it can contain any of the other types (except list). If an array is + passed, it must be the same length as the data and will be used in + the same manner as column values. + columns : column, Grouper, array, or sequence of the previous + Keys to group by on the pivot table column. If a list is passed, + it can contain any of the other types (except list). If an array is + passed, it must be the same length as the data and will be used in + the same manner as column values. + aggfunc : function, list of functions, dict, default "mean" + If a list of functions is passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves). + If a dict is passed, the key is column to aggregate and the value is + function or list of functions. If ``margin=True``, aggfunc will be + used to calculate the partial aggregates. + fill_value : scalar, default None + Value to replace missing values with (in the resulting pivot table, + after aggregation). 
+ margins : bool, default False + If ``margins=True``, special ``All`` columns and rows + will be added with partial group aggregates across the categories + on the rows and columns. + dropna : bool, default True + Do not include columns whose entries are all NaN. If True, + + * rows with an NA value in any column will be omitted before computing + margins, + * index/column keys containing NA values will be dropped (see ``dropna`` + parameter in :meth:`DataFrame.groupby`). + + margins_name : str, default 'All' + Name of the row / column that will contain the totals + when margins is True. + observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionchanged:: 3.0.0 + + The default value is now ``True``. + + sort : bool, default True + Specifies if the result should be sorted. + + **kwargs : dict + Optional keyword arguments to pass to ``aggfunc``. + + Returns + ------- + DataFrame + An Excel style pivot table. + + See Also + -------- + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.melt: Unpivot a DataFrame from wide to long format, + optionally leaving identifiers set. + wide_to_long : Wide panel to long format. Less flexible but more + user-friendly than melt. + + Notes + ----- + Reference :ref:`the user guide ` for more examples. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... 
"E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + >>> df + A B C D E + 0 foo one small 1 2 + 1 foo one large 2 4 + 2 foo one large 2 5 + 3 foo two small 3 5 + 4 foo two small 3 6 + 5 bar one large 4 6 + 6 bar one small 5 8 + 7 bar two small 6 9 + 8 bar two large 7 9 + + This first example aggregates values by taking the sum. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc="sum") + >>> table + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + We can also fill missing values using the `fill_value` parameter. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc="sum", fill_value=0) + >>> table + C large small + A B + bar one 4 5 + two 7 6 + foo one 4 1 + two 0 6 + + The next example aggregates by taking the mean across multiple columns. + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': "mean", 'E': "mean"}) + >>> table + D E + A C + bar large 5.500000 7.500000 + small 5.500000 8.500000 + foo large 2.000000 4.500000 + small 2.333333 4.333333 + + We can also calculate multiple types of aggregations for any given + value column. + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': "mean", + ... 'E': ["min", "max", "mean"]}) + >>> table + D E + mean max mean min + A C + bar large 5.500000 9 7.500000 6 + small 5.500000 9 8.500000 8 + foo large 2.000000 5 4.500000 4 + small 2.333333 6 4.333333 2 + """ + + def pivot_table( + self, + values=None, + index=None, + columns=None, + aggfunc: AggFuncType = "mean", + fill_value=None, + margins: bool = False, + dropna: bool = True, + margins_name: Level = "All", + observed: bool = True, + sort: bool = True, + **kwargs, + ) -> DataFrame: + """ Create a spreadsheet-style pivot table as a DataFrame. The levels in the pivot table will be stored in MultiIndex objects (hierarchical indexes) on the index and columns of the result DataFrame. 
Parameters - ----------%s + ---------- values : list-like or scalar, optional Column or columns to aggregate. index : column, Grouper, array, or sequence of the previous @@ -9917,15 +14759,45 @@ def pivot( Examples -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], - ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + >>> df = pd.DataFrame( + ... { + ... "A": [ + ... "foo", + ... "foo", + ... "foo", + ... "foo", + ... "foo", + ... "bar", + ... "bar", + ... "bar", + ... "bar", + ... ], + ... "B": [ + ... "one", + ... "one", + ... "one", + ... "two", + ... "two", + ... "one", + ... "one", + ... "two", + ... "two", + ... ], + ... "C": [ + ... "small", + ... "large", + ... "large", + ... "small", + ... "small", + ... "large", + ... "small", + ... "small", + ... "large", + ... ], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + ... } + ... ) >>> df A B C D E 0 foo one small 1 2 @@ -9940,8 +14812,9 @@ def pivot( This first example aggregates values by taking the sum. - >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc="sum") + >>> table = pd.pivot_table( + ... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum" + ... ) >>> table C large small A B @@ -9952,8 +14825,14 @@ def pivot( We can also fill missing values using the `fill_value` parameter. - >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc="sum", fill_value=0) + >>> table = pd.pivot_table( + ... df, + ... values="D", + ... index=["A", "B"], + ... columns=["C"], + ... aggfunc="sum", + ... fill_value=0, + ... 
) >>> table C large small A B @@ -9964,8 +14843,12 @@ def pivot( The next example aggregates by taking the mean across multiple columns. - >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], - ... aggfunc={'D': "mean", 'E': "mean"}) + >>> table = pd.pivot_table( + ... df, + ... values=["D", "E"], + ... index=["A", "C"], + ... aggfunc={"D": "mean", "E": "mean"}, + ... ) >>> table D E A C @@ -9977,9 +14860,12 @@ def pivot( We can also calculate multiple types of aggregations for any given value column. - >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], - ... aggfunc={'D': "mean", - ... 'E': ["min", "max", "mean"]}) + >>> table = pd.pivot_table( + ... df, + ... values=["D", "E"], + ... index=["A", "C"], + ... aggfunc={"D": "mean", "E": ["min", "max", "mean"]}, + ... ) >>> table D E mean max mean min @@ -9989,23 +14875,6 @@ def pivot( foo large 2.000000 5 4.500000 4 small 2.333333 6 4.333333 2 """ - - @Substitution("") - @Appender(_shared_docs["pivot_table"]) - def pivot_table( - self, - values=None, - index=None, - columns=None, - aggfunc: AggFuncType = "mean", - fill_value=None, - margins: bool = False, - dropna: bool = True, - margins_name: Level = "All", - observed: bool = True, - sort: bool = True, - **kwargs, - ) -> DataFrame: from pandas.core.reshape.pivot import pivot_table return pivot_table( @@ -10575,19 +15444,52 @@ def melt( # ---------------------------------------------------------------------- # Time series-related - @doc( - Series.diff, - klass="DataFrame", - extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " - "Take difference over rows (0) or columns (1).\n", - other_klass="Series", - examples=dedent( - """ + def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: + """ + First discrete difference of element. + + Calculates the difference of a DataFrame element compared with another + element in the DataFrame (default is element in previous row). 
+ + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Take difference over rows (0) or columns (1). + + Returns + ------- + DataFrame + First differences of the Series. + + See Also + -------- + DataFrame.pct_change: Percent change over given number of periods. + DataFrame.shift: Shift index by desired number of periods with an + optional time freq. + Series.diff: First discrete difference of object. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. + The result is calculated according to current dtype in DataFrame, + however dtype of the result is always float64. + + Examples + -------- + Difference with previous row - >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], - ... 'b': [1, 1, 2, 3, 5, 8], - ... 'c': [1, 4, 9, 16, 25, 36]}) + >>> df = pd.DataFrame( + ... { + ... "a": [1, 2, 3, 4, 5, 6], + ... "b": [1, 1, 2, 3, 5, 8], + ... "c": [1, 4, 9, 16, 25, 36], + ... } + ... ) >>> df a b c 0 1 1 1 @@ -10641,14 +15543,12 @@ def melt( Overflow in input dtype - >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) + >>> df = pd.DataFrame({"a": [1, 0]}, dtype=np.uint8) >>> df.diff() a 0 NaN - 1 255.0""" - ), - ) - def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: + 1 255.0 + """ if not lib.is_integer(periods): if not (is_float(periods) and periods.is_integer()): raise ValueError("periods must be an integer") @@ -10756,14 +15656,110 @@ def _gotitem( """ ) - @doc( - _shared_docs["aggregate"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - ) def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + """ + Aggregate using one or more operations over the specified axis. + + Parameters + ---------- + func : function, str, list or dict + Function to use for aggregating the data. 
If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row. + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + See Also + -------- + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. + DataFrame.groupby : Perform operations over groups. + DataFrame.resample : Perform operations over resampled bins. + DataFrame.rolling : Perform operations over rolling window. + DataFrame.expanding : Perform operations over expanding window. + core.window.ewm.ExponentialMovingWindow : Perform operation over exponential + weighted window. + + Notes + ----- + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + A passed user-defined-function will be passed a Series for evaluation. 
+ + If ``func`` defines an index relabeling, ``axis`` must be ``0`` or ``index``. + + Examples + -------- + >>> df = pd.DataFrame( + ... [[1, 2, 3], [4, 5, 6], [7, 8, 9], [np.nan, np.nan, np.nan]], + ... columns=["A", "B", "C"], + ... ) + + Aggregate these functions over the rows. + + >>> df.agg(["sum", "min"]) + A B C + sum 12.0 15.0 18.0 + min 1.0 2.0 3.0 + + Different aggregations per column. + + >>> df.agg({"A": ["sum", "min"], "B": ["min", "max"]}) + A B + sum 12.0 NaN + min 1.0 2.0 + max NaN 8.0 + + Aggregate different functions over the columns + and rename the index of the resulting DataFrame. + + >>> df.agg(x=("A", "max"), y=("B", "min"), z=("C", "mean")) + A B C + x 7.0 NaN NaN + y NaN 2.0 NaN + z NaN NaN 6.0 + + Aggregate over the columns. + + >>> df.agg("mean", axis="columns") + 0 2.0 + 1 5.0 + 2 8.0 + 3 NaN + dtype: float64 + """ from pandas.core.apply import frame_apply axis = self._get_axis_number(axis) @@ -10775,14 +15771,147 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): agg = aggregate - @doc( - _shared_docs["transform"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs ) -> DataFrame: + """ + Call ``func`` on self producing a DataFrame with the same axis shape as self. + + Parameters + ---------- + func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, + function names or list-like of such. + axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row. 
+ *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + DataFrame + A DataFrame that must have the same length as self. + + Raises + ------ + ValueError : If the returned DataFrame has a different length than self. + + See Also + -------- + DataFrame.agg : Only perform aggregating type operations. + DataFrame.apply : Invoke function on a DataFrame. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({"A": range(3), "B": range(1, 4)}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting DataFrame must have the same length as the + input DataFrame, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + + You can call transform on a GroupBy object: + + >>> df = pd.DataFrame( + ... { + ... "Date": [ + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... ], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... } + ... ) + >>> df + Date Data + 0 2015-05-08 5 + 1 2015-05-07 8 + 2 2015-05-06 6 + 3 2015-05-05 1 + 4 2015-05-08 50 + 5 2015-05-07 100 + 6 2015-05-06 60 + 7 2015-05-05 120 + >>> df.groupby("Date")["Data"].transform("sum") + 0 55 + 1 108 + 2 66 + 3 121 + 4 55 + 5 108 + 6 66 + 7 121 + Name: Data, dtype: int64 + + >>> df = pd.DataFrame( + ... { + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"], + ... } + ... 
) + >>> df + c type + 0 1 m + 1 1 n + 2 1 o + 3 2 m + 4 2 m + 5 2 n + 6 2 n + >>> df["size"] = df.groupby("c")["type"].transform(len) + >>> df + c type size + 0 1 m 3 + 1 1 n 3 + 2 1 o 3 + 3 2 m 4 + 4 2 m 4 + 5 2 n 4 + 6 2 n 4 + """ from pandas.core.apply import frame_apply op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) @@ -11462,25 +16591,235 @@ def join( validate=validate, ) - return joined + return joined + + def merge( + self, + right: DataFrame | Series, + how: MergeHow = "inner", + on: IndexLabel | AnyArrayLike | None = None, + left_on: IndexLabel | AnyArrayLike | None = None, + right_on: IndexLabel | AnyArrayLike | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool | lib.NoDefault = lib.no_default, + indicator: str | bool = False, + validate: MergeValidate | None = None, + ) -> DataFrame: + """ + Merge DataFrame or named Series objects with a database-style join. + + A named Series object is treated as a DataFrame with a single named column. + + The join is done on columns or indexes. If joining columns on + columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes + on indexes or indexes on a column or columns, the index will be passed on. + When performing a cross merge, no column specifications to merge on are + allowed. + + .. warning:: + + If both key columns contain rows where the key is a null value, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. + + Parameters + ---------- + right : DataFrame or named Series + Object to merge with. + how : {'left', 'right', 'outer', 'inner', 'cross', 'left_anti', 'right_anti'}, + default 'inner' + Type of merge to be performed. + + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order. 
+ * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order. + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically. + * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys. + * cross: creates the cartesian product from both frames, preserves the order + of the left keys. + * left_anti: use only keys from left frame that + are not in right frame, similar + to SQL left anti join; preserve key order. + + .. versionadded:: 3.0 + * right_anti: use only keys from right frame + that are not in left frame, similar + to SQL right anti join; preserve key order. + + .. versionadded:: 3.0 + on : Hashable or a sequence of the previous + Column or index level names to join on. These must be found in both + DataFrames. If `on` is None and not merging on indexes then this defaults + to the intersection of the columns in both DataFrames. + left_on : Hashable or a sequence of the previous, or array-like + Column or index level names to join on in the left DataFrame. Can also + be an array or list of arrays of the length of the left DataFrame. + These arrays are treated as if they are columns. + right_on : Hashable or a sequence of the previous, or array-like + Column or index level names to join on in the right DataFrame. Can also + be an array or list of arrays of the length of the right DataFrame. + These arrays are treated as if they are columns. + left_index : bool, default False + Use the index from the left DataFrame as the join key(s). If it is a + MultiIndex, the number of keys in the other DataFrame (either the index + or a number of columns) must match the number of levels. + right_index : bool, default False + Use the index from the right DataFrame as the join key. Same caveats as + left_index. + sort : bool, default False + Sort the join keys lexicographically in the result DataFrame. 
If False, + the order of the join keys depends on the join type (how keyword). + suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. + copy : bool, default False + This keyword is now ignored; changing its value will have no + impact on the method. + + .. deprecated:: 3.0.0 + + This keyword is ignored and will be removed in pandas 4.0. Since + pandas 3.0, this method always returns a new object using a lazy + copy mechanism that defers copies until necessary + (Copy-on-Write). See the `user guide on Copy-on-Write + `__ + for more details. + + indicator : bool or str, default False + If True, adds a column to the output DataFrame called "_merge" with + information on the source of each row. The column can be given a different + name by providing a string argument. The column will have a Categorical + type with the value of "left_only" for observations whose merge key only + appears in the left DataFrame, "right_only" for observations + whose merge key only appears in the right DataFrame, and "both" + if the observation's merge key is found in both DataFrames. + + validate : str, optional + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": check if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": check if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": check if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + + Returns + ------- + DataFrame + A DataFrame of the two merged objects. + + See Also + -------- + merge_ordered : Merge with optional filling/interpolation. 
+ merge_asof : Merge on nearest keys. + DataFrame.join : Similar method using indices. + + Examples + -------- + >>> df1 = pd.DataFrame( + ... {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]} + ... ) + >>> df2 = pd.DataFrame( + ... {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]} + ... ) + >>> df1 + lkey value + 0 foo 1 + 1 bar 2 + 2 baz 3 + 3 foo 5 + >>> df2 + rkey value + 0 foo 5 + 1 bar 6 + 2 baz 7 + 3 foo 8 + + Merge df1 and df2 on the lkey and rkey columns. The value columns have + the default suffixes, _x and _y, appended. + + >>> df1.merge(df2, left_on="lkey", right_on="rkey") + lkey value_x rkey value_y + 0 foo 1 foo 5 + 1 foo 1 foo 8 + 2 bar 2 bar 6 + 3 baz 3 baz 7 + 4 foo 5 foo 5 + 5 foo 5 foo 8 + + Merge DataFrames df1 and df2 with specified left and right suffixes + appended to any overlapping columns. + + >>> df1.merge( + ... df2, left_on="lkey", right_on="rkey", suffixes=("_left", "_right") + ... ) + lkey value_left rkey value_right + 0 foo 1 foo 5 + 1 foo 1 foo 8 + 2 bar 2 bar 6 + 3 baz 3 baz 7 + 4 foo 5 foo 5 + 5 foo 5 foo 8 + + Merge DataFrames df1 and df2, but raise an exception if the DataFrames have + any overlapping columns. + + >>> df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=(False, False)) + Traceback (most recent call last): + ... 
+ ValueError: columns overlap but no suffix specified: + Index(['value'], dtype='object') + + >>> df1 = pd.DataFrame({"a": ["foo", "bar"], "b": [1, 2]}) + >>> df2 = pd.DataFrame({"a": ["foo", "baz"], "c": [3, 4]}) + >>> df1 + a b + 0 foo 1 + 1 bar 2 + >>> df2 + a c + 0 foo 3 + 1 baz 4 - @Substitution("") - @Appender(_merge_doc, indents=2) - def merge( - self, - right: DataFrame | Series, - how: MergeHow = "inner", - on: IndexLabel | AnyArrayLike | None = None, - left_on: IndexLabel | AnyArrayLike | None = None, - right_on: IndexLabel | AnyArrayLike | None = None, - left_index: bool = False, - right_index: bool = False, - sort: bool = False, - suffixes: Suffixes = ("_x", "_y"), - copy: bool | lib.NoDefault = lib.no_default, - indicator: str | bool = False, - validate: MergeValidate | None = None, - ) -> DataFrame: + >>> df1.merge(df2, how="inner", on="a") + a b c + 0 foo 1 3 + + >>> df1.merge(df2, how="left", on="a") + a b c + 0 foo 1 3.0 + 1 bar 2 NaN + + >>> df1 = pd.DataFrame({"left": ["foo", "bar"]}) + >>> df2 = pd.DataFrame({"right": [7, 8]}) + >>> df1 + left + 0 foo + 1 bar + >>> df2 + right + 0 7 + 1 8 + + >>> df1.merge(df2, how="cross") + left right + 0 foo 7 + 1 foo 8 + 2 bar 7 + 3 bar 8 + """ self._check_copy_deprecation(copy) from pandas.core.reshape.merge import merge @@ -12291,7 +17630,6 @@ def any( **kwargs, ) -> Series | bool: ... - @doc(make_doc("any", ndim=1)) def any( self, *, @@ -12300,6 +17638,118 @@ def any( skipna: bool = True, **kwargs, ) -> Series | bool: + """ + Return whether any element is True, potentially over an axis. + + Returns False unless there is at least one element within a series or + along a Dataframe axis that is True or equivalent (e.g. non-zero or + non-empty). + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. For `Series` this parameter + is unused and defaults to 0. 
+ + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + + bool_only : bool, default False + Include only boolean columns. Not implemented for Series. + skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be False, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or scalar + If axis=None, then a scalar boolean is returned. + Otherwise a Series is returned with index matching the index argument. + + See Also + -------- + numpy.any : Numpy version of this method. + Series.any : Return whether any element is True. + Series.all : Return whether all elements are True. + DataFrame.any : Return whether any element is True over requested axis. + DataFrame.all : Return whether all elements are True over requested axis. + + Examples + -------- + **Series** + + For Series input, the output is a scalar indicating whether any element + is True. + + >>> pd.Series([False, False]).any() + False + >>> pd.Series([True, False]).any() + True + >>> pd.Series([], dtype="float64").any() + False + >>> pd.Series([np.nan]).any() + False + >>> pd.Series([np.nan]).any(skipna=False) + True + + **DataFrame** + + Whether each column contains at least one True element (the default). + + >>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) + >>> df + A B C + 0 1 0 0 + 1 2 2 0 + + >>> df.any() + A True + B True + C False + dtype: bool + + Aggregating over the columns. 
+
+ >>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]})
+ >>> df
+ A B
+ 0 True 1
+ 1 False 2
+
+ >>> df.any(axis="columns")
+ 0 True
+ 1 True
+ dtype: bool
+
+ >>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]})
+ >>> df
+ A B
+ 0 True 1
+ 1 False 0
+
+ >>> df.any(axis="columns")
+ 0 True
+ 1 False
+ dtype: bool
+
+ Aggregating over the entire DataFrame with ``axis=None``.
+
+ >>> df.any(axis=None)
+ True
+
+ `any` for an empty DataFrame is an empty Series.
+
+ >>> pd.DataFrame([]).any()
+ Series([], dtype: bool)
+ """
 result = self._logical_func(
 "any", nanops.nanany, axis, bool_only, skipna, **kwargs
 )
@@ -12338,7 +17788,6 @@ def all(
 ) -> Series | bool: ...
 
 @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="all")
- @doc(make_doc("all", ndim=1))
 def all(
 self,
 axis: Axis | None = 0,
@@ -12346,6 +17795,91 @@ def all(
 skipna: bool = True,
 **kwargs,
 ) -> Series | bool:
+ """
+ Return whether all elements are True, potentially over an axis.
+
+ Returns True unless there is at least one element within a series or
+ along a Dataframe axis that is False or equivalent (e.g. zero or
+ empty).
+
+ Parameters
+ ----------
+ axis : {0 or 'index', 1 or 'columns', None}, default 0
+ Indicate which axis or axes should be reduced. For `Series` this parameter
+ is unused and defaults to 0.
+
+ * 0 / 'index' : reduce the index, return a Series whose index is the
+ original column labels.
+ * 1 / 'columns' : reduce the columns, return a Series whose index is the
+ original index.
+ * None : reduce all axes, return a scalar.
+
+ bool_only : bool, default False
+ Include only boolean columns. Not implemented for Series.
+ skipna : bool, default True
+ Exclude NA/null values. If the entire row/column is NA and skipna is
+ True, then the result will be True, as for an empty row/column.
+ If skipna is False, then NA are treated as True, because these are not
+ equal to zero.
+ **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or scalar + If axis=None, then a scalar boolean is returned. + Otherwise a Series is returned with index matching the index argument. + + See Also + -------- + Series.all : Return True if all elements are True. + DataFrame.any : Return True if one (or more) elements are True. + + Examples + -------- + **Series** + + >>> pd.Series([True, True]).all() + True + >>> pd.Series([True, False]).all() + False + >>> pd.Series([], dtype="float64").all() + True + >>> pd.Series([np.nan]).all() + True + >>> pd.Series([np.nan]).all(skipna=False) + True + + **DataFrames** + + Create a DataFrame from a dictionary. + + >>> df = pd.DataFrame({"col1": [True, True], "col2": [True, False]}) + >>> df + col1 col2 + 0 True True + 1 True False + + Default behaviour checks if values in each column all return True. + + >>> df.all() + col1 True + col2 False + dtype: bool + + Specify ``axis='columns'`` to check if values in each row all return True. + + >>> df.all(axis="columns") + 0 True + 1 False + dtype: bool + + Or ``axis=None`` for whether every value is True. + + >>> df.all(axis=None) + False + """ result = self._logical_func( "all", nanops.nanall, axis, bool_only, skipna, **kwargs ) @@ -12385,7 +17919,6 @@ def min( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="min") - @doc(make_doc("min", ndim=2)) def min( self, axis: Axis | None = 0, @@ -12393,6 +17926,67 @@ def min( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return the minimum of the values over the requested axis. + + If you want the *index* of the minimum, use ``idxmin``. This is + the equivalent of the ``numpy.ndarray`` method ``argmin``. + + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. 
+ + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. + + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. + DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> idx = pd.MultiIndex.from_arrays( + ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], + ... names=["blooded", "animal"], + ... ) + >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) + >>> s + blooded animal + warm dog 4 + falcon 2 + cold fish 0 + spider 8 + Name: legs, dtype: int64 + + >>> s.min() + 0 + """ result = super().min( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -12432,7 +18026,6 @@ def max( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="max") - @doc(make_doc("max", ndim=2)) def max( self, axis: Axis | None = 0, @@ -12440,6 +18033,67 @@ def max( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return the maximum of the values over the requested axis. + + If you want the *index* of the maximum, use ``idxmax``. This is + the equivalent of the ``numpy.ndarray`` method ``argmax``. 
+ + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. + + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. + DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> idx = pd.MultiIndex.from_arrays( + ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], + ... names=["blooded", "animal"], + ... ) + >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) + >>> s + blooded animal + warm dog 4 + falcon 2 + cold fish 0 + spider 8 + Name: legs, dtype: int64 + + >>> s.max() + 8 + """ result = super().max( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -12656,25 +18310,101 @@ def mean( **kwargs, ) -> Any: ... - @overload - def mean( - self, - *, - axis: Axis | None, - skipna: bool = ..., - numeric_only: bool = ..., - **kwargs, - ) -> Series | Any: ... 
+ @overload + def mean( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: ... + + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") + def mean( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ) -> Series | Any: + """ + Return the mean of the values over the requested axis. + + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. - @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") - @doc(make_doc("mean", ndim=2)) - def mean( - self, - axis: Axis | None = 0, - skipna: bool = True, - numeric_only: bool = False, - **kwargs, - ) -> Series | Any: + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. + DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.mean() + 2.0 + + With a DataFrame + + >>> df = pd.DataFrame( + ... 
{"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"] + ... ) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.mean() + a 1.5 + b 2.5 + dtype: float64 + + Using axis=1 + + >>> df.mean(axis=1) + tiger 1.5 + zebra 2.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` to avoid + getting an error. + + >>> df = pd.DataFrame( + ... {"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"] + ... ) + >>> df.mean(numeric_only=True) + a 1.5 + dtype: float64 + """ result = super().mean( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -12716,7 +18446,6 @@ def median( @deprecate_nonkeyword_arguments( Pandas4Warning, allowed_args=["self"], name="median" ) - @doc(make_doc("median", ndim=2)) def median( self, axis: Axis | None = 0, @@ -12724,6 +18453,83 @@ def median( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return the median of the values over the requested axis. + + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. + + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. 
+ DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.median() + 2.0 + + With a DataFrame + + >>> df = pd.DataFrame( + ... {"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"] + ... ) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.median() + a 1.5 + b 2.5 + dtype: float64 + + Using axis=1 + + >>> df.median(axis=1) + tiger 1.5 + zebra 2.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` + to avoid getting an error. + + >>> df = pd.DataFrame( + ... {"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"] + ... ) + >>> df.median(numeric_only=True) + a 1.5 + dtype: float64 + """ result = super().median( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -13345,7 +19151,6 @@ def kurt( kurtosis = kurt # type: ignore[assignment] product = prod - @doc(make_doc("cummin", ndim=2)) def cummin( self, axis: Axis = 0, @@ -13354,10 +19159,107 @@ def cummin( *args, **kwargs, ) -> Self: + """ + Return cumulative minimum over a DataFrame or Series axis. + + Returns a DataFrame or Series of the same size containing the cumulative + minimum. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative minimum of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.min : Similar functionality + but ignores ``NaN`` values. 
+ DataFrame.min : Return the minimum over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. + + >>> s.cummin() + 0 2.0 + 1 NaN + 2 2.0 + 3 -1.0 + 4 -1.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cummin(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the minimum + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + + >>> df.cummin() + A B + 0 2.0 1.0 + 1 2.0 NaN + 2 1.0 0.0 + + To iterate over columns and find the minimum in each row, + use ``axis=1`` + + >>> df.cummin(axis=1) + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cummin(data, axis, skipna, *args, **kwargs) - @doc(make_doc("cummax", ndim=2)) def cummax( self, axis: Axis = 0, @@ -13366,10 +19268,107 @@ def cummax( *args, **kwargs, ) -> Self: + """ + Return cumulative maximum over a DataFrame or Series axis. + + Returns a DataFrame or Series of the same size containing the cumulative + maximum. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. 
+ numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative maximum of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.max : Similar functionality + but ignores ``NaN`` values. + DataFrame.max : Return the maximum over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. + + >>> s.cummax() + 0 2.0 + 1 NaN + 2 5.0 + 3 5.0 + 4 5.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cummax(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the maximum + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + + >>> df.cummax() + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 3.0 1.0 + + To iterate over columns and find the maximum in each row, + use ``axis=1`` + + >>> df.cummax(axis=1) + A B + 0 2.0 2.0 + 1 3.0 NaN + 2 1.0 1.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cummax(data, axis, skipna, *args, **kwargs) - @doc(make_doc("cumsum", ndim=2)) def cumsum( self, axis: Axis = 0, @@ -13378,10 +19377,107 @@ def cumsum( *args, **kwargs, ) -> Self: + """ + Return cumulative sum over a DataFrame or Series axis. 
+ + Returns a DataFrame or Series of the same size containing the cumulative + sum. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative sum of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.sum : Similar functionality + but ignores ``NaN`` values. + DataFrame.sum : Return the sum over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. + + >>> s.cumsum() + 0 2.0 + 1 NaN + 2 7.0 + 3 6.0 + 4 6.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cumsum(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the sum + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 
+ + >>> df.cumsum() + A B + 0 2.0 1.0 + 1 5.0 NaN + 2 6.0 1.0 + + To iterate over columns and find the sum in each row, + use ``axis=1`` + + >>> df.cumsum(axis=1) + A B + 0 2.0 3.0 + 1 3.0 NaN + 2 1.0 1.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cumsum(data, axis, skipna, *args, **kwargs) - @doc(make_doc("cumprod", 2)) def cumprod( self, axis: Axis = 0, @@ -13390,6 +19486,104 @@ def cumprod( *args, **kwargs, ) -> Self: + """ + Return cumulative product over a DataFrame or Series axis. + + Returns a DataFrame or Series of the same size containing the cumulative + product. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative product of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.prod : Similar functionality + but ignores ``NaN`` values. + DataFrame.prod : Return the product over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. 
+ + >>> s.cumprod() + 0 2.0 + 1 NaN + 2 10.0 + 3 -10.0 + 4 -0.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cumprod(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the product + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + + >>> df.cumprod() + A B + 0 2.0 1.0 + 1 6.0 NaN + 2 6.0 0.0 + + To iterate over columns and find the product in each row, + use ``axis=1`` + + >>> df.cumprod(axis=1) + A B + 0 2.0 2.0 + 1 3.0 NaN + 2 1.0 0.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cumprod(data, axis, skipna, *args, **kwargs) From 657d9f20acce5703c166a7be98ad20e8e30a5943 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 18:52:12 +0800 Subject: [PATCH 02/10] =?UTF-8?q?replaced=20all=20Appender=E3=80=81Substit?= =?UTF-8?q?ution=E3=80=81doc=20decorators=20in=20pandas/pandas/core/frame.?= =?UTF-8?q?py.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/tests/base/test_misc.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 0a820c0d3e0bd..b48a1ae7bb33a 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -19,9 +19,13 @@ def test_isnull_notnull_docstrings(): # GH#41855 make sure its clear these are aliases doc = pd.DataFrame.notnull.__doc__ - assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n") + assert doc.startswith( + "\n DataFrame.notnull is an alias for DataFrame.notna.\n" + ) doc = pd.DataFrame.isnull.__doc__ - assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n") + 
assert doc.startswith( + "\n DataFrame.isnull is an alias for DataFrame.isna.\n" + ) doc = Series.notnull.__doc__ assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n") From a265ebc0f1a731cf169925badb7bbc5dcdb5141e Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 19:23:03 +0800 Subject: [PATCH 03/10] update --- pandas/core/frame.py | 6 ++---- pandas/tests/base/test_misc.py | 8 ++------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d5506a382b343..07d79ed27b027 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7510,8 +7510,7 @@ def isna(self) -> DataFrame: return result.__finalize__(self, method="isna") def isnull(self) -> DataFrame: - """ - DataFrame.isnull is an alias for DataFrame.isna. + """DataFrame.isnull is an alias for DataFrame.isna. Detect missing values. @@ -7654,8 +7653,7 @@ def notna(self) -> DataFrame: return ~self.isna() def notnull(self) -> DataFrame: - """ - DataFrame.notnull is an alias for DataFrame.notna. + """DataFrame.notnull is an alias for DataFrame.notna. Detect existing (non-missing) values. 
diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index b48a1ae7bb33a..0a820c0d3e0bd 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -19,13 +19,9 @@ def test_isnull_notnull_docstrings(): # GH#41855 make sure its clear these are aliases doc = pd.DataFrame.notnull.__doc__ - assert doc.startswith( - "\n DataFrame.notnull is an alias for DataFrame.notna.\n" - ) + assert doc.startswith("\nDataFrame.notnull is an alias for DataFrame.notna.\n") doc = pd.DataFrame.isnull.__doc__ - assert doc.startswith( - "\n DataFrame.isnull is an alias for DataFrame.isna.\n" - ) + assert doc.startswith("\nDataFrame.isnull is an alias for DataFrame.isna.\n") doc = Series.notnull.__doc__ assert doc.startswith("\nSeries.notnull is an alias for Series.notna.\n") From 19c8ac0bfcf685a642f8f51d7155c1f9a12f4122 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 20:02:11 +0800 Subject: [PATCH 04/10] update --- pandas/core/frame.py | 143 +++---------------------------------------- 1 file changed, 7 insertions(+), 136 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 07d79ed27b027..8c52de1a5bf51 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -67,6 +67,7 @@ ) from pandas.util._decorators import ( deprecate_nonkeyword_arguments, + doc, set_module, ) from pandas.util._exceptions import ( @@ -7509,75 +7510,10 @@ def isna(self) -> DataFrame: result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes) return result.__finalize__(self, method="isna") + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isnull(self) -> DataFrame: - """DataFrame.isnull is an alias for DataFrame.isna. - - Detect missing values. - - Return a boolean same-sized object indicating if the values are NA. - NA values, such as None or :attr:`numpy.NaN`, gets mapped to True - values. - Everything else gets mapped to False values. 
Characters such as empty - strings ``''`` or :attr:`numpy.inf` are not considered NA values. - - Returns - ------- - Series/DataFrame - Mask of bool values for each element in Series/DataFrame - that indicates whether an element is an NA value. - - See Also - -------- - Series.isnull : Alias of isna. - DataFrame.isnull : Alias of isna. - Series.notna : Boolean inverse of isna. - DataFrame.notna : Boolean inverse of isna. - Series.dropna : Omit axes labels with missing values. - DataFrame.dropna : Omit axes labels with missing values. - isna : Top-level isna. - - Examples - -------- - Show which entries in a DataFrame are NA. - - >>> df = pd.DataFrame( - ... dict( - ... age=[5, 6, np.nan], - ... born=[ - ... pd.NaT, - ... pd.Timestamp("1939-05-27"), - ... pd.Timestamp("1940-04-25"), - ... ], - ... name=["Alfred", "Batman", ""], - ... toy=[None, "Batmobile", "Joker"], - ... ) - ... ) - >>> df - age born name toy - 0 5.0 NaT Alfred NaN - 1 6.0 1939-05-27 Batman Batmobile - 2 NaN 1940-04-25 Joker - - >>> df.isna() - age born name toy - 0 False True False True - 1 False False False False - 2 True False False False - - Show which entries in a Series are NA. - - >>> ser = pd.Series([5, 6, np.nan]) - >>> ser - 0 5.0 - 1 6.0 - 2 NaN - dtype: float64 - - >>> ser.isna() - 0 False - 1 False - 2 True - dtype: bool + """ + DataFrame.isnull is an alias for DataFrame.isna. """ return self.isna() @@ -7652,75 +7588,10 @@ def notna(self) -> DataFrame: """ return ~self.isna() + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notnull(self) -> DataFrame: - """DataFrame.notnull is an alias for DataFrame.notna. - - Detect existing (non-missing) values. - - Return a boolean same-sized object indicating if the values are not NA. - Non-missing values get mapped to True. Characters such as empty - strings ``''`` or :attr:`numpy.inf` are not considered NA values. - NA values, such as None or :attr:`numpy.NaN`, get mapped to False - values. 
- - Returns - ------- - Series/DataFrame - Mask of bool values for each element in Series/DataFrame - that indicates whether an element is not an NA value. - - See Also - -------- - Series.notnull : Alias of notna. - DataFrame.notnull : Alias of notna. - Series.isna : Boolean inverse of notna. - DataFrame.isna : Boolean inverse of notna. - Series.dropna : Omit axes labels with missing values. - DataFrame.dropna : Omit axes labels with missing values. - notna : Top-level notna. - - Examples - -------- - Show which entries in a DataFrame are not NA. - - >>> df = pd.DataFrame( - ... dict( - ... age=[5, 6, np.nan], - ... born=[ - ... pd.NaT, - ... pd.Timestamp("1939-05-27"), - ... pd.Timestamp("1940-04-25"), - ... ], - ... name=["Alfred", "Batman", ""], - ... toy=[None, "Batmobile", "Joker"], - ... ) - ... ) - >>> df - age born name toy - 0 5.0 NaT Alfred NaN - 1 6.0 1939-05-27 Batman Batmobile - 2 NaN 1940-04-25 Joker - - >>> df.notna() - age born name toy - 0 True False True False - 1 True True True True - 2 False True True True - - Show which entries in a Series are not NA. - - >>> ser = pd.Series([5, 6, np.nan]) - >>> ser - 0 5.0 - 1 6.0 - 2 NaN - dtype: float64 - - >>> ser.notna() - 0 True - 1 True - 2 False - dtype: bool + """ + DataFrame.notnull is an alias for DataFrame.notna. 
""" return ~self.isna() From c1248baca3b8d6bfcc599f0bf8abe0cf44fa6a72 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 20:39:06 +0800 Subject: [PATCH 05/10] update --- pandas/core/groupby/generic.py | 1345 +++++++++++++++++++++++++++++++- 1 file changed, 1330 insertions(+), 15 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 63d89982d77d4..806769b3d4102 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -32,9 +32,6 @@ SpecificationError, ) from pandas.util._decorators import ( - Appender, - Substitution, - doc, set_module, ) from pandas.util._exceptions import find_stack_level @@ -71,7 +68,6 @@ from pandas.core.groupby.groupby import ( GroupBy, GroupByPlot, - _transform_template, ) from pandas.core.indexes.api import ( Index, @@ -702,9 +698,149 @@ def _wrap_applied_output( """ ) - @Substitution(klass="Series", example=__examples_series_doc) - @Appender(_transform_template) def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + """ + Call ``func`` on self producing a Series with the same axis shape as self. + + Parameters + ---------- + func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a Series or when passed to Series.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, function names or list-like of such + + axis : {0 or 'index'} + Unused. Parameter needed for compatibility with DataFrame. + + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + Series + A Series that must have the same length as self. 
+ + Raises + ------ + ValueError : If the returned Series has a different length than self. + + See Also + -------- + Series.agg : Only perform aggregating type operations. + Series.apply : Invoke function on a Series. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({{"A": range(3), "B": range(1, 4)}}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting Series must have the same length as the + input Series, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + + You can call transform on a GroupBy object: + + >>> df = pd.DataFrame( + ... { + ... { + ... "Date": [ + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... ], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... } + ... } + ... ) + >>> df + Date Data + 0 2015-05-08 5 + 1 2015-05-07 8 + 2 2015-05-06 6 + 3 2015-05-05 1 + 4 2015-05-08 50 + 5 2015-05-07 100 + 6 2015-05-06 60 + 7 2015-05-05 120 + >>> df.groupby("Date")["Data"].transform("sum") + 0 55 + 1 108 + 2 66 + 3 121 + 4 55 + 5 108 + 6 66 + 7 121 + Name: Data, dtype: int64 + + >>> df = pd.DataFrame( + ... { + ... { + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"], + ... } + ... } + ... 
) + >>> df + c type + 0 1 m + 1 1 n + 2 1 o + 3 2 m + 4 2 m + 5 2 n + 6 2 n + >>> df["size"] = df.groupby("c")["type"].transform(len) + >>> df + c type size + 0 1 m 3 + 1 1 n 3 + 2 1 o 3 + 3 2 m 4 + 4 2 m 4 + 5 2 n 4 + 6 2 n 4 + """ return self._transform( func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs ) @@ -899,8 +1035,246 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame: result.index = default_index(len(result)) return result - @doc(Series.describe) def describe(self, percentiles=None, include=None, exclude=None) -> Series: + """ + Generate descriptive statistics. + + Descriptive statistics include those that summarize the central + tendency, dispersion and shape of a + dataset's distribution, excluding ``NaN`` values. + + Analyzes both numeric and object series, as well + as ``DataFrame`` column sets of mixed data types. The output + will vary depending on what is provided. Refer to the notes + below for more detail. + + Parameters + ---------- + percentiles : list-like of numbers, optional + The percentiles to include in the output. All should + fall between 0 and 1. The default, ``None``, will automatically + return the 25th, 50th, and 75th percentiles. + include : 'all', list-like of dtypes or None (default), optional + A white list of data types to include in the result. Ignored + for ``Series``. Here are the options: + + - 'all' : All columns of the input will be included in the output. + - A list-like of dtypes : Limits the results to the + provided data types. + To limit the result to numeric types submit + ``numpy.number``. To limit it instead to object columns submit + the ``numpy.object`` data type. Strings + can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To + select pandas categorical columns, use ``'category'`` + - None (default) : The result will include all numeric columns. 
+ exclude : list-like of dtypes or None (default), optional, + A black list of data types to omit from the result. Ignored + for ``Series``. Here are the options: + + - A list-like of dtypes : Excludes the provided data types + from the result. To exclude numeric types submit + ``numpy.number``. To exclude object columns submit the data + type ``numpy.object``. Strings can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To + exclude pandas categorical columns, use ``'category'`` + - None (default) : The result will exclude nothing. + + Returns + ------- + Series or DataFrame + Summary statistics of the Series or Dataframe provided. + + See Also + -------- + DataFrame.count: Count number of non-NA/null observations. + DataFrame.max: Maximum of the values in the object. + DataFrame.min: Minimum of the values in the object. + DataFrame.mean: Mean of the values. + DataFrame.std: Standard deviation of the observations. + DataFrame.select_dtypes: Subset of a DataFrame including/excluding + columns based on their dtype. + + Notes + ----- + For numeric data, the result's index will include ``count``, + ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and + upper percentiles. By default the lower percentile is ``25`` and the + upper percentile is ``75``. The ``50`` percentile is the + same as the median. + + For object data (e.g. strings), the result's index + will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` + is the most common value. The ``freq`` is the most common value's + frequency. + + If multiple object values have the highest count, then the + ``count`` and ``top`` results will be arbitrarily chosen from + among those with the highest count. + + For mixed data types provided via a ``DataFrame``, the default is to + return only an analysis of numeric columns. 
If the DataFrame consists + only of object and categorical data without any numeric columns, the + default is to return an analysis of both the object and categorical + columns. If ``include='all'`` is provided as an option, the result + will include a union of attributes of each type. + + The `include` and `exclude` parameters can be used to limit + which columns in a ``DataFrame`` are analyzed for the output. + The parameters are ignored when analyzing a ``Series``. + + Examples + -------- + Describing a numeric ``Series``. + + >>> s = pd.Series([1, 2, 3]) + >>> s.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + dtype: float64 + + Describing a categorical ``Series``. + + >>> s = pd.Series(["a", "a", "b", "c"]) + >>> s.describe() + count 4 + unique 3 + top a + freq 2 + dtype: object + + Describing a timestamp ``Series``. + + >>> s = pd.Series( + ... [ + ... np.datetime64("2000-01-01"), + ... np.datetime64("2010-01-01"), + ... np.datetime64("2010-01-01"), + ... ] + ... ) + >>> s.describe() + count 3 + mean 2006-09-01 08:00:00 + min 2000-01-01 00:00:00 + 25% 2004-12-31 12:00:00 + 50% 2010-01-01 00:00:00 + 75% 2010-01-01 00:00:00 + max 2010-01-01 00:00:00 + dtype: object + + Describing a ``DataFrame``. By default only numeric fields + are returned. + + >>> df = pd.DataFrame( + ... { + ... "categorical": pd.Categorical(["d", "e", "f"]), + ... "numeric": [1, 2, 3], + ... "object": ["a", "b", "c"], + ... } + ... ) + >>> df.describe() + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Describing all columns of a ``DataFrame`` regardless of data type. 
+ + >>> df.describe(include="all") # doctest: +SKIP + categorical numeric object + count 3 3.0 3 + unique 3 NaN 3 + top f NaN a + freq 1 NaN 1 + mean NaN 2.0 NaN + std NaN 1.0 NaN + min NaN 1.0 NaN + 25% NaN 1.5 NaN + 50% NaN 2.0 NaN + 75% NaN 2.5 NaN + max NaN 3.0 NaN + + Describing a column from a ``DataFrame`` by accessing it as + an attribute. + + >>> df.numeric.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + Name: numeric, dtype: float64 + + Including only numeric columns in a ``DataFrame`` description. + + >>> df.describe(include=[np.number]) + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Including only string columns in a ``DataFrame`` description. + + >>> df.describe(include=[object]) # doctest: +SKIP + object + count 3 + unique 3 + top a + freq 1 + + Including only categorical columns from a ``DataFrame`` description. + + >>> df.describe(include=["category"]) + categorical + count 3 + unique 3 + top d + freq 1 + + Excluding numeric columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[np.number]) # doctest: +SKIP + categorical object + count 3 3 + unique 3 3 + top f a + freq 1 1 + + Excluding object columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[object]) # doctest: +SKIP + categorical numeric + count 3 3.0 + unique 3 NaN + top f NaN + freq 1 NaN + mean NaN 2.0 + std NaN 1.0 + min NaN 1.0 + 25% NaN 1.5 + 50% NaN 2.0 + 75% NaN 2.5 + max NaN 3.0 + """ return super().describe( percentiles=percentiles, include=include, exclude=exclude ) @@ -1385,15 +1759,333 @@ def alt(obj): ) @property - @doc(Series.plot.__doc__) def plot(self) -> GroupByPlot: + """ + Make plots of Series or DataFrame. + + Uses the backend specified by the + option ``plotting.backend``. By default, matplotlib is used. + + Parameters + ---------- + data : Series or DataFrame + The object for which the method is called. 
+ + Attributes + ---------- + x : label or position, default None + Only used if data is a DataFrame. + y : label, position or list of label, positions, default None + Allows plotting of one column versus another. Only used if data is a + DataFrame. + kind : str + The kind of plot to produce: + + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot (DataFrame only) + - 'hexbin' : hexbin plot (DataFrame only) + ax : matplotlib axes object, default None + An axes of the current figure. + subplots : bool or sequence of iterables, default False + Whether to group columns into subplots: + + - ``False`` : No subplots will be used + - ``True`` : Make separate subplots for each column. + - sequence of iterables of column labels: Create a subplot for each + group of columns. For example `[('a', 'c'), ('b', 'd')]` will + create 2 subplots: one with columns 'a' and 'c', and one + with columns 'b' and 'd'. Remaining columns that aren't specified + will be plotted in additional subplots (one per column). + + sharex : bool, default True if ax is None else False + In case ``subplots=True``, share x axis and set some x axis labels + to invisible; defaults to True if ax is None otherwise False if + an ax is passed in; Be aware, that passing in both an ax and + ``sharex=True`` will alter all x axis labels for all axis in a figure. + sharey : bool, default False + In case ``subplots=True``, + share y axis and set some y axis labels to invisible. + layout : tuple, optional + (rows, columns) for the layout of subplots. + figsize : a tuple (width, height) in inches + Size of a figure object. + use_index : bool, default True + Use index as ticks for x axis. + title : str or list + Title to use for the plot. 
If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. + grid : bool, default None (matlab style default) + Axis grid lines. + legend : bool or {'reverse'} + Place legend on axis subplots. + style : list or dict + The matplotlib line style per column. + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis. + + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis. + + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes. + + xticks : sequence + Values to use for the xticks. + yticks : sequence + Values to use for the yticks. + xlim : 2-tuple/list + Set the x limits of the current axes. + ylim : 2-tuple/list + Set the y limits of the current axes. + xlabel : label, optional + Name to use for the xlabel on x-axis. + Default uses index name as xlabel, or the + x-column name for planar plots. + + .. versionchanged:: 2.0.0 + + Now applicable to histograms. + + ylabel : label, optional + Name to use for the ylabel on y-axis. Default will show no ylabel, or the + y-column name for planar plots. + + .. versionchanged:: 2.0.0 + + Now applicable to histograms. + + rot : float, default None + Rotation for ticks (xticks for vertical, yticks for horizontal + plots). + fontsize : float, default None + Font size for xticks and yticks. + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + colorbar : bool, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' + plots). + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center). 
+ table : bool, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data + will be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a + table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : DataFrame, Series, array-like, dict and str + Equivalent to yerr. + stacked : bool, default False in line and bar plots, and True in area plot + If True, create stacked plot. + secondary_y : bool or sequence, default False + Whether to plot on the secondary y-axis if a list/tuple, which + columns to plot on secondary y-axis. + mark_right : bool, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend. + include_bool : bool, default is False + If True, boolean values can be plotted. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + If the backend is not the default matplotlib one, the return value + will be the object returned by the backend. + + See Also + -------- + matplotlib.pyplot.plot : Plot y versus x as lines and/or markers. + DataFrame.hist : Make a histogram. + DataFrame.boxplot : Make a box plot. + DataFrame.plot.scatter : Make a scatter plot with varying marker + point size and color. + DataFrame.plot.hexbin : Make a hexagonal binning plot of + two variables. + DataFrame.plot.kde : Make Kernel Density Estimate plot using + Gaussian kernels. + DataFrame.plot.area : Make a stacked area plot. + DataFrame.plot.bar : Make a bar plot. + DataFrame.plot.barh : Make a horizontal bar plot. 
+ + Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + + Examples + -------- + For Series: + + .. plot:: + :context: close-figs + + >>> ser = pd.Series([1, 2, 3, 3]) + >>> plot = ser.plot(kind="hist", title="My plot") + + For DataFrame: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame( + ... { + ... "length": [1.5, 0.5, 1.2, 0.9, 3], + ... "width": [0.7, 0.2, 0.15, 0.2, 1.1], + ... }, + ... index=["pig", "rabbit", "duck", "chicken", "horse"], + ... ) + >>> plot = df.plot(title="DataFrame Plot") + + For SeriesGroupBy: + + .. plot:: + :context: close-figs + + >>> lst = [-1, -2, -3, 1, 2, 3] + >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst) + >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot") + + For DataFrameGroupBy: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["A", "B", "A", "B"]}) + >>> plot = df.groupby("col2").plot( + ... kind="bar", title="DataFrameGroupBy Plot" + ... ) + """ result = GroupByPlot(self) return result - @doc(Series.nlargest.__doc__) def nlargest( self, n: int = 5, keep: Literal["first", "last", "all"] = "first" ) -> Series: + """ + Return the largest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many descending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` largest values in the Series, sorted in decreasing order. 
+ + See Also + -------- + Series.nsmallest: Get the `n` smallest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. + + Notes + ----- + Faster than ``.sort_values(ascending=False).head(n)`` for small `n` + relative to the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = { + ... "Italy": 59000000, + ... "France": 65000000, + ... "Malta": 434000, + ... "Maldives": 434000, + ... "Brunei": 434000, + ... "Iceland": 337000, + ... "Nauru": 11300, + ... "Tuvalu": 11300, + ... "Anguilla": 11300, + ... "Montserrat": 5200, + ... } + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: int64 + + The `n` largest elements where ``n=5`` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3``. Default `keep` value is 'first' + so Malta will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` and keeping the last duplicates. + Brunei will be kept since it is the last with value 434000 based on + the index order. + + >>> s.nlargest(3, keep="last") + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has five elements due to the three duplicates. + + >>> s.nlargest(3, keep="all") + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + """ f = partial(Series.nlargest, n=n, keep=keep) data = self._obj_with_exclusions # Don't change behavior if result index happens to be the same, i.e. 
@@ -1401,10 +2093,110 @@ def nlargest( result = self._python_apply_general(f, data, not_indexed_same=True) return result - @doc(Series.nsmallest.__doc__) def nsmallest( self, n: int = 5, keep: Literal["first", "last", "all"] = "first" ) -> Series: + """ + Return the smallest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many ascending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` smallest values in the Series, sorted in increasing order. + + See Also + -------- + Series.nlargest: Get the `n` largest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. + + Notes + ----- + Faster than ``.sort_values().head(n)`` for small `n` relative to + the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = { + ... "Italy": 59000000, + ... "France": 65000000, + ... "Brunei": 434000, + ... "Malta": 434000, + ... "Maldives": 434000, + ... "Iceland": 337000, + ... "Nauru": 11300, + ... "Tuvalu": 11300, + ... "Anguilla": 11300, + ... "Montserrat": 5200, + ... } + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Brunei 434000 + Malta 434000 + Maldives 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: int64 + + The `n` smallest elements where ``n=5`` by default. + + >>> s.nsmallest() + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337000 + dtype: int64 + + The `n` smallest elements where ``n=3``. Default `keep` value is + 'first' so Nauru and Tuvalu will be kept. 
+ + >>> s.nsmallest(3) + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` and keeping the last + duplicates. Anguilla and Tuvalu will be kept since they are the last + with value 11300 based on the index order. + + >>> s.nsmallest(3, keep="last") + Montserrat 5200 + Anguilla 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has four elements due to the three duplicates. + + >>> s.nsmallest(3, keep="all") + Montserrat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: int64 + """ f = partial(Series.nsmallest, n=n, keep=keep) data = self._obj_with_exclusions # Don't change behavior if result index happens to be the same, i.e. @@ -2423,9 +3215,145 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): """ ) - @Substitution(klass="DataFrame", example=__examples_dataframe_doc) - @Appender(_transform_template) def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + """ + Call ``func`` on self producing a DataFrame with the same axis shape as self. + + Parameters + ---------- + func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, + function names or list-like of such. + axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row. + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. 
+ + Returns + ------- + DataFrame + A DataFrame that must have the same length as self. + + Raises + ------ + ValueError : If the returned DataFrame has a different length than self. + + See Also + -------- + DataFrame.agg : Only perform aggregating type operations. + DataFrame.apply : Invoke function on a DataFrame. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({"A": range(3), "B": range(1, 4)}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting DataFrame must have the same length as the + input DataFrame, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + + You can call transform on a GroupBy object: + + >>> df = pd.DataFrame( + ... { + ... "Date": [ + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... ], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... } + ... ) + >>> df + Date Data + 0 2015-05-08 5 + 1 2015-05-07 8 + 2 2015-05-06 6 + 3 2015-05-05 1 + 4 2015-05-08 50 + 5 2015-05-07 100 + 6 2015-05-06 60 + 7 2015-05-05 120 + >>> df.groupby("Date")["Data"].transform("sum") + 0 55 + 1 108 + 2 66 + 3 121 + 4 55 + 5 108 + 6 66 + 7 121 + Name: Data, dtype: int64 + + >>> df = pd.DataFrame( + ... { + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"], + ... } + ... 
) + >>> df + c type + 0 1 m + 1 1 n + 2 1 o + 3 2 m + 4 2 m + 5 2 n + 6 2 n + >>> df["size"] = df.groupby("c")["type"].transform(len) + >>> df + c type size + 0 1 m 3 + 1 1 n 3 + 2 1 o 3 + 3 2 m 4 + 4 2 m 4 + 5 2 n 4 + 6 2 n 4 + """ return self._transform( func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs ) @@ -3256,30 +4184,417 @@ def kurt( ) @property - @doc(DataFrame.plot.__doc__) def plot(self) -> GroupByPlot: + """ + Make plots of Series or DataFrame. + + Uses the backend specified by the + option ``plotting.backend``. By default, matplotlib is used. + + Parameters + ---------- + data : Series or DataFrame + The object for which the method is called. + + Attributes + ---------- + x : label or position, default None + Only used if data is a DataFrame. + y : label, position or list of label, positions, default None + Allows plotting of one column versus another. Only used if data is a + DataFrame. + kind : str + The kind of plot to produce: + + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot (DataFrame only) + - 'hexbin' : hexbin plot (DataFrame only) + ax : matplotlib axes object, default None + An axes of the current figure. + subplots : bool or sequence of iterables, default False + Whether to group columns into subplots: + + - ``False`` : No subplots will be used + - ``True`` : Make separate subplots for each column. + - sequence of iterables of column labels: Create a subplot for each + group of columns. For example `[('a', 'c'), ('b', 'd')]` will + create 2 subplots: one with columns 'a' and 'c', and one + with columns 'b' and 'd'. Remaining columns that aren't specified + will be plotted in additional subplots (one per column). 
+ + sharex : bool, default True if ax is None else False + In case ``subplots=True``, share x axis and set some x axis labels + to invisible; defaults to True if ax is None otherwise False if + an ax is passed in; Be aware, that passing in both an ax and + ``sharex=True`` will alter all x axis labels for all axis in a figure. + sharey : bool, default False + In case ``subplots=True``, + share y axis and set some y axis labels to invisible. + layout : tuple, optional + (rows, columns) for the layout of subplots. + figsize : a tuple (width, height) in inches + Size of a figure object. + use_index : bool, default True + Use index as ticks for x axis. + title : str or list + Title to use for the plot. If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. + grid : bool, default None (matlab style default) + Axis grid lines. + legend : bool or {'reverse'} + Place legend on axis subplots. + style : list or dict + The matplotlib line style per column. + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis. + + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis. + + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes. + + xticks : sequence + Values to use for the xticks. + yticks : sequence + Values to use for the yticks. + xlim : 2-tuple/list + Set the x limits of the current axes. + ylim : 2-tuple/list + Set the y limits of the current axes. + xlabel : label, optional + Name to use for the xlabel on x-axis. + Default uses index name as xlabel, or the + x-column name for planar plots. + + .. versionchanged:: 2.0.0 + + Now applicable to histograms. + + ylabel : label, optional + Name to use for the ylabel on y-axis. Default will show no ylabel, or the + y-column name for planar plots. + + .. versionchanged:: 2.0.0 + + Now applicable to histograms. 
+
+ rot : float, default None
+ Rotation for ticks (xticks for vertical, yticks for horizontal
+ plots).
+ fontsize : float, default None
+ Font size for xticks and yticks.
+ colormap : str or matplotlib colormap object, default None
+ Colormap to select colors from. If string, load colormap with that
+ name from matplotlib.
+ colorbar : bool, optional
+ If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
+ plots).
+ position : float
+ Specify relative alignments for bar plot layout.
+ From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
+ (center).
+ table : bool, Series or DataFrame, default False
+ If True, draw a table using the data in the DataFrame and the data
+ will be transposed to meet matplotlib's default layout.
+ If a Series or DataFrame is passed, use passed data to draw a
+ table.
+ yerr : DataFrame, Series, array-like, dict and str
+ See :ref:`Plotting with Error Bars <visualization.errorbars>` for
+ detail.
+ xerr : DataFrame, Series, array-like, dict and str
+ Equivalent to yerr.
+ stacked : bool, default False in line and bar plots, and True in area plot
+ If True, create stacked plot.
+ secondary_y : bool or sequence, default False
+ Whether to plot on the secondary y-axis if a list/tuple, which
+ columns to plot on secondary y-axis.
+ mark_right : bool, default True
+ When using a secondary_y axis, automatically mark the column
+ labels with "(right)" in the legend.
+ include_bool : bool, default is False
+ If True, boolean values can be plotted.
+ backend : str, default None
+ Backend to use instead of the backend specified in the option
+ ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
+ specify the ``plotting.backend`` for the whole session, set
+ ``pd.options.plotting.backend``.
+ **kwargs
+ Options to pass to matplotlib plotting method. 
+ + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + If the backend is not the default matplotlib one, the return value + will be the object returned by the backend. + + See Also + -------- + matplotlib.pyplot.plot : Plot y versus x as lines and/or markers. + DataFrame.hist : Make a histogram. + DataFrame.boxplot : Make a box plot. + DataFrame.plot.scatter : Make a scatter plot with varying marker + point size and color. + DataFrame.plot.hexbin : Make a hexagonal binning plot of + two variables. + DataFrame.plot.kde : Make Kernel Density Estimate plot using + Gaussian kernels. + DataFrame.plot.area : Make a stacked area plot. + DataFrame.plot.bar : Make a bar plot. + DataFrame.plot.barh : Make a horizontal bar plot. + + Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center) + + Examples + -------- + For Series: + + .. plot:: + :context: close-figs + + >>> ser = pd.Series([1, 2, 3, 3]) + >>> plot = ser.plot(kind="hist", title="My plot") + + For DataFrame: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame( + ... { + ... "length": [1.5, 0.5, 1.2, 0.9, 3], + ... "width": [0.7, 0.2, 0.15, 0.2, 1.1], + ... }, + ... index=["pig", "rabbit", "duck", "chicken", "horse"], + ... ) + >>> plot = df.plot(title="DataFrame Plot") + + For SeriesGroupBy: + + .. plot:: + :context: close-figs + + >>> lst = [-1, -2, -3, 1, 2, 3] + >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst) + >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot") + + For DataFrameGroupBy: + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["A", "B", "A", "B"]}) + >>> plot = df.groupby("col2").plot( + ... kind="bar", title="DataFrameGroupBy Plot" + ... 
 )
+ """
 result = GroupByPlot(self)
 return result
 
- @doc(DataFrame.corr.__doc__)
 def corr(
 self,
 method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson",
 min_periods: int = 1,
 numeric_only: bool = False,
 ) -> DataFrame:
+ """
+ Compute pairwise correlation of columns, excluding NA/null values.
+
+ Parameters
+ ----------
+ method : {'pearson', 'kendall', 'spearman'} or callable
+ Method of correlation:
+
+ * pearson : standard correlation coefficient
+ * kendall : Kendall Tau correlation coefficient
+ * spearman : Spearman rank correlation
+ * callable: callable with input two 1d ndarrays
+ and returning a float. Note that the returned matrix from corr
+ will have 1 along the diagonals and will be symmetric
+ regardless of the callable's behavior.
+ min_periods : int, optional
+ Minimum number of observations required per pair of columns
+ to have a valid result. Currently only available for Pearson
+ and Spearman correlation.
+ numeric_only : bool, default False
+ Include only `float`, `int` or `boolean` data.
+
+ .. versionchanged:: 2.0.0
+ The default value of ``numeric_only`` is now ``False``.
+
+ Returns
+ -------
+ DataFrame
+ Correlation matrix.
+
+ See Also
+ --------
+ DataFrame.corrwith : Compute pairwise correlation with another
+ DataFrame or Series.
+ Series.corr : Compute the correlation between two Series.
+
+ Notes
+ -----
+ Pearson, Kendall and Spearman correlation are currently
+ computed using pairwise complete observations.
+
+ * `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
+ * `Kendall rank correlation coefficient <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
+ * `Spearman's rank correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_
+
+ Examples
+ --------
+ >>> def histogram_intersection(a, b):
+ ... v = np.minimum(a, b).sum().round(decimals=1)
+ ... return v
+ >>> df = pd.DataFrame(
+ ... [(0.2, 0.3), (0.0, 0.6), (0.6, 0.0), (0.2, 0.1)],
+ ... columns=["dogs", "cats"],
+ ... )
+ >>> df.corr(method=histogram_intersection)
+ dogs cats
+ dogs 1.0 0.3
+ cats 0.3 1.0
+
+ >>> df = pd.DataFrame(
+ ... 
 [(1, 1), (2, np.nan), (np.nan, 3), (4, 4)], columns=["dogs", "cats"]
+ ... )
+ >>> df.corr(min_periods=3)
+ dogs cats
+ dogs 1.0 NaN
+ cats NaN 1.0
+ """
 result = self._op_via_apply(
 "corr", method=method, min_periods=min_periods, numeric_only=numeric_only
 )
 return result
 
- @doc(DataFrame.cov.__doc__)
 def cov(
 self,
 min_periods: int | None = None,
 ddof: int | None = 1,
 numeric_only: bool = False,
 ) -> DataFrame:
+ """
+ Compute pairwise covariance of columns, excluding NA/null values.
+
+ Compute the pairwise covariance among the series of a DataFrame.
+ The returned data frame is the `covariance matrix
+ <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
+ of the DataFrame.
+
+ Both NA and null values are automatically excluded from the
+ calculation. (See the note below about bias from missing values.)
+ A threshold can be set for the minimum number of
+ observations for each value created. Comparisons with observations
+ below this threshold will be returned as ``NaN``.
+
+ This method is generally used for the analysis of time series data to
+ understand the relationship between different measures
+ across time.
+
+ Parameters
+ ----------
+ min_periods : int, optional
+ Minimum number of observations required per pair of columns
+ to have a valid result.
+
+ ddof : int, default 1
+ Delta degrees of freedom. The divisor used in calculations
+ is ``N - ddof``, where ``N`` represents the number of elements.
+ This argument is applicable only when no ``nan`` is in the dataframe.
+
+ numeric_only : bool, default False
+ Include only `float`, `int` or `boolean` data.
+
+ .. versionchanged:: 2.0.0
+ The default value of ``numeric_only`` is now ``False``.
+
+ Returns
+ -------
+ DataFrame
+ The covariance matrix of the series of the DataFrame.
+
+ See Also
+ --------
+ Series.cov : Compute covariance with another Series.
+ core.window.ewm.ExponentialMovingWindow.cov : Exponential weighted sample
+ covariance.
+ core.window.expanding.Expanding.cov : Expanding sample covariance. 
+ core.window.rolling.Rolling.cov : Rolling sample covariance.
+
+ Notes
+ -----
+ Returns the covariance matrix of the DataFrame's time series.
+ The covariance is normalized by N-ddof.
+
+ For DataFrames that have Series that are missing data (assuming that
+ data is `missing at random
+ <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
+ the returned covariance matrix will be an unbiased estimate
+ of the variance and covariance between the member Series.
+
+ However, for many applications this estimate may not be acceptable
+ because the estimate covariance matrix is not guaranteed to be positive
+ semi-definite. This could lead to estimate correlations having
+ absolute values which are greater than one, and/or a non-invertible
+ covariance matrix. See `Estimation of covariance matrices
+ <https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices>`__ for more details.
+
+ Examples
+ --------
+ >>> df = pd.DataFrame(
+ ... [(1, 2), (0, 3), (2, 0), (1, 1)], columns=["dogs", "cats"]
+ ... )
+ >>> df.cov()
+ dogs cats
+ dogs 0.666667 -1.000000
+ cats -1.000000 1.666667
+
+ >>> np.random.seed(42)
+ >>> df = pd.DataFrame(
+ ... np.random.randn(1000, 5), columns=["a", "b", "c", "d", "e"]
+ ... 
) + >>> df.cov() + a b c d e + a 0.998438 -0.020161 0.059277 -0.008943 0.014144 + b -0.020161 1.059352 -0.008543 -0.024738 0.009826 + c 0.059277 -0.008543 1.010670 -0.001486 -0.000271 + d -0.008943 -0.024738 -0.001486 0.921297 -0.013692 + e 0.014144 0.009826 -0.000271 -0.013692 0.977795 + + **Minimum number of periods** + + This method also supports an optional ``min_periods`` keyword + that specifies the required minimum number of non-NA observations for + each column pair in order to have a valid result: + + >>> np.random.seed(42) + >>> df = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"]) + >>> df.loc[df.index[:5], "a"] = np.nan + >>> df.loc[df.index[5:10], "b"] = np.nan + >>> df.cov(min_periods=12) + a b c + a 0.316741 NaN -0.150812 + b NaN 1.248003 0.191417 + c -0.150812 0.191417 0.895202 + """ result = self._op_via_apply( "cov", min_periods=min_periods, ddof=ddof, numeric_only=numeric_only ) From 706090269bca6cfea10a0a8767497f500dc8ddf0 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 22:41:22 +0800 Subject: [PATCH 06/10] update --- pandas/core/groupby/generic.py | 1507 +++++--------------------------- 1 file changed, 232 insertions(+), 1275 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 806769b3d4102..1738c6f1011d9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -32,6 +32,7 @@ SpecificationError, ) from pandas.util._decorators import ( + doc, set_module, ) from pandas.util._exceptions import find_stack_level @@ -700,146 +701,138 @@ def _wrap_applied_output( def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): """ - Call ``func`` on self producing a Series with the same axis shape as self. + Call function producing a same-indexed Series on each group. + + Returns a Series having the same indexes as the original object + filled with the transformed values. 
Parameters ---------- - func : function, str, list-like or dict-like - Function to use for transforming the data. If a function, must either - work when passed a Series or when passed to Series.apply. If func - is both list-like and dict-like, dict-like behavior takes precedence. + func : function, str + Function to apply to each group. + See the Notes section below for requirements. - Accepted combinations are: + Accepted inputs are: - - function - - string function name - - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` - - dict-like of axis labels -> functions, function names or list-like of such + - String + - Python function + - Numba JIT function with ``engine='numba'`` specified. - axis : {0 or 'index'} - Unused. Parameter needed for compatibility with DataFrame. + Only passing a single function is supported with this engine. + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + If a string is chosen, then it needs to be the name + of the groupby method you want to use. *args - Positional arguments to pass to `func`. + Positional arguments to pass to func. + engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` + or the global setting ``compute.use_numba`` + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. 
The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function + **kwargs - Keyword arguments to pass to `func`. + Keyword arguments to be passed into func. Returns ------- Series - A Series that must have the same length as self. - - Raises - ------ - ValueError : If the returned Series has a different length than self. + Series with the same indexes as the original object filled + with transformed values. See Also -------- - Series.agg : Only perform aggregating type operations. - Series.apply : Invoke function on a Series. + Series.groupby.apply : Apply function ``func`` group-wise and combine + the results together. + Series.groupby.aggregate : Aggregate using one or more operations. + Series.transform : Call ``func`` on self producing a Series with the + same axis shape as self. Notes ----- - Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` - for more details. + Each group is endowed the attribute 'name' in case you need to know + which group you are working on. + + The current implementation imposes three requirements on f: + + * f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, if `f` returns a scalar it will be broadcast to have the + same shape as the input subframe. + * if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. + * f must not mutate groups. Mutation is not supported and may + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. + + When using ``engine='numba'``, there will be no "fall back" behavior internally. 
+ The group data and group index will be passed as numpy arrays to the JITed + user defined function, and no alternative execution attempts will be tried. + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + .. versionchanged:: 2.0.0 + + When using ``.transform`` on a grouped DataFrame and + the transformation function returns a DataFrame, + pandas now aligns the result's index with the input's index. + You can call ``.to_numpy()`` on the result of + the transformation function to avoid alignment. Examples -------- - >>> df = pd.DataFrame({{"A": range(3), "B": range(1, 4)}}) - >>> df - A B - 0 0 1 - 1 1 2 - 2 2 3 - >>> df.transform(lambda x: x + 1) - A B - 0 1 2 - 1 2 3 - 2 3 4 - - Even though the resulting Series must have the same length as the - input Series, it is possible to provide several input functions: - - >>> s = pd.Series(range(3)) - >>> s - 0 0 - 1 1 - 2 2 - dtype: int64 - >>> s.transform([np.sqrt, np.exp]) - sqrt exp - 0 0.000000 1.000000 - 1 1.000000 2.718282 - 2 1.414214 7.389056 - - You can call transform on a GroupBy object: - >>> df = pd.DataFrame( - ... { - ... { - ... "Date": [ - ... "2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... "2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... ], - ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], - ... } - ... } + >>> ser = pd.Series( + ... [390.0, 350.0, 30.0, 20.0], + ... index=["Falcon", "Falcon", "Parrot", "Parrot"], + ... name="Max Speed", ... 
) - >>> df - Date Data - 0 2015-05-08 5 - 1 2015-05-07 8 - 2 2015-05-06 6 - 3 2015-05-05 1 - 4 2015-05-08 50 - 5 2015-05-07 100 - 6 2015-05-06 60 - 7 2015-05-05 120 - >>> df.groupby("Date")["Data"].transform("sum") - 0 55 - 1 108 - 2 66 - 3 121 - 4 55 - 5 108 - 6 66 - 7 121 - Name: Data, dtype: int64 + >>> grouped = ser.groupby([1, 1, 2, 2]) + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + Falcon 0.707107 + Falcon -0.707107 + Parrot 0.707107 + Parrot -0.707107 + Name: Max Speed, dtype: float64 + + Broadcast result of the transformation + + >>> grouped.transform(lambda x: x.max() - x.min()) + Falcon 40.0 + Falcon 40.0 + Parrot 10.0 + Parrot 10.0 + Name: Max Speed, dtype: float64 - >>> df = pd.DataFrame( - ... { - ... { - ... "c": [1, 1, 1, 2, 2, 2, 2], - ... "type": ["m", "n", "o", "m", "m", "n", "n"], - ... } - ... } - ... ) - >>> df - c type - 0 1 m - 1 1 n - 2 1 o - 3 2 m - 4 2 m - 5 2 n - 6 2 n - >>> df["size"] = df.groupby("c")["type"].transform(len) - >>> df - c type size - 0 1 m 3 - 1 1 n 3 - 2 1 o 3 - 3 2 m 4 - 4 2 m 4 - 5 2 n 4 - 6 2 n 4 + >>> grouped.transform("mean") + Falcon 370.0 + Falcon 370.0 + Parrot 25.0 + Parrot 25.0 + Name: Max Speed, dtype: float64 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: + + >>> grouped.transform(lambda x: x.astype(int).max()) + Falcon 390 + Falcon 390 + Parrot 30 + Parrot 30 + Name: Max Speed, dtype: int64 """ return self._transform( func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs @@ -1035,246 +1028,8 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame: result.index = default_index(len(result)) return result + @doc(Series.describe) def describe(self, percentiles=None, include=None, exclude=None) -> Series: - """ - Generate descriptive statistics. - - Descriptive statistics include those that summarize the central - tendency, dispersion and shape of a - dataset's distribution, excluding ``NaN`` values. 
- - Analyzes both numeric and object series, as well - as ``DataFrame`` column sets of mixed data types. The output - will vary depending on what is provided. Refer to the notes - below for more detail. - - Parameters - ---------- - percentiles : list-like of numbers, optional - The percentiles to include in the output. All should - fall between 0 and 1. The default, ``None``, will automatically - return the 25th, 50th, and 75th percentiles. - include : 'all', list-like of dtypes or None (default), optional - A white list of data types to include in the result. Ignored - for ``Series``. Here are the options: - - - 'all' : All columns of the input will be included in the output. - - A list-like of dtypes : Limits the results to the - provided data types. - To limit the result to numeric types submit - ``numpy.number``. To limit it instead to object columns submit - the ``numpy.object`` data type. Strings - can also be used in the style of - ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To - select pandas categorical columns, use ``'category'`` - - None (default) : The result will include all numeric columns. - exclude : list-like of dtypes or None (default), optional, - A black list of data types to omit from the result. Ignored - for ``Series``. Here are the options: - - - A list-like of dtypes : Excludes the provided data types - from the result. To exclude numeric types submit - ``numpy.number``. To exclude object columns submit the data - type ``numpy.object``. Strings can also be used in the style of - ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To - exclude pandas categorical columns, use ``'category'`` - - None (default) : The result will exclude nothing. - - Returns - ------- - Series or DataFrame - Summary statistics of the Series or Dataframe provided. - - See Also - -------- - DataFrame.count: Count number of non-NA/null observations. - DataFrame.max: Maximum of the values in the object. 
- DataFrame.min: Minimum of the values in the object. - DataFrame.mean: Mean of the values. - DataFrame.std: Standard deviation of the observations. - DataFrame.select_dtypes: Subset of a DataFrame including/excluding - columns based on their dtype. - - Notes - ----- - For numeric data, the result's index will include ``count``, - ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and - upper percentiles. By default the lower percentile is ``25`` and the - upper percentile is ``75``. The ``50`` percentile is the - same as the median. - - For object data (e.g. strings), the result's index - will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` - is the most common value. The ``freq`` is the most common value's - frequency. - - If multiple object values have the highest count, then the - ``count`` and ``top`` results will be arbitrarily chosen from - among those with the highest count. - - For mixed data types provided via a ``DataFrame``, the default is to - return only an analysis of numeric columns. If the DataFrame consists - only of object and categorical data without any numeric columns, the - default is to return an analysis of both the object and categorical - columns. If ``include='all'`` is provided as an option, the result - will include a union of attributes of each type. - - The `include` and `exclude` parameters can be used to limit - which columns in a ``DataFrame`` are analyzed for the output. - The parameters are ignored when analyzing a ``Series``. - - Examples - -------- - Describing a numeric ``Series``. - - >>> s = pd.Series([1, 2, 3]) - >>> s.describe() - count 3.0 - mean 2.0 - std 1.0 - min 1.0 - 25% 1.5 - 50% 2.0 - 75% 2.5 - max 3.0 - dtype: float64 - - Describing a categorical ``Series``. - - >>> s = pd.Series(["a", "a", "b", "c"]) - >>> s.describe() - count 4 - unique 3 - top a - freq 2 - dtype: object - - Describing a timestamp ``Series``. - - >>> s = pd.Series( - ... [ - ... np.datetime64("2000-01-01"), - ... 
np.datetime64("2010-01-01"), - ... np.datetime64("2010-01-01"), - ... ] - ... ) - >>> s.describe() - count 3 - mean 2006-09-01 08:00:00 - min 2000-01-01 00:00:00 - 25% 2004-12-31 12:00:00 - 50% 2010-01-01 00:00:00 - 75% 2010-01-01 00:00:00 - max 2010-01-01 00:00:00 - dtype: object - - Describing a ``DataFrame``. By default only numeric fields - are returned. - - >>> df = pd.DataFrame( - ... { - ... "categorical": pd.Categorical(["d", "e", "f"]), - ... "numeric": [1, 2, 3], - ... "object": ["a", "b", "c"], - ... } - ... ) - >>> df.describe() - numeric - count 3.0 - mean 2.0 - std 1.0 - min 1.0 - 25% 1.5 - 50% 2.0 - 75% 2.5 - max 3.0 - - Describing all columns of a ``DataFrame`` regardless of data type. - - >>> df.describe(include="all") # doctest: +SKIP - categorical numeric object - count 3 3.0 3 - unique 3 NaN 3 - top f NaN a - freq 1 NaN 1 - mean NaN 2.0 NaN - std NaN 1.0 NaN - min NaN 1.0 NaN - 25% NaN 1.5 NaN - 50% NaN 2.0 NaN - 75% NaN 2.5 NaN - max NaN 3.0 NaN - - Describing a column from a ``DataFrame`` by accessing it as - an attribute. - - >>> df.numeric.describe() - count 3.0 - mean 2.0 - std 1.0 - min 1.0 - 25% 1.5 - 50% 2.0 - 75% 2.5 - max 3.0 - Name: numeric, dtype: float64 - - Including only numeric columns in a ``DataFrame`` description. - - >>> df.describe(include=[np.number]) - numeric - count 3.0 - mean 2.0 - std 1.0 - min 1.0 - 25% 1.5 - 50% 2.0 - 75% 2.5 - max 3.0 - - Including only string columns in a ``DataFrame`` description. - - >>> df.describe(include=[object]) # doctest: +SKIP - object - count 3 - unique 3 - top a - freq 1 - - Including only categorical columns from a ``DataFrame`` description. - - >>> df.describe(include=["category"]) - categorical - count 3 - unique 3 - top d - freq 1 - - Excluding numeric columns from a ``DataFrame`` description. - - >>> df.describe(exclude=[np.number]) # doctest: +SKIP - categorical object - count 3 3 - unique 3 3 - top f a - freq 1 1 - - Excluding object columns from a ``DataFrame`` description. 
- - >>> df.describe(exclude=[object]) # doctest: +SKIP - categorical numeric - count 3 3.0 - unique 3 NaN - top f NaN - freq 1 NaN - mean NaN 2.0 - std NaN 1.0 - min NaN 1.0 - 25% NaN 1.5 - 50% NaN 2.0 - 75% NaN 2.5 - max NaN 3.0 - """ return super().describe( percentiles=percentiles, include=include, exclude=exclude ) @@ -1759,333 +1514,15 @@ def alt(obj): ) @property + @doc(Series.plot.__doc__) def plot(self) -> GroupByPlot: - """ - Make plots of Series or DataFrame. - - Uses the backend specified by the - option ``plotting.backend``. By default, matplotlib is used. - - Parameters - ---------- - data : Series or DataFrame - The object for which the method is called. - - Attributes - ---------- - x : label or position, default None - Only used if data is a DataFrame. - y : label, position or list of label, positions, default None - Allows plotting of one column versus another. Only used if data is a - DataFrame. - kind : str - The kind of plot to produce: - - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - - 'scatter' : scatter plot (DataFrame only) - - 'hexbin' : hexbin plot (DataFrame only) - ax : matplotlib axes object, default None - An axes of the current figure. - subplots : bool or sequence of iterables, default False - Whether to group columns into subplots: - - - ``False`` : No subplots will be used - - ``True`` : Make separate subplots for each column. - - sequence of iterables of column labels: Create a subplot for each - group of columns. For example `[('a', 'c'), ('b', 'd')]` will - create 2 subplots: one with columns 'a' and 'c', and one - with columns 'b' and 'd'. Remaining columns that aren't specified - will be plotted in additional subplots (one per column). 
- - sharex : bool, default True if ax is None else False - In case ``subplots=True``, share x axis and set some x axis labels - to invisible; defaults to True if ax is None otherwise False if - an ax is passed in; Be aware, that passing in both an ax and - ``sharex=True`` will alter all x axis labels for all axis in a figure. - sharey : bool, default False - In case ``subplots=True``, - share y axis and set some y axis labels to invisible. - layout : tuple, optional - (rows, columns) for the layout of subplots. - figsize : a tuple (width, height) in inches - Size of a figure object. - use_index : bool, default True - Use index as ticks for x axis. - title : str or list - Title to use for the plot. If a string is passed, print the string - at the top of the figure. If a list is passed and `subplots` is - True, print each item in the list above the corresponding subplot. - grid : bool, default None (matlab style default) - Axis grid lines. - legend : bool or {'reverse'} - Place legend on axis subplots. - style : list or dict - The matplotlib line style per column. - logx : bool or 'sym', default False - Use log scaling or symlog scaling on x axis. - - logy : bool or 'sym' default False - Use log scaling or symlog scaling on y axis. - - loglog : bool or 'sym', default False - Use log scaling or symlog scaling on both x and y axes. - - xticks : sequence - Values to use for the xticks. - yticks : sequence - Values to use for the yticks. - xlim : 2-tuple/list - Set the x limits of the current axes. - ylim : 2-tuple/list - Set the y limits of the current axes. - xlabel : label, optional - Name to use for the xlabel on x-axis. - Default uses index name as xlabel, or the - x-column name for planar plots. - - .. versionchanged:: 2.0.0 - - Now applicable to histograms. - - ylabel : label, optional - Name to use for the ylabel on y-axis. Default will show no ylabel, or the - y-column name for planar plots. - - .. versionchanged:: 2.0.0 - - Now applicable to histograms. 
- - rot : float, default None - Rotation for ticks (xticks for vertical, yticks for horizontal - plots). - fontsize : float, default None - Font size for xticks and yticks. - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that - name from matplotlib. - colorbar : bool, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' - plots). - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 - (center). - table : bool, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data - will be transposed to meet matplotlib's default layout. - If a Series or DataFrame is passed, use passed data to draw a - table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : DataFrame, Series, array-like, dict and str - Equivalent to yerr. - stacked : bool, default False in line and bar plots, and True in area plot - If True, create stacked plot. - secondary_y : bool or sequence, default False - Whether to plot on the secondary y-axis if a list/tuple, which - columns to plot on secondary y-axis. - mark_right : bool, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend. - include_bool : bool, default is False - If True, boolean values can be plotted. - backend : str, default None - Backend to use instead of the backend specified in the option - ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to - specify the ``plotting.backend`` for the whole session, set - ``pd.options.plotting.backend``. - **kwargs - Options to pass to matplotlib plotting method. 
- - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - If the backend is not the default matplotlib one, the return value - will be the object returned by the backend. - - See Also - -------- - matplotlib.pyplot.plot : Plot y versus x as lines and/or markers. - DataFrame.hist : Make a histogram. - DataFrame.boxplot : Make a box plot. - DataFrame.plot.scatter : Make a scatter plot with varying marker - point size and color. - DataFrame.plot.hexbin : Make a hexagonal binning plot of - two variables. - DataFrame.plot.kde : Make Kernel Density Estimate plot using - Gaussian kernels. - DataFrame.plot.area : Make a stacked area plot. - DataFrame.plot.bar : Make a bar plot. - DataFrame.plot.barh : Make a horizontal bar plot. - - Notes - ----- - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 - (center) - - Examples - -------- - For Series: - - .. plot:: - :context: close-figs - - >>> ser = pd.Series([1, 2, 3, 3]) - >>> plot = ser.plot(kind="hist", title="My plot") - - For DataFrame: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame( - ... { - ... "length": [1.5, 0.5, 1.2, 0.9, 3], - ... "width": [0.7, 0.2, 0.15, 0.2, 1.1], - ... }, - ... index=["pig", "rabbit", "duck", "chicken", "horse"], - ... ) - >>> plot = df.plot(title="DataFrame Plot") - - For SeriesGroupBy: - - .. plot:: - :context: close-figs - - >>> lst = [-1, -2, -3, 1, 2, 3] - >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst) - >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot") - - For DataFrameGroupBy: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["A", "B", "A", "B"]}) - >>> plot = df.groupby("col2").plot( - ... kind="bar", title="DataFrameGroupBy Plot" - ... 
) - """ result = GroupByPlot(self) return result + @doc(Series.nlargest.__doc__) def nlargest( self, n: int = 5, keep: Literal["first", "last", "all"] = "first" ) -> Series: - """ - Return the largest `n` elements. - - Parameters - ---------- - n : int, default 5 - Return this many descending sorted values. - keep : {'first', 'last', 'all'}, default 'first' - When there are duplicate values that cannot all fit in a - Series of `n` elements: - - - ``first`` : return the first `n` occurrences in order - of appearance. - - ``last`` : return the last `n` occurrences in reverse - order of appearance. - - ``all`` : keep all occurrences. This can result in a Series of - size larger than `n`. - - Returns - ------- - Series - The `n` largest values in the Series, sorted in decreasing order. - - See Also - -------- - Series.nsmallest: Get the `n` smallest elements. - Series.sort_values: Sort Series by values. - Series.head: Return the first `n` rows. - - Notes - ----- - Faster than ``.sort_values(ascending=False).head(n)`` for small `n` - relative to the size of the ``Series`` object. - - Examples - -------- - >>> countries_population = { - ... "Italy": 59000000, - ... "France": 65000000, - ... "Malta": 434000, - ... "Maldives": 434000, - ... "Brunei": 434000, - ... "Iceland": 337000, - ... "Nauru": 11300, - ... "Tuvalu": 11300, - ... "Anguilla": 11300, - ... "Montserrat": 5200, - ... } - >>> s = pd.Series(countries_population) - >>> s - Italy 59000000 - France 65000000 - Malta 434000 - Maldives 434000 - Brunei 434000 - Iceland 337000 - Nauru 11300 - Tuvalu 11300 - Anguilla 11300 - Montserrat 5200 - dtype: int64 - - The `n` largest elements where ``n=5`` by default. - - >>> s.nlargest() - France 65000000 - Italy 59000000 - Malta 434000 - Maldives 434000 - Brunei 434000 - dtype: int64 - - The `n` largest elements where ``n=3``. Default `keep` value is 'first' - so Malta will be kept. 
- - >>> s.nlargest(3) - France 65000000 - Italy 59000000 - Malta 434000 - dtype: int64 - - The `n` largest elements where ``n=3`` and keeping the last duplicates. - Brunei will be kept since it is the last with value 434000 based on - the index order. - - >>> s.nlargest(3, keep="last") - France 65000000 - Italy 59000000 - Brunei 434000 - dtype: int64 - - The `n` largest elements where ``n=3`` with all duplicates kept. Note - that the returned Series has five elements due to the three duplicates. - - >>> s.nlargest(3, keep="all") - France 65000000 - Italy 59000000 - Malta 434000 - Maldives 434000 - Brunei 434000 - dtype: int64 - """ f = partial(Series.nlargest, n=n, keep=keep) data = self._obj_with_exclusions # Don't change behavior if result index happens to be the same, i.e. @@ -2093,110 +1530,10 @@ def nlargest( result = self._python_apply_general(f, data, not_indexed_same=True) return result + @doc(Series.nsmallest.__doc__) def nsmallest( self, n: int = 5, keep: Literal["first", "last", "all"] = "first" ) -> Series: - """ - Return the smallest `n` elements. - - Parameters - ---------- - n : int, default 5 - Return this many ascending sorted values. - keep : {'first', 'last', 'all'}, default 'first' - When there are duplicate values that cannot all fit in a - Series of `n` elements: - - - ``first`` : return the first `n` occurrences in order - of appearance. - - ``last`` : return the last `n` occurrences in reverse - order of appearance. - - ``all`` : keep all occurrences. This can result in a Series of - size larger than `n`. - - Returns - ------- - Series - The `n` smallest values in the Series, sorted in increasing order. - - See Also - -------- - Series.nlargest: Get the `n` largest elements. - Series.sort_values: Sort Series by values. - Series.head: Return the first `n` rows. - - Notes - ----- - Faster than ``.sort_values().head(n)`` for small `n` relative to - the size of the ``Series`` object. - - Examples - -------- - >>> countries_population = { - ... 
"Italy": 59000000, - ... "France": 65000000, - ... "Brunei": 434000, - ... "Malta": 434000, - ... "Maldives": 434000, - ... "Iceland": 337000, - ... "Nauru": 11300, - ... "Tuvalu": 11300, - ... "Anguilla": 11300, - ... "Montserrat": 5200, - ... } - >>> s = pd.Series(countries_population) - >>> s - Italy 59000000 - France 65000000 - Brunei 434000 - Malta 434000 - Maldives 434000 - Iceland 337000 - Nauru 11300 - Tuvalu 11300 - Anguilla 11300 - Montserrat 5200 - dtype: int64 - - The `n` smallest elements where ``n=5`` by default. - - >>> s.nsmallest() - Montserrat 5200 - Nauru 11300 - Tuvalu 11300 - Anguilla 11300 - Iceland 337000 - dtype: int64 - - The `n` smallest elements where ``n=3``. Default `keep` value is - 'first' so Nauru and Tuvalu will be kept. - - >>> s.nsmallest(3) - Montserrat 5200 - Nauru 11300 - Tuvalu 11300 - dtype: int64 - - The `n` smallest elements where ``n=3`` and keeping the last - duplicates. Anguilla and Tuvalu will be kept since they are the last - with value 11300 based on the index order. - - >>> s.nsmallest(3, keep="last") - Montserrat 5200 - Anguilla 11300 - Tuvalu 11300 - dtype: int64 - - The `n` smallest elements where ``n=3`` with all duplicates kept. Note - that the returned Series has four elements due to the three duplicates. - - >>> s.nsmallest(3, keep="all") - Montserrat 5200 - Nauru 11300 - Tuvalu 11300 - Anguilla 11300 - dtype: int64 - """ f = partial(Series.nsmallest, n=n, keep=keep) data = self._obj_with_exclusions # Don't change behavior if result index happens to be the same, i.e. @@ -3217,142 +2554,149 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): """ - Call ``func`` on self producing a DataFrame with the same axis shape as self. + Call function producing a same-indexed DataFrame on each group. + + Returns a DataFrame having the same indexes as the original object + filled with the transformed values. 
Parameters ---------- - func : function, str, list-like or dict-like - Function to use for transforming the data. If a function, must either - work when passed a DataFrame or when passed to DataFrame.apply. If func - is both list-like and dict-like, dict-like behavior takes precedence. + func : function, str + Function to apply to each group. + See the Notes section below for requirements. - Accepted combinations are: + Accepted inputs are: - - function - - string function name - - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` - - dict-like of axis labels -> functions, - function names or list-like of such. - axis : {0 or 'index', 1 or 'columns'}, default 0 - If 0 or 'index': apply function to each column. - If 1 or 'columns': apply function to each row. + - String + - Python function + - Numba JIT function with ``engine='numba'`` specified. + + Only passing a single function is supported with this engine. + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + If a string is chosen, then it needs to be the name + of the groupby method you want to use. *args - Positional arguments to pass to `func`. + Positional arguments to pass to func. + engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` + or the global setting ``compute.use_numba`` + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. 
The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function + **kwargs - Keyword arguments to pass to `func`. + Keyword arguments to be passed into func. Returns ------- DataFrame - A DataFrame that must have the same length as self. - - Raises - ------ - ValueError : If the returned DataFrame has a different length than self. + DataFrame with the same indexes as the original object filled + with transformed values. See Also -------- - DataFrame.agg : Only perform aggregating type operations. - DataFrame.apply : Invoke function on a DataFrame. + DataFrame.groupby.apply : Apply function ``func`` group-wise and combine + the results together. + DataFrame.groupby.aggregate : Aggregate using one or more operations. + DataFrame.transform : Call ``func`` on self producing a DataFrame with the + same axis shape as self. Notes ----- - Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` - for more details. + Each group is endowed the attribute 'name' in case you need to know + which group you are working on. + + The current implementation imposes three requirements on f: + + * f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, if `f` returns a scalar it will be broadcast to have the + same shape as the input subframe. + * if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. + * f must not mutate groups. Mutation is not supported and may + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. + + When using ``engine='numba'``, there will be no "fall back" behavior internally. 
+ The group data and group index will be passed as numpy arrays to the JITed + user defined function, and no alternative execution attempts will be tried. + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + .. versionchanged:: 2.0.0 + + When using ``.transform`` on a grouped DataFrame + and the transformation function returns a DataFrame, + pandas now aligns the result's index with the input's index. + You can call ``.to_numpy()`` on the result of the + transformation function to avoid alignment. Examples -------- - >>> df = pd.DataFrame({"A": range(3), "B": range(1, 4)}) - >>> df - A B - 0 0 1 - 1 1 2 - 2 2 3 - >>> df.transform(lambda x: x + 1) - A B - 0 1 2 - 1 2 3 - 2 3 4 - - Even though the resulting DataFrame must have the same length as the - input DataFrame, it is possible to provide several input functions: - - >>> s = pd.Series(range(3)) - >>> s - 0 0 - 1 1 - 2 2 - dtype: int64 - >>> s.transform([np.sqrt, np.exp]) - sqrt exp - 0 0.000000 1.000000 - 1 1.000000 2.718282 - 2 1.414214 7.389056 - - You can call transform on a GroupBy object: >>> df = pd.DataFrame( ... { - ... "Date": [ - ... "2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... "2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... ], - ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... "A": ["foo", "bar", "foo", "bar", "foo", "bar"], + ... "B": ["one", "one", "two", "three", "two", "two"], + ... "C": [1, 5, 5, 2, 5, 5], + ... "D": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0], ... } ... 
) - >>> df - Date Data - 0 2015-05-08 5 - 1 2015-05-07 8 - 2 2015-05-06 6 - 3 2015-05-05 1 - 4 2015-05-08 50 - 5 2015-05-07 100 - 6 2015-05-06 60 - 7 2015-05-05 120 - >>> df.groupby("Date")["Data"].transform("sum") - 0 55 - 1 108 - 2 66 - 3 121 - 4 55 - 5 108 - 6 66 - 7 121 - Name: Data, dtype: int64 + >>> grouped = df.groupby("A")[["C", "D"]] + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D + 0 -1.154701 -0.577350 + 1 0.577350 0.000000 + 2 0.577350 1.154701 + 3 -1.154701 -1.000000 + 4 0.577350 -0.577350 + 5 0.577350 1.000000 + + Broadcast result of the transformation + + >>> grouped.transform(lambda x: x.max() - x.min()) + C D + 0 4.0 6.0 + 1 3.0 8.0 + 2 4.0 6.0 + 3 3.0 8.0 + 4 4.0 6.0 + 5 3.0 8.0 + + >>> grouped.transform("mean") + C D + 0 3.666667 4.0 + 1 4.000000 5.0 + 2 3.666667 4.0 + 3 4.000000 5.0 + 4 3.666667 4.0 + 5 4.000000 5.0 - >>> df = pd.DataFrame( - ... { - ... "c": [1, 1, 1, 2, 2, 2, 2], - ... "type": ["m", "n", "o", "m", "m", "n", "n"], - ... } - ... ) - >>> df - c type - 0 1 m - 1 1 n - 2 1 o - 3 2 m - 4 2 m - 5 2 n - 6 2 n - >>> df["size"] = df.groupby("c")["type"].transform(len) - >>> df - c type size - 0 1 m 3 - 1 1 n 3 - 2 1 o 3 - 3 2 m 4 - 4 2 m 4 - 5 2 n 4 - 6 2 n 4 + The resulting dtype will reflect the return value of the passed ``func``, + for example: + + >>> grouped.transform(lambda x: x.astype(int).max()) + C D + 0 5 8 + 1 5 9 + 2 5 8 + 3 5 9 + 4 5 8 + 5 5 9 """ return self._transform( func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs @@ -4184,417 +3528,30 @@ def kurt( ) @property + @doc(DataFrame.plot.__doc__) def plot(self) -> GroupByPlot: - """ - Make plots of Series or DataFrame. - - Uses the backend specified by the - option ``plotting.backend``. By default, matplotlib is used. - - Parameters - ---------- - data : Series or DataFrame - The object for which the method is called. - - Attributes - ---------- - x : label or position, default None - Only used if data is a DataFrame. 
- y : label, position or list of label, positions, default None - Allows plotting of one column versus another. Only used if data is a - DataFrame. - kind : str - The kind of plot to produce: - - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - - 'scatter' : scatter plot (DataFrame only) - - 'hexbin' : hexbin plot (DataFrame only) - ax : matplotlib axes object, default None - An axes of the current figure. - subplots : bool or sequence of iterables, default False - Whether to group columns into subplots: - - - ``False`` : No subplots will be used - - ``True`` : Make separate subplots for each column. - - sequence of iterables of column labels: Create a subplot for each - group of columns. For example `[('a', 'c'), ('b', 'd')]` will - create 2 subplots: one with columns 'a' and 'c', and one - with columns 'b' and 'd'. Remaining columns that aren't specified - will be plotted in additional subplots (one per column). - - sharex : bool, default True if ax is None else False - In case ``subplots=True``, share x axis and set some x axis labels - to invisible; defaults to True if ax is None otherwise False if - an ax is passed in; Be aware, that passing in both an ax and - ``sharex=True`` will alter all x axis labels for all axis in a figure. - sharey : bool, default False - In case ``subplots=True``, - share y axis and set some y axis labels to invisible. - layout : tuple, optional - (rows, columns) for the layout of subplots. - figsize : a tuple (width, height) in inches - Size of a figure object. - use_index : bool, default True - Use index as ticks for x axis. - title : str or list - Title to use for the plot. If a string is passed, print the string - at the top of the figure. 
If a list is passed and `subplots` is - True, print each item in the list above the corresponding subplot. - grid : bool, default None (matlab style default) - Axis grid lines. - legend : bool or {'reverse'} - Place legend on axis subplots. - style : list or dict - The matplotlib line style per column. - logx : bool or 'sym', default False - Use log scaling or symlog scaling on x axis. - - logy : bool or 'sym' default False - Use log scaling or symlog scaling on y axis. - - loglog : bool or 'sym', default False - Use log scaling or symlog scaling on both x and y axes. - - xticks : sequence - Values to use for the xticks. - yticks : sequence - Values to use for the yticks. - xlim : 2-tuple/list - Set the x limits of the current axes. - ylim : 2-tuple/list - Set the y limits of the current axes. - xlabel : label, optional - Name to use for the xlabel on x-axis. - Default uses index name as xlabel, or the - x-column name for planar plots. - - .. versionchanged:: 2.0.0 - - Now applicable to histograms. - - ylabel : label, optional - Name to use for the ylabel on y-axis. Default will show no ylabel, or the - y-column name for planar plots. - - .. versionchanged:: 2.0.0 - - Now applicable to histograms. - - rot : float, default None - Rotation for ticks (xticks for vertical, yticks for horizontal - plots). - fontsize : float, default None - Font size for xticks and yticks. - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that - name from matplotlib. - colorbar : bool, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' - plots). - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 - (center). - table : bool, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data - will be transposed to meet matplotlib's default layout. 
- If a Series or DataFrame is passed, use passed data to draw a - table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : DataFrame, Series, array-like, dict and str - Equivalent to yerr. - stacked : bool, default False in line and bar plots, and True in area plot - If True, create stacked plot. - secondary_y : bool or sequence, default False - Whether to plot on the secondary y-axis if a list/tuple, which - columns to plot on secondary y-axis. - mark_right : bool, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend. - include_bool : bool, default is False - If True, boolean values can be plotted. - backend : str, default None - Backend to use instead of the backend specified in the option - ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to - specify the ``plotting.backend`` for the whole session, set - ``pd.options.plotting.backend``. - **kwargs - Options to pass to matplotlib plotting method. - - Returns - ------- - :class:`matplotlib.axes.Axes` or numpy.ndarray of them - If the backend is not the default matplotlib one, the return value - will be the object returned by the backend. - - See Also - -------- - matplotlib.pyplot.plot : Plot y versus x as lines and/or markers. - DataFrame.hist : Make a histogram. - DataFrame.boxplot : Make a box plot. - DataFrame.plot.scatter : Make a scatter plot with varying marker - point size and color. - DataFrame.plot.hexbin : Make a hexagonal binning plot of - two variables. - DataFrame.plot.kde : Make Kernel Density Estimate plot using - Gaussian kernels. - DataFrame.plot.area : Make a stacked area plot. - DataFrame.plot.bar : Make a bar plot. - DataFrame.plot.barh : Make a horizontal bar plot. 
- - Notes - ----- - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 - (center) - - Examples - -------- - For Series: - - .. plot:: - :context: close-figs - - >>> ser = pd.Series([1, 2, 3, 3]) - >>> plot = ser.plot(kind="hist", title="My plot") - - For DataFrame: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame( - ... { - ... "length": [1.5, 0.5, 1.2, 0.9, 3], - ... "width": [0.7, 0.2, 0.15, 0.2, 1.1], - ... }, - ... index=["pig", "rabbit", "duck", "chicken", "horse"], - ... ) - >>> plot = df.plot(title="DataFrame Plot") - - For SeriesGroupBy: - - .. plot:: - :context: close-figs - - >>> lst = [-1, -2, -3, 1, 2, 3] - >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst) - >>> plot = ser.groupby(lambda x: x > 0).plot(title="SeriesGroupBy Plot") - - For DataFrameGroupBy: - - .. plot:: - :context: close-figs - - >>> df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["A", "B", "A", "B"]}) - >>> plot = df.groupby("col2").plot( - ... kind="bar", title="DataFrameGroupBy Plot" - ... ) - """ result = GroupByPlot(self) return result + @doc(DataFrame.corr.__doc__) def corr( self, method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson", min_periods: int = 1, numeric_only: bool = False, ) -> DataFrame: - """ - Compute pairwise correlation of columns, excluding NA/null values. - - Parameters - ---------- - method : {'pearson', 'kendall', 'spearman'} or callable - Method of correlation: - - * pearson : standard correlation coefficient - * kendall : Kendall Tau correlation coefficient - * spearman : Spearman rank correlation - * callable: callable with input two 1d ndarrays - and returning a float. Note that the returned matrix from corr - will have 1 along the diagonals and will be symmetric - regardless of the callable's behavior. 
- min_periods : int, optional - Minimum number of observations required per pair of columns - to have a valid result. Currently only available for Pearson - and Spearman correlation. - numeric_only : bool, default False - Include only `float`, `int` or `boolean` data. - - .. versionchanged:: 2.0.0 - The default value of ``numeric_only`` is now ``False``. - - Returns - ------- - DataFrame - Correlation matrix. - - See Also - -------- - DataFrame.corrwith : Compute pairwise correlation with another - DataFrame or Series. - Series.corr : Compute the correlation between two Series. - - Notes - ----- - Pearson, Kendall and Spearman correlation are currently - computed using pairwise complete observations. - - * `Pearson correlation coefficient `_ - * `Kendall rank correlation coefficient `_ - * `Spearman's rank correlation coefficient `_ - - Examples - -------- - >>> def histogram_intersection(a, b): - ... v = np.minimum(a, b).sum().round(decimals=1) - ... return v - >>> df = pd.DataFrame( - ... [(0.2, 0.3), (0.0, 0.6), (0.6, 0.0), (0.2, 0.1)], - ... columns=["dogs", "cats"], - ... ) - >>> df.corr(method=histogram_intersection) - dogs cats - dogs 1.0 0.3 - cats 0.3 1.0 - - >>> df = pd.DataFrame( - ... [(1, 1), (2, np.nan), (np.nan, 3), (4, 4)], columns=["dogs", "cats"] - ... ) - >>> df.corr(min_periods=3) - dogs cats - dogs 1.0 NaN - cats NaN 1.0 - """ result = self._op_via_apply( "corr", method=method, min_periods=min_periods, numeric_only=numeric_only ) return result + @doc(DataFrame.cov.__doc__) def cov( self, min_periods: int | None = None, ddof: int | None = 1, numeric_only: bool = False, ) -> DataFrame: - """ - Compute pairwise covariance of columns, excluding NA/null values. - - Compute the pairwise covariance among the series of a DataFrame. - The returned data frame is the `covariance matrix - `__ of the columns - of the DataFrame. - - Both NA and null values are automatically excluded from the - calculation. (See the note below about bias from missing values.) 
- A threshold can be set for the minimum number of - observations for each value created. Comparisons with observations - below this threshold will be returned as ``NaN``. - - This method is generally used for the analysis of time series data to - understand the relationship between different measures - across time. - - Parameters - ---------- - min_periods : int, optional - Minimum number of observations required per pair of columns - to have a valid result. - - ddof : int, default 1 - Delta degrees of freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - This argument is applicable only when no ``nan`` is in the dataframe. - - numeric_only : bool, default False - Include only `float`, `int` or `boolean` data. - - .. versionchanged:: 2.0.0 - The default value of ``numeric_only`` is now ``False``. - - Returns - ------- - DataFrame - The covariance matrix of the series of the DataFrame. - - See Also - -------- - Series.cov : Compute covariance with another Series. - core.window.ewm.ExponentialMovingWindow.cov : Exponential weighted sample - covariance. - core.window.expanding.Expanding.cov : Expanding sample covariance. - core.window.rolling.Rolling.cov : Rolling sample covariance. - - Notes - ----- - Returns the covariance matrix of the DataFrame's time series. - The covariance is normalized by N-ddof. - - For DataFrames that have Series that are missing data (assuming that - data is `missing at random - `__) - the returned covariance matrix will be an unbiased estimate - of the variance and covariance between the member Series. - - However, for many applications this estimate may not be acceptable - because the estimate covariance matrix is not guaranteed to be positive - semi-definite. This could lead to estimate correlations having - absolute values which are greater than one, and/or a non-invertible - covariance matrix. See `Estimation of covariance matrices - `__ for more details. 
- - Examples - -------- - >>> df = pd.DataFrame( - ... [(1, 2), (0, 3), (2, 0), (1, 1)], columns=["dogs", "cats"] - ... ) - >>> df.cov() - dogs cats - dogs 0.666667 -1.000000 - cats -1.000000 1.666667 - - >>> np.random.seed(42) - >>> df = pd.DataFrame( - ... np.random.randn(1000, 5), columns=["a", "b", "c", "d", "e"] - ... ) - >>> df.cov() - a b c d e - a 0.998438 -0.020161 0.059277 -0.008943 0.014144 - b -0.020161 1.059352 -0.008543 -0.024738 0.009826 - c 0.059277 -0.008543 1.010670 -0.001486 -0.000271 - d -0.008943 -0.024738 -0.001486 0.921297 -0.013692 - e 0.014144 0.009826 -0.000271 -0.013692 0.977795 - - **Minimum number of periods** - - This method also supports an optional ``min_periods`` keyword - that specifies the required minimum number of non-NA observations for - each column pair in order to have a valid result: - - >>> np.random.seed(42) - >>> df = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"]) - >>> df.loc[df.index[:5], "a"] = np.nan - >>> df.loc[df.index[5:10], "b"] = np.nan - >>> df.cov(min_periods=12) - a b c - a 0.316741 NaN -0.150812 - b NaN 1.248003 0.191417 - c -0.150812 0.191417 0.895202 - """ result = self._op_via_apply( "cov", min_periods=min_periods, ddof=ddof, numeric_only=numeric_only ) From b6cde6319d1bb468fd1ca28e7c9cb1e0a2667218 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 23:01:22 +0800 Subject: [PATCH 07/10] Changes in frame.py is commited by mistake, now it is recovered to origin. 
--- pandas/core/frame.py | 6885 +++--------------------------------------- 1 file changed, 411 insertions(+), 6474 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8c52de1a5bf51..9d6af3c7b9917 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -66,6 +66,8 @@ _chained_assignment_msg, ) from pandas.util._decorators import ( + Appender, + Substitution, deprecate_nonkeyword_arguments, doc, set_module, @@ -151,6 +153,7 @@ ) from pandas.core.generic import ( NDFrame, + make_doc, ) from pandas.core.indexers import check_key_length from pandas.core.indexes.api import ( @@ -197,7 +200,9 @@ format as fmt, ) from pandas.io.formats.info import ( + INFO_DOCSTRING, DataFrameInfo, + frame_sub_kwargs, ) import pandas.plotting @@ -1297,6 +1302,17 @@ def to_string( encoding: str | None = ..., ) -> None: ... + @Substitution( + header_type="bool or list of str", + header="Write out the column names. If a list of columns " + "is given, it is assumed to be aliases for the " + "column names", + col_space_type="int, list or dict of int", + col_space="The minimum width of each column. If a list of ints is given " + "every integers corresponds with one column. If a dict is given, the key " + "references the column, while the value defines the space to use.", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -1322,65 +1338,7 @@ def to_string( ) -> str | None: """ Render a DataFrame to a console-friendly tabular output. - - Parameters - ---------- - buf : str, Path or StringIO-like, optional, default None - Buffer to write to. If None, the output is returned as a string. - columns : array-like, optional, default None - The subset of columns to write. Writes all columns by default. - col_space : int, list or dict of int, optional - The minimum width of each column. - If a list of ints is given every integers corresponds with one column. 
- If a dict is given, the key references the column, - while the value defines the space to use. - header : bool or list of str, optional - Write out the column names. If a list of columns is given, - it is assumed to be aliases for the column names. - index : bool, optional, default True - Whether to print index (row) labels. - na_rep : str, optional, default 'NaN' - String representation of ``NaN`` to use. - formatters : list, tuple or dict of one-param. functions, optional - Formatter functions to apply to columns' elements by position or - name. - The result of each function must be a unicode string. - List/tuple must be of length equal to the number of columns. - float_format : one-parameter function, optional, default None - Formatter function to apply to columns' elements if they are - floats. This function must return a unicode string and will be - applied only to the non-``NaN`` elements, with ``NaN`` being - handled by ``na_rep``. - sparsify : bool, optional, default True - Set to False for a DataFrame with a hierarchical index to print - every multiindex key at each row. - index_names : bool, optional, default True - Prints the names of the indexes. - justify : str, default None - How to justify the column labels. If None uses the option from - the print configuration (controlled by set_option), 'right' out - of the box. Valid values are - - * left - * right - * center - * justify - * justify-all - * start - * end - * inherit - * match-parent - * initial - * unset. - max_rows : int, optional - Maximum number of rows to display in the console. - max_cols : int, optional - Maximum number of columns to display in the console. - show_dimensions : bool, default False - Display DataFrame dimensions (number of rows by number of columns). - decimal : str, default '.' - Character recognized as decimal separator, e.g. ',' in Europe. - + %(shared_params)s line_width : int, optional Width to wrap a line in characters. 
min_rows : int, optional @@ -1390,13 +1348,7 @@ def to_string( Max width to truncate each column in characters. By default, no limit. encoding : str, default "utf-8" Set character encoding. - - Returns - ------- - str or None - If buf is None, returns the result as a string. Otherwise returns - None. - + %(returns)s See Also -------- to_html : Convert DataFrame to HTML. @@ -1406,7 +1358,7 @@ def to_string( >>> d = {"col1": [1, 2, 3], "col2": [4, 5, 6]} >>> df = pd.DataFrame(d) >>> print(df.to_string()) - col1 col2 + col1 col2 0 1 4 1 2 5 2 3 6 @@ -2703,6 +2655,10 @@ def _from_arrays( ) return cls._from_mgr(mgr, axes=mgr.axes) + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path", + ) def to_stata( self, path: FilePath | WriteBuffer[bytes], @@ -2750,7 +2706,7 @@ def to_stata( variable_labels : dict Dictionary containing columns as keys and variable labels as values. Each label must be 80 characters or smaller. - version : {114, 117, 118, 119, None}, default 114 + version : {{114, 117, 118, 119, None}}, default 114 Version to use in the output dta file. Set to None to let pandas decide between 118 or 119 formats depending on the number of columns in the frame. Version 114 can be read by Stata 10 and @@ -2772,34 +2728,9 @@ def to_stata( format. Only available if version is 117. Storing strings in the StrL format can produce smaller dta files if strings have more than 8 characters and values are repeated. - compression : str or dict, default 'infer' - For on-the-fly compression of the output data. If 'infer' and 'path' is - path-like, then detect compression from the following extensions: '.gz', - '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' - (otherwise no compression). - Set to ``None`` for no compression. 
- Can also be a dict with key ``'method'`` set - to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, - ``'xz'``, ``'tar'``} and other key-value pairs are forwarded to - ``zipfile.ZipFile``, ``gzip.GzipFile``, - ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or - ``tarfile.TarFile``, respectively. - As an example, the following could be passed for faster compression - and to create a reproducible gzip archive: - ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. - - .. versionadded:: 1.5.0 - Added support for `.tar` files. + {compression_options} - storage_options : dict, optional - Extra options that make sense for a particular storage connection, e.g. - host, port, username, password, etc. For HTTP(S) URLs the key-value pairs - are forwarded to ``urllib.request.Request`` as header options. For other - URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are - forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more - details, and for more examples on storage options refer `here - `_. + {storage_options} value_labels : dict of dicts Dictionary containing columns as keys and dictionaries of column value @@ -3060,6 +2991,7 @@ def to_parquet( **kwargs, ) -> None: ... + @doc(storage_options=_shared_docs["storage_options"]) def to_parquet( self, path: FilePath | WriteBuffer[bytes] | None = None, @@ -3087,7 +3019,7 @@ def to_parquet( object implementing a binary ``write()`` function. If None, the result is returned as bytes. If a string or path, it will be used as Root Directory path when writing a partitioned dataset. - engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' Parquet library to use. If 'auto', then the option ``io.parquet.engine`` is used. 
The default ``io.parquet.engine`` behavior is to try 'pyarrow', falling back to 'fastparquet' if @@ -3107,15 +3039,7 @@ def to_parquet( Column names by which to partition the dataset. Columns are partitioned in the order they are given. Must be None if path is not a string. - storage_options : dict, optional - Extra options that make sense for a particular storage connection, e.g. - host, port, username, password, etc. For HTTP(S) URLs the key-value pairs - are forwarded to ``urllib.request.Request`` as header options. For other - URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are - forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more - details, and for more examples on storage options refer `here - `_. + {storage_options} filesystem : fsspec or pyarrow filesystem, default None Filesystem object to use when reading the parquet file. Only implemented @@ -3157,7 +3081,7 @@ def to_parquet( Examples -------- - >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + >>> df = pd.DataFrame(data={{"col1": [1, 2], "col2": [3, 4]}}) >>> df.to_parquet("df.parquet.gzip", compression="gzip") # doctest: +SKIP >>> pd.read_parquet("df.parquet.gzip") # doctest: +SKIP col1 col2 @@ -3365,6 +3289,14 @@ def to_html( encoding: str | None = ..., ) -> str: ... + @Substitution( + header_type="bool", + header="Whether to print column labels, default True", + col_space_type="str or int, list or dict of int or str", + col_space="The minimum width of each column in CSS length " + "units. An int is assumed to be px units.", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) def to_html( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -3394,62 +3326,7 @@ def to_html( ) -> str | None: """ Render a DataFrame as an HTML table. - - Parameters - ---------- - buf : str, Path or StringIO-like, optional, default None - Buffer to write to. If None, the output is returned as a string. 
- columns : array-like, optional, default None - The subset of columns to write. Writes all columns by default. - col_space : str or int, list or dict of int or str, optional - The minimum width of each column in CSS length units. - An int is assumed to be px units. - header : bool, optional - Whether to print column labels, default True. - index : bool, optional, default True - Whether to print index (row) labels. - na_rep : str, optional, default 'NaN' - String representation of ``NaN`` to use. - formatters : list, tuple or dict of one-param. functions, optional - Formatter functions to apply to columns' elements by position or - name. - The result of each function must be a unicode string. - List/tuple must be of length equal to the number of columns. - float_format : one-parameter function, optional, default None - Formatter function to apply to columns' elements if they are - floats. This function must return a unicode string and will be - applied only to the non-``NaN`` elements, with ``NaN`` being - handled by ``na_rep``. - sparsify : bool, optional, default True - Set to False for a DataFrame with a hierarchical index to print - every multiindex key at each row. - index_names : bool, optional, default True - Prints the names of the indexes. - justify : str, default None - How to justify the column labels. If None uses the option from - the print configuration (controlled by set_option), 'right' out - of the box. Valid values are - - * left - * right - * center - * justify - * justify-all - * start - * end - * inherit - * match-parent - * initial - * unset. - max_rows : int, optional - Maximum number of rows to display in the console. - max_cols : int, optional - Maximum number of columns to display in the console. - show_dimensions : bool, default False - Display DataFrame dimensions (number of rows by number of columns). - decimal : str, default '.' - Character recognized as decimal separator, e.g. ',' in Europe. 
- + %(shared_params)s bold_rows : bool, default True Make the row labels bold in the output. classes : str or list or tuple, default None @@ -3471,85 +3348,79 @@ def to_html( Convert URLs to HTML links. encoding : str, default "utf-8" Set character encoding. + %(returns)s + See Also + -------- + to_string : Convert DataFrame to a string. - Returns - ------- - str or None - If buf is None, returns the result as a string. Otherwise returns - None. + Examples + -------- + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) + >>> html_string = df.to_html() + >>> print(html_string) + + + + + + + + + + + + + + + + + + + + +
col1col2
014
123
+ + HTML output + + +----+-----+-----+ + | |col1 |col2 | + +====+=====+=====+ + |0 |1 |4 | + +----+-----+-----+ + |1 |2 |3 | + +----+-----+-----+ - See Also - -------- - to_string : Convert DataFrame to a string. - - Examples - -------- - >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) - >>> html_string = df.to_html() - >>> print(html_string) - - - - - - - - - - - - - - - - - - - - -
col1col2
014
123
- - HTML output - - +----+-----+-----+ - | |col1 |col2 | - +====+=====+=====+ - |0 |1 |4 | - +----+-----+-----+ - |1 |2 |3 | - +----+-----+-----+ - - >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) - >>> html_string = df.to_html(index=False) - >>> print(html_string) - - - - - - - - - - - - - - - - - -
col1col2
14
23
- - HTML output - - +-----+-----+ - |col1 |col2 | - +=====+=====+ - |1 |4 | - +-----+-----+ - |2 |3 | - +-----+-----+ + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) + >>> html_string = df.to_html(index=False) + >>> print(html_string) + + + + + + + + + + + + + + + + + +
col1col2
14
23
+ + HTML output + + +-----+-----+ + |col1 |col2 | + +=====+=====+ + |1 |4 | + +-----+-----+ + |2 |3 | + +-----+-----+ """ if justify is not None and justify not in fmt.VALID_JUSTIFY_PARAMETERS: raise ValueError("Invalid value for justify parameter") @@ -3628,6 +3499,10 @@ def to_xml( storage_options: StorageOptions | None = ..., ) -> None: ... + @doc( + storage_options=_shared_docs["storage_options"], + compression_options=_shared_docs["compression_options"] % "path_or_buffer", + ) def to_xml( self, path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -3680,7 +3555,7 @@ def to_xml( Default namespaces should be given empty string key. For example, :: - namespaces = {"": "https://example.com"} + namespaces = {{"": "https://example.com"}} prefix : str, optional Namespace prefix to be used for every element and/or attribute @@ -3693,7 +3568,7 @@ def to_xml( pretty_print : bool, default True Whether output should be pretty printed with indentation and line breaks. - parser : {'lxml','etree'}, default 'lxml' + parser : {{'lxml','etree'}}, default 'lxml' Parser module to use for building of tree. Only 'lxml' and 'etree' are supported. With 'lxml', the ability to use XSLT stylesheet is supported. @@ -3703,35 +3578,9 @@ def to_xml( layout of elements and attributes from original output. This argument requires ``lxml`` to be installed. Only XSLT 1.0 scripts and not later versions is currently supported. - compression : str or dict, default 'infer' - For on-the-fly compression of the output data. If 'infer' - and 'path_or_buffer' is path-like, - then detect compression from the following extensions: '.gz', - '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' - (otherwise no compression). - Set to ``None`` for no compression. 
- Can also be a dict with key ``'method'`` set - to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, - ``'tar'``} and other key-value pairs are forwarded to - ``zipfile.ZipFile``, ``gzip.GzipFile``, - ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or - ``tarfile.TarFile``, respectively. - As an example, the following could be passed for faster - compression and to create a reproducible gzip archive: - ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. - - .. versionadded:: 1.5.0 - Added support for `.tar` files. + {compression_options} - storage_options : dict, optional - Extra options that make sense for a particular storage connection, e.g. - host, port, username, password, etc. For HTTP(S) URLs the key-value pairs - are forwarded to ``urllib.request.Request`` as header options. For other - URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are - forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more - details, and for more examples on storage options refer `here - `_. + {storage_options} Returns ------- @@ -3785,7 +3634,7 @@ def to_xml( >>> df.to_xml( - ... namespaces={"doc": "https://example.com"}, prefix="doc" + ... namespaces={{"doc": "https://example.com"}}, prefix="doc" ... ) # doctest: +SKIP @@ -3809,6 +3658,7 @@ def to_xml( """ + from pandas.io.formats.xml import ( EtreeXMLFormatter, LxmlXMLFormatter, @@ -3910,6 +3760,7 @@ def to_iceberg( ) # ---------------------------------------------------------------------- + @doc(INFO_DOCSTRING, **frame_sub_kwargs) def info( self, verbose: bool | None = None, @@ -3918,149 +3769,6 @@ def info( memory_usage: bool | str | None = None, show_counts: bool | None = None, ) -> None: - """ - Print a concise summary of a DataFrame. - - This method prints information about a DataFrame including - the index dtype and columns, non-NA values and memory usage. 
- - Parameters - ---------- - verbose : bool, optional - Whether to print the full summary. By default, the setting in - ``pandas.options.display.max_info_columns`` is followed. - buf : writable buffer, defaults to sys.stdout - Where to send the output. By default, the output is printed to - sys.stdout. Pass a writable buffer if you need to further process - the output. - max_cols : int, optional - When to switch from the verbose to the truncated output. If the - DataFrame has more than `max_cols` columns, the truncated output - is used. By default, the setting in - ``pandas.options.display.max_info_columns`` is used. - memory_usage : bool, str, optional - Specifies whether total memory usage of the DataFrame - elements (including the index) should be displayed. By default, - this follows the ``pandas.options.display.memory_usage`` setting. - - True always show memory usage. False never shows memory usage. - A value of 'deep' is equivalent to "True with deep introspection". - Memory usage is shown in human-readable units (base-2 - representation). Without deep introspection a memory estimation is - made based in column dtype and number of rows assuming values - consume the same memory amount for corresponding dtypes. With deep - memory introspection, a real memory usage calculation is performed - at the cost of computational resources. See the - :ref:`Frequently Asked Questions ` for more - details. - show_counts : bool, optional - Whether to show the non-null counts. By default, this is shown - only if the DataFrame is smaller than - ``pandas.options.display.max_info_rows`` and - ``pandas.options.display.max_info_columns``. A value of True always - shows the counts, and False never shows the counts. - - Returns - ------- - None - This method prints a summary of a DataFrame and returns None. - - See Also - -------- - DataFrame.describe: Generate descriptive statistics of DataFrame - columns. - DataFrame.memory_usage: Memory usage of DataFrame columns. 
- - Examples - -------- - >>> int_values = [1, 2, 3, 4, 5] - >>> text_values = ["alpha", "beta", "gamma", "delta", "epsilon"] - >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] - >>> df = pd.DataFrame( - ... { - ... "int_col": int_values, - ... "text_col": text_values, - ... "float_col": float_values, - ... } - ... ) - >>> df - int_col text_col float_col - 0 1 alpha 0.00 - 1 2 beta 0.25 - 2 3 gamma 0.50 - 3 4 delta 0.75 - 4 5 epsilon 1.00 - - Prints information of all columns: - - >>> df.info(verbose=True) - - RangeIndex: 5 entries, 0 to 4 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 int_col 5 non-null int64 - 1 text_col 5 non-null object - 2 float_col 5 non-null float64 - dtypes: float64(1), int64(1), object(1) - memory usage: 248.0+ bytes - - Prints a summary of columns count and its dtypes but not per column - information: - - >>> df.info(verbose=False) - - RangeIndex: 5 entries, 0 to 4 - Columns: 3 entries, int_col to float_col - dtypes: float64(1), int64(1), object(1) - memory usage: 248.0+ bytes - - Pipe output of DataFrame.info to buffer instead of sys.stdout, get - buffer content and writes to a text file: - - >>> import io - >>> buffer = io.StringIO() - >>> df.info(buf=buffer) - >>> s = buffer.getvalue() - >>> with open("df_info.txt", "w", encoding="utf-8") as f: # doctest: +SKIP - ... f.write(s) - 260 - - The `memory_usage` parameter allows deep introspection mode, specially - useful for big DataFrames and fine-tune memory optimization: - - >>> random_strings_array = np.random.choice(["a", "b", "c"], 10**6) - >>> df = pd.DataFrame( - ... { - ... "column_1": np.random.choice(["a", "b", "c"], 10**6), - ... "column_2": np.random.choice(["a", "b", "c"], 10**6), - ... "column_3": np.random.choice(["a", "b", "c"], 10**6), - ... } - ... 
) - >>> df.info() - - RangeIndex: 1000000 entries, 0 to 999999 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 22.9+ MB - - >>> df.info(memory_usage="deep") - - RangeIndex: 1000000 entries, 0 to 999999 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 165.9 MB - """ info = DataFrameInfo( data=self, memory_usage=memory_usage, @@ -5874,72 +5582,51 @@ def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame: fill_value=fill_value, ) - def set_axis( - self, - labels, - *, - axis: Axis = 0, - copy: bool | lib.NoDefault = lib.no_default, - ) -> DataFrame: + @Appender( """ - Assign desired index to given axis. - - Indexes for column or row labels can be changed by assigning - a list-like or Index. - - Parameters - ---------- - labels : list-like, Index - The values for the new index. - - axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to update. The value 0 identifies the rows. For `Series` - this parameter is unused and defaults to 0. - - copy : bool, default False - This keyword is now ignored; changing its value will have no - impact on the method. - - .. deprecated:: 3.0.0 - - This keyword is ignored and will be removed in pandas 4.0. Since - pandas 3.0, this method always returns a new object using a lazy - copy mechanism that defers copies until necessary - (Copy-on-Write). See the `user guide on Copy-on-Write - `__ - for more details. - - Returns - ------- - DataFrame - An object of type DataFrame. - - See Also + Examples -------- - DataFrame.rename_axis : Alter the name of the index or columns. 
- - Examples - -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - Change the row labels. + Change the row labels. - >>> df.set_axis(["a", "b", "c"], axis="index") - A B - a 1 4 - b 2 5 - c 3 6 + >>> df.set_axis(['a', 'b', 'c'], axis='index') + A B + a 1 4 + b 2 5 + c 3 6 - Change the column labels. + Change the column labels. - >>> df.set_axis(["I", "II"], axis="columns") - I II - 0 1 4 - 1 2 5 - 2 3 6 + >>> df.set_axis(['I', 'II'], axis='columns') + I II + 0 1 4 + 1 2 5 + 2 3 6 """ + ) + @Substitution( + klass=_shared_doc_kwargs["klass"], + axes_single_arg=_shared_doc_kwargs["axes_single_arg"], + extended_summary_sub=" column or", + axis_description_sub=", and 1 identifies the columns", + see_also_sub=" or columns", + ) + @Appender(NDFrame.set_axis.__doc__) + def set_axis( + self, + labels, + *, + axis: Axis = 0, + copy: bool | lib.NoDefault = lib.no_default, + ) -> DataFrame: return super().set_axis(labels, axis=axis, copy=copy) + @doc( + NDFrame.reindex, + klass=_shared_doc_kwargs["klass"], + optional_reindex=_shared_doc_kwargs["optional_reindex"], + ) def reindex( self, labels=None, @@ -5954,227 +5641,6 @@ def reindex( limit: int | None = None, tolerance=None, ) -> DataFrame: - """ - Conform DataFrame to new index with optional filling logic. - - Places NA/NaN in locations having no value in the previous index. A new object - is produced unless the new index is equivalent to the current one and - ``copy=False``. - - Parameters - ---------- - - labels : array-like, optional - New labels / index to conform the axis specified by 'axis' to. - index : array-like, optional - New labels for the index. Preferably an Index object to avoid - duplicating data. - columns : array-like, optional - New labels for the columns. Preferably an Index object to avoid - duplicating data. - axis : int or str, optional - Axis to target. Can be either the axis name ('index', 'columns') - or number (0, 1). 
- method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} - Method to use for filling holes in reindexed DataFrame. - Please note: this is only applicable to DataFrames/Series with a - monotonically increasing/decreasing index. - - * None (default): don't fill gaps - * pad / ffill: Propagate last valid observation forward to next - valid. - * backfill / bfill: Use next valid observation to fill gap. - * nearest: Use nearest valid observations to fill gap. - - copy : bool, default False - This keyword is now ignored; changing its value will have no - impact on the method. - - .. deprecated:: 3.0.0 - - This keyword is ignored and will be removed in pandas 4.0. Since - pandas 3.0, this method always returns a new object using a lazy - copy mechanism that defers copies until necessary - (Copy-on-Write). See the `user guide on Copy-on-Write - `__ - for more details. - - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : scalar, default np.nan - Value to use for missing values. Defaults to NaN, but can be any - "compatible" value. - limit : int, default None - Maximum number of consecutive elements to forward or backward fill. - tolerance : optional - Maximum distance between original and new labels for inexact - matches. The values of the index at the matching locations most - satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - - Tolerance may be a scalar value, which applies the same tolerance - to all values, or list-like, which applies variable tolerance per - element. List-like includes list, tuple, array, Series, and must be - the same size as the index and its dtype must exactly match the - index's type. - - Returns - ------- - DataFrame - DataFrame with changed index. - - See Also - -------- - DataFrame.set_index : Set row labels. - DataFrame.reset_index : Remove row labels or move them to new columns. - DataFrame.reindex_like : Change to same indices as other DataFrame. 
- - Examples - -------- - ``DataFrame.reindex`` supports two calling conventions - - * ``(index=index_labels, columns=column_labels, ...)`` - * ``(labels, axis={'index', 'columns'}, ...)`` - - We *highly* recommend using keyword arguments to clarify your - intent. - - Create a DataFrame with some fictional data. - - >>> index = ["Firefox", "Chrome", "Safari", "IE10", "Konqueror"] - >>> columns = ["http_status", "response_time"] - >>> df = pd.DataFrame( - ... [[200, 0.04], [200, 0.02], [404, 0.07], [404, 0.08], [301, 1.0]], - ... columns=columns, - ... index=index, - ... ) - >>> df - http_status response_time - Firefox 200 0.04 - Chrome 200 0.02 - Safari 404 0.07 - IE10 404 0.08 - Konqueror 301 1.00 - - Create a new index and reindex the DataFrame. By default - values in the new index that do not have corresponding - records in the DataFrame are assigned ``NaN``. - - >>> new_index = ["Safari", "Iceweasel", "Comodo Dragon", "IE10", "Chrome"] - >>> df.reindex(new_index) - http_status response_time - Safari 404.0 0.07 - Iceweasel NaN NaN - Comodo Dragon NaN NaN - IE10 404.0 0.08 - Chrome 200.0 0.02 - - We can fill in the missing values by passing a value to - the keyword ``fill_value``. Because the index is not monotonically - increasing or decreasing, we cannot use arguments to the keyword - ``method`` to fill the ``NaN`` values. - - >>> df.reindex(new_index, fill_value=0) - http_status response_time - Safari 404 0.07 - Iceweasel 0 0.00 - Comodo Dragon 0 0.00 - IE10 404 0.08 - Chrome 200 0.02 - - >>> df.reindex(new_index, fill_value="missing") - http_status response_time - Safari 404 0.07 - Iceweasel missing missing - Comodo Dragon missing missing - IE10 404 0.08 - Chrome 200 0.02 - - We can also reindex the columns. 
- - >>> df.reindex(columns=["http_status", "user_agent"]) - http_status user_agent - Firefox 200 NaN - Chrome 200 NaN - Safari 404 NaN - IE10 404 NaN - Konqueror 301 NaN - - Or we can use "axis-style" keyword arguments - - >>> df.reindex(["http_status", "user_agent"], axis="columns") - http_status user_agent - Firefox 200 NaN - Chrome 200 NaN - Safari 404 NaN - IE10 404 NaN - Konqueror 301 NaN - - To further illustrate the filling functionality in - ``reindex``, we will create a DataFrame with a - monotonically increasing index (for example, a sequence - of dates). - - >>> date_index = pd.date_range("1/1/2010", periods=6, freq="D") - >>> df2 = pd.DataFrame( - ... {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index - ... ) - >>> df2 - prices - 2010-01-01 100.0 - 2010-01-02 101.0 - 2010-01-03 NaN - 2010-01-04 100.0 - 2010-01-05 89.0 - 2010-01-06 88.0 - - Suppose we decide to expand the DataFrame to cover a wider - date range. - - >>> date_index2 = pd.date_range("12/29/2009", periods=10, freq="D") - >>> df2.reindex(date_index2) - prices - 2009-12-29 NaN - 2009-12-30 NaN - 2009-12-31 NaN - 2010-01-01 100.0 - 2010-01-02 101.0 - 2010-01-03 NaN - 2010-01-04 100.0 - 2010-01-05 89.0 - 2010-01-06 88.0 - 2010-01-07 NaN - - The index entries that did not have a value in the original data frame - (for example, '2009-12-29') are by default filled with ``NaN``. - If desired, we can fill in the missing values using one of several - options. - - For example, to back-propagate the last valid value to fill the ``NaN`` - values, pass ``bfill`` as an argument to the ``method`` keyword. 
- - >>> df2.reindex(date_index2, method="bfill") - prices - 2009-12-29 100.0 - 2009-12-30 100.0 - 2009-12-31 100.0 - 2010-01-01 100.0 - 2010-01-02 101.0 - 2010-01-03 NaN - 2010-01-04 100.0 - 2010-01-05 89.0 - 2010-01-06 88.0 - 2010-01-07 NaN - - Please note that the ``NaN`` value present in the original DataFrame - (at index value 2010-01-03) will not be filled by any of the - value propagation schemes. This is because filling while reindexing - does not look at DataFrame values, but only compares the original and - desired indexes. If you do want to fill in the ``NaN`` values present - in the original DataFrame, use the ``fillna()`` method. - - See the :ref:`user guide ` for more. - """ return super().reindex( labels=labels, index=index, @@ -6663,6 +6129,7 @@ def _replace_columnwise( return res if inplace else res.__finalize__(self) + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift( self, periods: int | Sequence[int] = 1, @@ -6671,120 +6138,6 @@ def shift( fill_value: Hashable = lib.no_default, suffix: str | None = None, ) -> DataFrame: - """ - Shift index by desired number of periods with an optional time `freq`. - - When `freq` is not passed, shift the index without realigning the data. - If `freq` is passed (in this case, the index must be date or datetime, - or it will raise a `NotImplementedError`), the index will be - increased using the periods and the `freq`. `freq` can be inferred - when specified as "infer" as long as either freq or inferred_freq - attribute is set in the index. - - Parameters - ---------- - periods : int or Sequence - Number of periods to shift. Can be positive or negative. - If an iterable of ints, the data will be shifted once by each int. - This is equivalent to shifting by one value at a time and - concatenating all resulting frames. The resulting columns will have - the shift suffixed to their column names. For multiple periods, - axis must not be 1. 
- freq : DateOffset, tseries.offsets, timedelta, or str, optional - Offset to use from the tseries module or time rule (e.g. 'EOM'). - If `freq` is specified then the index values are shifted but the - data is not realigned. That is, use `freq` if you would like to - extend the index when shifting and preserve the original data. - If `freq` is specified as "infer" then it will be inferred from - the freq or inferred_freq attributes of the index. If neither of - those attributes exist, a ValueError is thrown. - axis : {0 or 'index', 1 or 'columns', None}, default None - Shift direction. For `Series` this parameter is unused and defaults to 0. - fill_value : object, optional - The scalar value to use for newly introduced missing values. - the default depends on the dtype of `self`. - For Boolean and numeric NumPy data types, ``np.nan`` is used. - For datetime, timedelta, or period data, etc. :attr:`NaT` is used. - For extension dtypes, ``self.dtype.na_value`` is used. - suffix : str, optional - If str and periods is an iterable, this is added after the column - name and before the shift value for each shifted column name. - For `Series` this parameter is unused and defaults to `None`. - - Returns - ------- - DataFrame - Copy of input object, shifted. - - See Also - -------- - Index.shift : Shift values of Index. - DatetimeIndex.shift : Shift values of DatetimeIndex. - PeriodIndex.shift : Shift values of PeriodIndex. - - Examples - -------- - >>> df = pd.DataFrame( - ... [[10, 13, 17], [20, 23, 27], [15, 18, 22], [30, 33, 37], [45, 48, 52]], - ... columns=["Col1", "Col2", "Col3"], - ... index=pd.date_range("2020-01-01", "2020-01-05"), - ... 
) - >>> df - Col1 Col2 Col3 - 2020-01-01 10 13 17 - 2020-01-02 20 23 27 - 2020-01-03 15 18 22 - 2020-01-04 30 33 37 - 2020-01-05 45 48 52 - - >>> df.shift(periods=3) - Col1 Col2 Col3 - 2020-01-01 NaN NaN NaN - 2020-01-02 NaN NaN NaN - 2020-01-03 NaN NaN NaN - 2020-01-04 10.0 13.0 17.0 - 2020-01-05 20.0 23.0 27.0 - - >>> df.shift(periods=1, axis="columns") - Col1 Col2 Col3 - 2020-01-01 NaN 10 13 - 2020-01-02 NaN 20 23 - 2020-01-03 NaN 15 18 - 2020-01-04 NaN 30 33 - 2020-01-05 NaN 45 48 - - >>> df.shift(periods=3, fill_value=0) - Col1 Col2 Col3 - 2020-01-01 0 0 0 - 2020-01-02 0 0 0 - 2020-01-03 0 0 0 - 2020-01-04 10 13 17 - 2020-01-05 20 23 27 - - >>> df.shift(periods=3, freq="D") - Col1 Col2 Col3 - 2020-01-04 10 13 17 - 2020-01-05 20 23 27 - 2020-01-06 15 18 22 - 2020-01-07 30 33 37 - 2020-01-08 45 48 52 - - >>> df.shift(periods=3, freq="infer") - Col1 Col2 Col3 - 2020-01-04 10 13 17 - 2020-01-05 20 23 27 - 2020-01-06 15 18 22 - 2020-01-07 30 33 37 - 2020-01-08 45 48 52 - - >>> df["Col1"].shift(periods=[0, 1, 2]) - Col1_0 Col1_1 Col1_2 - 2020-01-01 10 NaN NaN - 2020-01-02 20 10.0 NaN - 2020-01-03 15 20.0 10.0 - 2020-01-04 30 15.0 20.0 - 2020-01-05 45 30.0 15.0 - """ if freq is not None and fill_value is not lib.no_default: # GH#53832 raise ValueError( @@ -7437,75 +6790,8 @@ class max type # ---------------------------------------------------------------------- # Reindex-based selection methods + @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: - """ - Detect missing values. - - Return a boolean same-sized object indicating if the values are NA. - NA values, such as None or :attr:`numpy.NaN`, gets mapped to True - values. - Everything else gets mapped to False values. Characters such as empty - strings ``''`` or :attr:`numpy.inf` are not considered NA values. - - Returns - ------- - Series/DataFrame - Mask of bool values for each element in Series/DataFrame - that indicates whether an element is an NA value. 
- - See Also - -------- - Series.isnull : Alias of isna. - DataFrame.isnull : Alias of isna. - Series.notna : Boolean inverse of isna. - DataFrame.notna : Boolean inverse of isna. - Series.dropna : Omit axes labels with missing values. - DataFrame.dropna : Omit axes labels with missing values. - isna : Top-level isna. - - Examples - -------- - Show which entries in a DataFrame are NA. - - >>> df = pd.DataFrame( - ... dict( - ... age=[5, 6, np.nan], - ... born=[ - ... pd.NaT, - ... pd.Timestamp("1939-05-27"), - ... pd.Timestamp("1940-04-25"), - ... ], - ... name=["Alfred", "Batman", ""], - ... toy=[None, "Batmobile", "Joker"], - ... ) - ... ) - >>> df - age born name toy - 0 5.0 NaT Alfred NaN - 1 6.0 1939-05-27 Batman Batmobile - 2 NaN 1940-04-25 Joker - - >>> df.isna() - age born name toy - 0 False True False True - 1 False False False False - 2 True False False False - - Show which entries in a Series are NA. - - >>> ser = pd.Series([5, 6, np.nan]) - >>> ser - 0 5.0 - 1 6.0 - 2 NaN - dtype: float64 - - >>> ser.isna() - 0 False - 1 False - 2 True - dtype: bool - """ res_mgr = self._mgr.isna(func=isna) result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes) return result.__finalize__(self, method="isna") @@ -7517,75 +6803,8 @@ def isnull(self) -> DataFrame: """ return self.isna() + @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notna(self) -> DataFrame: - """ - Detect existing (non-missing) values. - - Return a boolean same-sized object indicating if the values are not NA. - Non-missing values get mapped to True. Characters such as empty - strings ``''`` or :attr:`numpy.inf` are not considered NA values. - NA values, such as None or :attr:`numpy.NaN`, get mapped to False - values. - - Returns - ------- - Series/DataFrame - Mask of bool values for each element in Series/DataFrame - that indicates whether an element is not an NA value. - - See Also - -------- - Series.notnull : Alias of notna. - DataFrame.notnull : Alias of notna. 
- Series.isna : Boolean inverse of notna. - DataFrame.isna : Boolean inverse of notna. - Series.dropna : Omit axes labels with missing values. - DataFrame.dropna : Omit axes labels with missing values. - notna : Top-level notna. - - Examples - -------- - Show which entries in a DataFrame are not NA. - - >>> df = pd.DataFrame( - ... dict( - ... age=[5, 6, np.nan], - ... born=[ - ... pd.NaT, - ... pd.Timestamp("1939-05-27"), - ... pd.Timestamp("1940-04-25"), - ... ], - ... name=["Alfred", "Batman", ""], - ... toy=[None, "Batmobile", "Joker"], - ... ) - ... ) - >>> df - age born name toy - 0 5.0 NaT Alfred NaN - 1 6.0 1939-05-27 Batman Batmobile - 2 NaN 1940-04-25 Joker - - >>> df.notna() - age born name toy - 0 True False True False - 1 True True True True - 2 False True True True - - Show which entries in a Series are not NA. - - >>> ser = pd.Series([5, 6, np.nan]) - >>> ser - 0 5.0 - 1 6.0 - 2 NaN - dtype: float64 - - >>> ser.notna() - 0 True - 1 True - 2 False - dtype: bool - """ return ~self.isna() @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) @@ -9524,2341 +8743,86 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data, other=other) + @Appender(ops.make_flex_doc("eq", "dataframe")) def eq(self, other, axis: Axis = "columns", level=None) -> DataFrame: - """ - Get Not equal to of dataframe and other, element-wise (binary operator `eq`). + return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) - Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison - operators. + @Appender(ops.make_flex_doc("ne", "dataframe")) + def ne(self, other, axis: Axis = "columns", level=None) -> DataFrame: + return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) - Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis - (rows or columns) and level for comparison. 
- - Parameters - ---------- - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'}, default 'columns' - Whether to compare by the index (0 or 'index') or columns - (1 or 'columns'). - level : int or label - Broadcast across a level, matching Index values on the passed - MultiIndex level. - - Returns - ------- - DataFrame of bool - Result of the comparison. - - See Also - -------- - DataFrame.eq : Compare DataFrames for equality elementwise. - DataFrame.ne : Compare DataFrames for inequality elementwise. - DataFrame.le : Compare DataFrames for less than inequality - or equality elementwise. - DataFrame.lt : Compare DataFrames for strictly less than - inequality elementwise. - DataFrame.ge : Compare DataFrames for greater than inequality - or equality elementwise. - DataFrame.gt : Compare DataFrames for strictly greater than - inequality elementwise. - - Notes - ----- - Mismatched indices will be unioned together. - `NaN` values are considered different (i.e. `NaN` != `NaN`). - - Examples - -------- - >>> df = pd.DataFrame( - ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, - ... index=["A", "B", "C"], - ... 
) - >>> df - cost revenue - A 250 100 - B 150 250 - C 100 300 - - Comparison with a scalar, using either the operator or method: - - >>> df == 100 - cost revenue - A False True - B False False - C True False - - >>> df.eq(100) - cost revenue - A False True - B False False - C True False - - When `other` is a :class:`Series`, the columns of a DataFrame are aligned - with the index of `other` and broadcast: - - >>> df != pd.Series([100, 250], index=["cost", "revenue"]) - cost revenue - A True True - B True False - C False True - - Use the method to control the broadcast axis: - - >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") - cost revenue - A True False - B True True - C True True - D True True - - When comparing to an arbitrary sequence, the number of columns must - match the number elements in `other`: - - >>> df == [250, 100] - cost revenue - A True True - B False False - C False False - - Use the method to control the axis: - - >>> df.eq([250, 250, 100], axis="index") - cost revenue - A True False - B False True - C True False - - Compare to a DataFrame of different shape. - - >>> other = pd.DataFrame( - ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] - ... ) - >>> other - revenue - A 300 - B 250 - C 100 - D 150 - - >>> df.gt(other) - cost revenue - A False False - B False False - C False True - D False False - - Compare to a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "cost": [250, 150, 100, 150, 300, 220], - ... "revenue": [100, 250, 300, 200, 175, 225], - ... }, - ... index=[ - ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], - ... ["A", "B", "C", "A", "B", "C"], - ... ], - ... 
) - >>> df_multindex - cost revenue - Q1 A 250 100 - B 150 250 - C 100 300 - Q2 A 150 200 - B 300 175 - C 220 225 - - >>> df.le(df_multindex, level=1) - cost revenue - Q1 A True True - B True True - C True True - Q2 A False True - B True False - C True False - """ - return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) - - def ne(self, other, axis: Axis = "columns", level=None) -> DataFrame: - """ - Get Not equal to of dataframe and other, element-wise (binary operator `ne`). - - Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison - operators. - - Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis - (rows or columns) and level for comparison. - - Parameters - ---------- - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'}, default 'columns' - Whether to compare by the index (0 or 'index') or columns - (1 or 'columns'). - level : int or label - Broadcast across a level, matching Index values on the passed - MultiIndex level. - - Returns - ------- - DataFrame of bool - Result of the comparison. - - See Also - -------- - DataFrame.eq : Compare DataFrames for equality elementwise. - DataFrame.ne : Compare DataFrames for inequality elementwise. - DataFrame.le : Compare DataFrames for less than inequality - or equality elementwise. - DataFrame.lt : Compare DataFrames for strictly less than - inequality elementwise. - DataFrame.ge : Compare DataFrames for greater than inequality - or equality elementwise. - DataFrame.gt : Compare DataFrames for strictly greater than - inequality elementwise. - - Notes - ----- - Mismatched indices will be unioned together. - `NaN` values are considered different (i.e. `NaN` != `NaN`). - - Examples - -------- - >>> df = pd.DataFrame( - ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, - ... index=["A", "B", "C"], - ... 
) - >>> df - cost revenue - A 250 100 - B 150 250 - C 100 300 - - Comparison with a scalar, using either the operator or method: - - >>> df == 100 - cost revenue - A False True - B False False - C True False - - >>> df.eq(100) - cost revenue - A False True - B False False - C True False - - When `other` is a :class:`Series`, the columns of a DataFrame are aligned - with the index of `other` and broadcast: - - >>> df != pd.Series([100, 250], index=["cost", "revenue"]) - cost revenue - A True True - B True False - C False True - - Use the method to control the broadcast axis: - - >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") - cost revenue - A True False - B True True - C True True - D True True - - When comparing to an arbitrary sequence, the number of columns must - match the number elements in `other`: - - >>> df == [250, 100] - cost revenue - A True True - B False False - C False False - - Use the method to control the axis: - - >>> df.eq([250, 250, 100], axis="index") - cost revenue - A True False - B False True - C True False - - Compare to a DataFrame of different shape. - - >>> other = pd.DataFrame( - ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] - ... ) - >>> other - revenue - A 300 - B 250 - C 100 - D 150 - - >>> df.gt(other) - cost revenue - A False False - B False False - C False True - D False False - - Compare to a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "cost": [250, 150, 100, 150, 300, 220], - ... "revenue": [100, 250, 300, 200, 175, 225], - ... }, - ... index=[ - ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], - ... ["A", "B", "C", "A", "B", "C"], - ... ], - ... 
) - >>> df_multindex - cost revenue - Q1 A 250 100 - B 150 250 - C 100 300 - Q2 A 150 200 - B 300 175 - C 220 225 - - >>> df.le(df_multindex, level=1) - cost revenue - Q1 A True True - B True True - C True True - Q2 A False True - B True False - C True False - """ - return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) - - def le(self, other, axis: Axis = "columns", level=None) -> DataFrame: - """ - Get Greater than or equal to of dataframe and other, - element-wise (binary operator `le`). - - Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison - operators. - - Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis - (rows or columns) and level for comparison. - - Parameters - ---------- - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'}, default 'columns' - Whether to compare by the index (0 or 'index') or columns - (1 or 'columns'). - level : int or label - Broadcast across a level, matching Index values on the passed - MultiIndex level. - - Returns - ------- - DataFrame of bool - Result of the comparison. - - See Also - -------- - DataFrame.eq : Compare DataFrames for equality elementwise. - DataFrame.ne : Compare DataFrames for inequality elementwise. - DataFrame.le : Compare DataFrames for less than inequality - or equality elementwise. - DataFrame.lt : Compare DataFrames for strictly less than - inequality elementwise. - DataFrame.ge : Compare DataFrames for greater than inequality - or equality elementwise. - DataFrame.gt : Compare DataFrames for strictly greater than - inequality elementwise. - - Notes - ----- - Mismatched indices will be unioned together. - `NaN` values are considered different (i.e. `NaN` != `NaN`). - - Examples - -------- - >>> df = pd.DataFrame( - ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, - ... index=["A", "B", "C"], - ... 
) - >>> df - cost revenue - A 250 100 - B 150 250 - C 100 300 - - Comparison with a scalar, using either the operator or method: - - >>> df == 100 - cost revenue - A False True - B False False - C True False - - >>> df.eq(100) - cost revenue - A False True - B False False - C True False - - When `other` is a :class:`Series`, the columns of a DataFrame are aligned - with the index of `other` and broadcast: - - >>> df != pd.Series([100, 250], index=["cost", "revenue"]) - cost revenue - A True True - B True False - C False True - - Use the method to control the broadcast axis: - - >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") - cost revenue - A True False - B True True - C True True - D True True - - When comparing to an arbitrary sequence, the number of columns must - match the number elements in `other`: - - >>> df == [250, 100] - cost revenue - A True True - B False False - C False False - - Use the method to control the axis: - - >>> df.eq([250, 250, 100], axis="index") - cost revenue - A True False - B False True - C True False - - Compare to a DataFrame of different shape. - - >>> other = pd.DataFrame( - ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] - ... ) - >>> other - revenue - A 300 - B 250 - C 100 - D 150 - - >>> df.gt(other) - cost revenue - A False False - B False False - C False True - D False False - - Compare to a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "cost": [250, 150, 100, 150, 300, 220], - ... "revenue": [100, 250, 300, 200, 175, 225], - ... }, - ... index=[ - ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], - ... ["A", "B", "C", "A", "B", "C"], - ... ], - ... 
) - >>> df_multindex - cost revenue - Q1 A 250 100 - B 150 250 - C 100 300 - Q2 A 150 200 - B 300 175 - C 220 225 - - >>> df.le(df_multindex, level=1) - cost revenue - Q1 A True True - B True True - C True True - Q2 A False True - B True False - C True False - """ - return self._flex_cmp_method(other, operator.le, axis=axis, level=level) + @Appender(ops.make_flex_doc("le", "dataframe")) + def le(self, other, axis: Axis = "columns", level=None) -> DataFrame: + return self._flex_cmp_method(other, operator.le, axis=axis, level=level) + @Appender(ops.make_flex_doc("lt", "dataframe")) def lt(self, other, axis: Axis = "columns", level=None) -> DataFrame: - """ - Get Greater than of dataframe and other, element-wise (binary operator `lt`). - - Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison - operators. - - Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis - (rows or columns) and level for comparison. - - Parameters - ---------- - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'}, default 'columns' - Whether to compare by the index (0 or 'index') or columns - (1 or 'columns'). - level : int or label - Broadcast across a level, matching Index values on the passed - MultiIndex level. - - Returns - ------- - DataFrame of bool - Result of the comparison. - - See Also - -------- - DataFrame.eq : Compare DataFrames for equality elementwise. - DataFrame.ne : Compare DataFrames for inequality elementwise. - DataFrame.le : Compare DataFrames for less than inequality - or equality elementwise. - DataFrame.lt : Compare DataFrames for strictly less than - inequality elementwise. - DataFrame.ge : Compare DataFrames for greater than inequality - or equality elementwise. - DataFrame.gt : Compare DataFrames for strictly greater than - inequality elementwise. - - Notes - ----- - Mismatched indices will be unioned together. 
- `NaN` values are considered different (i.e. `NaN` != `NaN`). - - Examples - -------- - >>> df = pd.DataFrame( - ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, - ... index=["A", "B", "C"], - ... ) - >>> df - cost revenue - A 250 100 - B 150 250 - C 100 300 - - Comparison with a scalar, using either the operator or method: - - >>> df == 100 - cost revenue - A False True - B False False - C True False - - >>> df.eq(100) - cost revenue - A False True - B False False - C True False - - When `other` is a :class:`Series`, the columns of a DataFrame are aligned - with the index of `other` and broadcast: - - >>> df != pd.Series([100, 250], index=["cost", "revenue"]) - cost revenue - A True True - B True False - C False True - - Use the method to control the broadcast axis: - - >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") - cost revenue - A True False - B True True - C True True - D True True - - When comparing to an arbitrary sequence, the number of columns must - match the number elements in `other`: - - >>> df == [250, 100] - cost revenue - A True True - B False False - C False False - - Use the method to control the axis: - - >>> df.eq([250, 250, 100], axis="index") - cost revenue - A True False - B False True - C True False - - Compare to a DataFrame of different shape. - - >>> other = pd.DataFrame( - ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] - ... ) - >>> other - revenue - A 300 - B 250 - C 100 - D 150 - - >>> df.gt(other) - cost revenue - A False False - B False False - C False True - D False False - - Compare to a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "cost": [250, 150, 100, 150, 300, 220], - ... "revenue": [100, 250, 300, 200, 175, 225], - ... }, - ... index=[ - ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], - ... ["A", "B", "C", "A", "B", "C"], - ... ], - ... 
) - >>> df_multindex - cost revenue - Q1 A 250 100 - B 150 250 - C 100 300 - Q2 A 150 200 - B 300 175 - C 220 225 - - >>> df.le(df_multindex, level=1) - cost revenue - Q1 A True True - B True True - C True True - Q2 A False True - B True False - C True False - """ return self._flex_cmp_method(other, operator.lt, axis=axis, level=level) + @Appender(ops.make_flex_doc("ge", "dataframe")) def ge(self, other, axis: Axis = "columns", level=None) -> DataFrame: - """ - Get Greater than or equal to of dataframe and other, - element-wise (binary operator `ge`). - - Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison - operators. - - Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis - (rows or columns) and level for comparison. - - Parameters - ---------- - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'}, default 'columns' - Whether to compare by the index (0 or 'index') or columns - (1 or 'columns'). - level : int or label - Broadcast across a level, matching Index values on the passed - MultiIndex level. - - Returns - ------- - DataFrame of bool - Result of the comparison. - - See Also - -------- - DataFrame.eq : Compare DataFrames for equality elementwise. - DataFrame.ne : Compare DataFrames for inequality elementwise. - DataFrame.le : Compare DataFrames for less than inequality - or equality elementwise. - DataFrame.lt : Compare DataFrames for strictly less than - inequality elementwise. - DataFrame.ge : Compare DataFrames for greater than inequality - or equality elementwise. - DataFrame.gt : Compare DataFrames for strictly greater than - inequality elementwise. - - Notes - ----- - Mismatched indices will be unioned together. - `NaN` values are considered different (i.e. `NaN` != `NaN`). - - Examples - -------- - >>> df = pd.DataFrame( - ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, - ... 
index=["A", "B", "C"], - ... ) - >>> df - cost revenue - A 250 100 - B 150 250 - C 100 300 - - Comparison with a scalar, using either the operator or method: - - >>> df == 100 - cost revenue - A False True - B False False - C True False - - >>> df.eq(100) - cost revenue - A False True - B False False - C True False - - When `other` is a :class:`Series`, the columns of a DataFrame are aligned - with the index of `other` and broadcast: - - >>> df != pd.Series([100, 250], index=["cost", "revenue"]) - cost revenue - A True True - B True False - C False True - - Use the method to control the broadcast axis: - - >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") - cost revenue - A True False - B True True - C True True - D True True - - When comparing to an arbitrary sequence, the number of columns must - match the number elements in `other`: - - >>> df == [250, 100] - cost revenue - A True True - B False False - C False False - - Use the method to control the axis: - - >>> df.eq([250, 250, 100], axis="index") - cost revenue - A True False - B False True - C True False - - Compare to a DataFrame of different shape. - - >>> other = pd.DataFrame( - ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] - ... ) - >>> other - revenue - A 300 - B 250 - C 100 - D 150 - - >>> df.gt(other) - cost revenue - A False False - B False False - C False True - D False False - - Compare to a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "cost": [250, 150, 100, 150, 300, 220], - ... "revenue": [100, 250, 300, 200, 175, 225], - ... }, - ... index=[ - ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], - ... ["A", "B", "C", "A", "B", "C"], - ... ], - ... 
) - >>> df_multindex - cost revenue - Q1 A 250 100 - B 150 250 - C 100 300 - Q2 A 150 200 - B 300 175 - C 220 225 - - >>> df.le(df_multindex, level=1) - cost revenue - Q1 A True True - B True True - C True True - Q2 A False True - B True False - C True False - """ return self._flex_cmp_method(other, operator.ge, axis=axis, level=level) + @Appender(ops.make_flex_doc("gt", "dataframe")) def gt(self, other, axis: Axis = "columns", level=None) -> DataFrame: - """ - Get Greater than of dataframe and other, element-wise (binary operator `gt`). - - Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison - operators. - - Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis - (rows or columns) and level for comparison. - - Parameters - ---------- - other : scalar, sequence, Series, or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'}, default 'columns' - Whether to compare by the index (0 or 'index') or columns - (1 or 'columns'). - level : int or label - Broadcast across a level, matching Index values on the passed - MultiIndex level. - - Returns - ------- - DataFrame of bool - Result of the comparison. - - See Also - -------- - DataFrame.eq : Compare DataFrames for equality elementwise. - DataFrame.ne : Compare DataFrames for inequality elementwise. - DataFrame.le : Compare DataFrames for less than inequality - or equality elementwise. - DataFrame.lt : Compare DataFrames for strictly less than - inequality elementwise. - DataFrame.ge : Compare DataFrames for greater than inequality - or equality elementwise. - DataFrame.gt : Compare DataFrames for strictly greater than - inequality elementwise. - - Notes - ----- - Mismatched indices will be unioned together. - `NaN` values are considered different (i.e. `NaN` != `NaN`). - - Examples - -------- - >>> df = pd.DataFrame( - ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, - ... index=["A", "B", "C"], - ... 
) - >>> df - cost revenue - A 250 100 - B 150 250 - C 100 300 - - Comparison with a scalar, using either the operator or method: - - >>> df == 100 - cost revenue - A False True - B False False - C True False - - >>> df.eq(100) - cost revenue - A False True - B False False - C True False - - When `other` is a :class:`Series`, the columns of a DataFrame are aligned - with the index of `other` and broadcast: - - >>> df != pd.Series([100, 250], index=["cost", "revenue"]) - cost revenue - A True True - B True False - C False True - - Use the method to control the broadcast axis: - - >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") - cost revenue - A True False - B True True - C True True - D True True - - When comparing to an arbitrary sequence, the number of columns must - match the number elements in `other`: - - >>> df == [250, 100] - cost revenue - A True True - B False False - C False False - - Use the method to control the axis: - - >>> df.eq([250, 250, 100], axis="index") - cost revenue - A True False - B False True - C True False - - Compare to a DataFrame of different shape. - - >>> other = pd.DataFrame( - ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] - ... ) - >>> other - revenue - A 300 - B 250 - C 100 - D 150 - - >>> df.gt(other) - cost revenue - A False False - B False False - C False True - D False False - - Compare to a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "cost": [250, 150, 100, 150, 300, 220], - ... "revenue": [100, 250, 300, 200, 175, 225], - ... }, - ... index=[ - ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], - ... ["A", "B", "C", "A", "B", "C"], - ... ], - ... 
) - >>> df_multindex - cost revenue - Q1 A 250 100 - B 150 250 - C 100 300 - Q2 A 150 200 - B 300 175 - C 220 225 - - >>> df.le(df_multindex, level=1) - cost revenue - Q1 A True True - B True True - C True True - Q2 A False True - B True False - C True False - """ return self._flex_cmp_method(other, operator.gt, axis=axis, level=level) + @Appender(ops.make_flex_doc("add", "dataframe")) def add( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Addition of dataframe and other, element-wise (binary operator `add`). - - Equivalent to ``dataframe + other``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `radd`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). 
- DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... 
) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... ) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, operator.add, level=level, fill_value=fill_value, axis=axis ) + @Appender(ops.make_flex_doc("radd", "dataframe")) def radd( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Addition of dataframe and other, element-wise (binary operator `radd`). - - Equivalent to ``other + dataframe``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `add`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). 
For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. 
- - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... 
) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, roperator.radd, level=level, fill_value=fill_value, axis=axis ) + @Appender(ops.make_flex_doc("sub", "dataframe")) def sub( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Subtraction of dataframe and other, element-wise (binary operator `sub`). - - Equivalent to ``dataframe - other``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `rsub`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. 
- DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... ) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, operator.sub, level=level, fill_value=fill_value, axis=axis ) subtract = sub + @Appender(ops.make_flex_doc("rsub", "dataframe")) def rsub( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Subtraction of dataframe and other, - element-wise (binary operator `rsub`). - - Equivalent to ``other - dataframe``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `sub`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. 
- axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. 
- - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... 
) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, roperator.rsub, level=level, fill_value=fill_value, axis=axis ) + @Appender(ops.make_flex_doc("mul", "dataframe")) def mul( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Multiplication of dataframe and other, - element-wise (binary operator `mul`). - - Equivalent to ``dataframe * other``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `rmul`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. 
- DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... ) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ - return self._flex_arith_method( - other, operator.mul, level=level, fill_value=fill_value, axis=axis - ) - - multiply = mul - - def rmul( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - """ - Get Multiplication of dataframe and other, - element-wise (binary operator `rmul`). - - Equivalent to ``other * dataframe``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `mul`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. 
- axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. 
- - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... 
) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ - return self._flex_arith_method( - other, roperator.rmul, level=level, fill_value=fill_value, axis=axis - ) - - def truediv( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - """ - Get Floating division of dataframe and other, - element-wise (binary operator `truediv`). - - Equivalent to ``dataframe / other``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `rtruediv`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. 
- DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 + return self._flex_arith_method( + other, operator.mul, level=level, fill_value=fill_value, axis=axis + ) - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 + multiply = mul - Multiply a DataFrame of different shape with operator version. 
+ @Appender(ops.make_flex_doc("rmul", "dataframe")) + def rmul( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + return self._flex_arith_method( + other, roperator.rmul, level=level, fill_value=fill_value, axis=axis + ) - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... ) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ + @Appender(ops.make_flex_doc("truediv", "dataframe")) + def truediv( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: return self._flex_arith_method( other, operator.truediv, level=level, fill_value=fill_value, axis=axis ) @@ -11866,1406 +8830,60 @@ def truediv( div = truediv divide = truediv + @Appender(ops.make_flex_doc("rtruediv", "dataframe")) def rtruediv( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Floating division of dataframe and other, - element-wise 
(binary operator `rtruediv`). - - Equivalent to ``other / dataframe``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `truediv`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. 
- - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... 
) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis ) rdiv = rtruediv + @Appender(ops.make_flex_doc("floordiv", "dataframe")) def floordiv( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Integer division of dataframe and other, - element-wise (binary operator `floordiv`). - - Equivalent to ``dataframe // other``, - but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `rfloordiv`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. 
- DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... ) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, operator.floordiv, level=level, fill_value=fill_value, axis=axis ) + @Appender(ops.make_flex_doc("rfloordiv", "dataframe")) def rfloordiv( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Integer division of dataframe and other, - element-wise (binary operator `rfloordiv`). - - Equivalent to ``other // dataframe``, - but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `floordiv`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. 
- axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. 
- - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... 
) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis ) + @Appender(ops.make_flex_doc("mod", "dataframe")) def mod( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Modulo of dataframe and other, element-wise (binary operator `mod`). - - Equivalent to ``dataframe % other``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `rmod`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. 
- DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... ) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, operator.mod, level=level, fill_value=fill_value, axis=axis ) - def rmod( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - """ - Get Modulo of dataframe and other, element-wise (binary operator `rmod`). - - Equivalent to ``other % dataframe``, but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `mod`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. 
- (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. 
- - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... 
) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ - return self._flex_arith_method( - other, roperator.rmod, level=level, fill_value=fill_value, axis=axis - ) - - def pow( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - """ - Get Exponential power of dataframe and other, - element-wise (binary operator `pow`). - - Equivalent to ``dataframe ** other``, - but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `rpow`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. 
- DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. - - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. 
+ @Appender(ops.make_flex_doc("rmod", "dataframe")) + def rmod( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + return self._flex_arith_method( + other, roperator.rmod, level=level, fill_value=fill_value, axis=axis + ) - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... ) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ + @Appender(ops.make_flex_doc("pow", "dataframe")) + def pow( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: return self._flex_arith_method( other, operator.pow, level=level, fill_value=fill_value, axis=axis ) + @Appender(ops.make_flex_doc("rpow", "dataframe")) def rpow( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: - """ - Get Exponential power of dataframe and other, - element-wise (binary operator `rpow`). 
- - Equivalent to ``other ** dataframe``, - but with support to substitute a fill_value - for missing data in one of the inputs. With reverse version, `pow`. - - Among flexible wrappers (`add`, `sub`, `mul`, `div`, - `floordiv`, `mod`, `pow`) to arithmetic operators: - `+`, `-`, `*`, `/`, `//`, `%`, `**`. - - Parameters - ---------- - other : scalar, sequence, Series, dict or DataFrame - Any single or multiple element data structure, or list-like object. - axis : {0 or 'index', 1 or 'columns'} - Whether to compare by the index (0 or 'index') or columns. - (1 or 'columns'). For Series input, axis to match Series index on. - level : int or label - Broadcast across a level, matching Index values on the - passed MultiIndex level. - fill_value : float or None, default None - Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing - the result will be missing. - - Returns - ------- - DataFrame - Result of the arithmetic operation. - - See Also - -------- - DataFrame.add : Add DataFrames. - DataFrame.sub : Subtract DataFrames. - DataFrame.mul : Multiply DataFrames. - DataFrame.div : Divide DataFrames (float division). - DataFrame.truediv : Divide DataFrames (float division). - DataFrame.floordiv : Divide DataFrames (integer division). - DataFrame.mod : Calculate modulo (remainder after division). - DataFrame.pow : Calculate exponential power. - - Notes - ----- - Mismatched indices will be unioned together. - - Examples - -------- - >>> df = pd.DataFrame( - ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, - ... index=["circle", "triangle", "rectangle"], - ... ) - >>> df - angles degrees - circle 0 360 - triangle 3 180 - rectangle 4 360 - - Add a scalar with operator version which return the same - results. 
- - >>> df + 1 - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - >>> df.add(1) - angles degrees - circle 1 361 - triangle 4 181 - rectangle 5 361 - - Divide by constant with reverse version. - - >>> df.div(10) - angles degrees - circle 0.0 36.0 - triangle 0.3 18.0 - rectangle 0.4 36.0 - - >>> df.rdiv(10) - angles degrees - circle inf 0.027778 - triangle 3.333333 0.055556 - rectangle 2.500000 0.027778 - - Subtract a list and Series by axis with operator version. - - >>> df - [1, 2] - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub([1, 2], axis="columns") - angles degrees - circle -1 358 - triangle 2 178 - rectangle 3 358 - - >>> df.sub( - ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), - ... axis="index", - ... ) - angles degrees - circle -1 359 - triangle 2 179 - rectangle 3 359 - - Multiply a dictionary by axis. - - >>> df.mul({"angles": 0, "degrees": 2}) - angles degrees - circle 0 720 - triangle 0 360 - rectangle 0 720 - - >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") - angles degrees - circle 0 0 - triangle 6 360 - rectangle 12 1080 - - Multiply a DataFrame of different shape with operator version. - - >>> other = pd.DataFrame( - ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] - ... ) - >>> other - angles - circle 0 - triangle 3 - rectangle 4 - - >>> df * other - angles degrees - circle 0 NaN - triangle 9 NaN - rectangle 16 NaN - - >>> df.mul(other, fill_value=0) - angles degrees - circle 0 0.0 - triangle 9 0.0 - rectangle 16 0.0 - - Divide by a MultiIndex by level. - - >>> df_multindex = pd.DataFrame( - ... { - ... "angles": [0, 3, 4, 4, 5, 6], - ... "degrees": [360, 180, 360, 360, 540, 720], - ... }, - ... index=[ - ... ["A", "A", "A", "B", "B", "B"], - ... [ - ... "circle", - ... "triangle", - ... "rectangle", - ... "square", - ... "pentagon", - ... "hexagon", - ... ], - ... ], - ... 
) - >>> df_multindex - angles degrees - A circle 0 360 - triangle 3 180 - rectangle 4 360 - B square 4 360 - pentagon 5 540 - hexagon 6 720 - - >>> df.div(df_multindex, level=1, fill_value=0) - angles degrees - A circle NaN 1.0 - triangle 1.0 1.0 - rectangle 1.0 1.0 - B square 0.0 0.0 - pentagon 0.0 0.0 - hexagon 0.0 0.0 - - >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) - >>> df_pow.pow(2) - A B - 0 4 36 - 1 9 49 - 2 16 64 - 3 25 81 - """ return self._flex_arith_method( other, roperator.rpow, level=level, fill_value=fill_value, axis=axis ) @@ -13273,41 +8891,10 @@ def rpow( # ---------------------------------------------------------------------- # Combination-Related - def compare( - self, - other: DataFrame, - align_axis: Axis = 1, - keep_shape: bool = False, - keep_equal: bool = False, - result_names: Suffixes = ("self", "other"), - ) -> DataFrame: - """ - Compare to another DataFrame and show the differences. - - Parameters - ---------- - other : DataFrame - Object to compare with. - - align_axis : {0 or 'index', 1 or 'columns'}, default 1 - Determine which axis to align the comparison on. - - * 0, or 'index' : Resulting differences are stacked vertically - with rows drawn alternately from self and other. - * 1, or 'columns' : Resulting differences are aligned horizontally - with columns drawn alternately from self and other. - - keep_shape : bool, default False - If true, all rows and columns are kept. - Otherwise, only the ones with different values are kept. - - keep_equal : bool, default False - If true, the result keeps values that are equal. - Otherwise, equal values are shown as NaNs. - - result_names : tuple, default ('self', 'other') - Set the dataframes names in the comparison. - + @doc( + _shared_docs["compare"], + dedent( + """ Returns ------- DataFrame @@ -13336,11 +8923,11 @@ def compare( Examples -------- >>> df = pd.DataFrame( - ... { + ... {{ ... "col1": ["a", "a", "b", "b", "a"], ... 
"col2": [1.0, 2.0, 3.0, np.nan, 5.0], - ... "col3": [1.0, 2.0, 3.0, 4.0, 5.0], - ... }, + ... "col3": [1.0, 2.0, 3.0, 4.0, 5.0] + ... }}, ... columns=["col1", "col2", "col3"], ... ) >>> df @@ -13352,8 +8939,8 @@ def compare( 4 a 5.0 5.0 >>> df2 = df.copy() - >>> df2.loc[0, "col1"] = "c" - >>> df2.loc[2, "col3"] = 4.0 + >>> df2.loc[0, 'col1'] = 'c' + >>> df2.loc[2, 'col3'] = 4.0 >>> df2 col1 col2 col3 0 c 1.0 1.0 @@ -13417,6 +9004,17 @@ def compare( 3 b b NaN NaN 4.0 4.0 4 a a 5.0 5.0 5.0 5.0 """ + ), + klass=_shared_doc_kwargs["klass"], + ) + def compare( + self, + other: DataFrame, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: Suffixes = ("self", "other"), + ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, @@ -14055,185 +9653,47 @@ def groupby( b 12.3 123.0 NaN 12.3 33.0 - When using ``.apply()``, use ``group_keys`` to include or exclude the - group keys. The ``group_keys`` argument defaults to ``True`` (include). - - >>> df = pd.DataFrame( - ... { - ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"], - ... "Max Speed": [380.0, 370.0, 24.0, 26.0], - ... } - ... ) - >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x) - Max Speed - Animal - Falcon 0 380.0 - 1 370.0 - Parrot 2 24.0 - 3 26.0 - - >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x) - Max Speed - 0 380.0 - 1 370.0 - 2 24.0 - 3 26.0 - """ - from pandas.core.groupby.generic import DataFrameGroupBy - - if level is None and by is None: - raise TypeError("You have to supply one of 'by' and 'level'") - - return DataFrameGroupBy( - obj=self, - keys=by, - level=level, - as_index=as_index, - sort=sort, - group_keys=group_keys, - observed=observed, - dropna=dropna, - ) - - _shared_docs["pivot"] = """ - Return reshaped DataFrame organized by given index / column values. - - Reshape data (produce a "pivot" table) based on column values. 
Uses - unique values from specified `index` / `columns` to form axes of the - resulting DataFrame. This function does not support data - aggregation, multiple values will result in a MultiIndex in the - columns. See the :ref:`User Guide ` for more on reshaping. - - Parameters - ----------%s - columns : Hashable or a sequence of the previous - Column to use to make new frame's columns. - index : Hashable or a sequence of the previous, optional - Column to use to make new frame's index. If not given, uses existing index. - values : Hashable or a sequence of the previous, optional - Column(s) to use for populating new frame's values. If not - specified, all remaining columns will be used and the result will - have hierarchically indexed columns. - - Returns - ------- - DataFrame - Returns reshaped DataFrame. - - Raises - ------ - ValueError: - When there are any `index`, `columns` combinations with multiple - values. `DataFrame.pivot_table` when you need to aggregate. - - See Also - -------- - DataFrame.pivot_table : Generalization of pivot that can handle - duplicate values for one index/column pair. - DataFrame.unstack : Pivot based on the index values instead of a - column. - wide_to_long : Wide panel to long format. Less flexible but more - user-friendly than melt. - - Notes - ----- - For finer-tuned control, see hierarchical indexing documentation along - with the related stack/unstack methods. - - Reference :ref:`the user guide ` for more examples. - - Examples - -------- - >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', - ... 'two'], - ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], - ... 'baz': [1, 2, 3, 4, 5, 6], - ... 
'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - >>> df - foo bar baz zoo - 0 one A 1 x - 1 one B 2 y - 2 one C 3 z - 3 two A 4 q - 4 two B 5 w - 5 two C 6 t - - >>> df.pivot(index='foo', columns='bar', values='baz') - bar A B C - foo - one 1 2 3 - two 4 5 6 - - >>> df.pivot(index='foo', columns='bar')['baz'] - bar A B C - foo - one 1 2 3 - two 4 5 6 - - >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) - baz zoo - bar A B C A B C - foo - one 1 2 3 x y z - two 4 5 6 q w t - - You could also assign a list of column names or a list of index names. - - >>> df = pd.DataFrame({ - ... "lev1": [1, 1, 1, 2, 2, 2], - ... "lev2": [1, 1, 2, 1, 1, 2], - ... "lev3": [1, 2, 1, 2, 1, 2], - ... "lev4": [1, 2, 3, 4, 5, 6], - ... "values": [0, 1, 2, 3, 4, 5]}) - >>> df - lev1 lev2 lev3 lev4 values - 0 1 1 1 1 0 - 1 1 1 2 2 1 - 2 1 2 1 3 2 - 3 2 1 2 4 3 - 4 2 1 1 5 4 - 5 2 2 2 6 5 - - >>> df.pivot(index="lev1", columns=["lev2", "lev3"], values="values") - lev2 1 2 - lev3 1 2 1 2 - lev1 - 1 0.0 1.0 2.0 NaN - 2 4.0 3.0 NaN 5.0 - - >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"], values="values") - lev3 1 2 - lev1 lev2 - 1 1 0.0 1.0 - 2 2.0 NaN - 2 1 4.0 3.0 - 2 NaN 5.0 - - A ValueError is raised if there are any duplicates. - - >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], - ... "bar": ['A', 'A', 'B', 'C'], - ... "baz": [1, 2, 3, 4]}) - >>> df - foo bar baz - 0 one A 1 - 1 one A 2 - 2 two B 3 - 3 two C 4 - - Notice that the first two rows are the same for our `index` - and `columns` arguments. + When using ``.apply()``, use ``group_keys`` to include or exclude the + group keys. The ``group_keys`` argument defaults to ``True`` (include). - >>> df.pivot(index='foo', columns='bar', values='baz') - Traceback (most recent call last): - ... - ValueError: Index contains duplicate entries, cannot reshape - """ + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"], + ... "Max Speed": [380.0, 370.0, 24.0, 26.0], + ... } + ... 
) + >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x) + Max Speed + Animal + Falcon 0 380.0 + 1 370.0 + Parrot 2 24.0 + 3 26.0 - def pivot( - self, *, columns, index=lib.no_default, values=lib.no_default - ) -> DataFrame: + >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x) + Max Speed + 0 380.0 + 1 370.0 + 2 24.0 + 3 26.0 """ + from pandas.core.groupby.generic import DataFrameGroupBy + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + + return DataFrameGroupBy( + obj=self, + keys=by, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + observed=observed, + dropna=dropna, + ) + + _shared_docs["pivot"] = """ Return reshaped DataFrame organized by given index / column values. Reshape data (produce a "pivot" table) based on column values. Uses @@ -14243,7 +9703,7 @@ def pivot( columns. See the :ref:`User Guide ` for more on reshaping. Parameters - ---------- + ----------%s columns : Hashable or a sequence of the previous Column to use to make new frame's columns. index : Hashable or a sequence of the previous, optional @@ -14282,14 +9742,11 @@ def pivot( Examples -------- - >>> df = pd.DataFrame( - ... { - ... "foo": ["one", "one", "one", "two", "two", "two"], - ... "bar": ["A", "B", "C", "A", "B", "C"], - ... "baz": [1, 2, 3, 4, 5, 6], - ... "zoo": ["x", "y", "z", "q", "w", "t"], - ... } - ... ) + >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', + ... 'two'], + ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + ... 'baz': [1, 2, 3, 4, 5, 6], + ... 
'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) >>> df foo bar baz zoo 0 one A 1 x @@ -14299,19 +9756,19 @@ def pivot( 4 two B 5 w 5 two C 6 t - >>> df.pivot(index="foo", columns="bar", values="baz") + >>> df.pivot(index='foo', columns='bar', values='baz') bar A B C foo one 1 2 3 two 4 5 6 - >>> df.pivot(index="foo", columns="bar")["baz"] + >>> df.pivot(index='foo', columns='bar')['baz'] bar A B C foo one 1 2 3 two 4 5 6 - >>> df.pivot(index="foo", columns="bar", values=["baz", "zoo"]) + >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) baz zoo bar A B C A B C foo @@ -14320,15 +9777,12 @@ def pivot( You could also assign a list of column names or a list of index names. - >>> df = pd.DataFrame( - ... { - ... "lev1": [1, 1, 1, 2, 2, 2], - ... "lev2": [1, 1, 2, 1, 1, 2], - ... "lev3": [1, 2, 1, 2, 1, 2], - ... "lev4": [1, 2, 3, 4, 5, 6], - ... "values": [0, 1, 2, 3, 4, 5], - ... } - ... ) + >>> df = pd.DataFrame({ + ... "lev1": [1, 1, 1, 2, 2, 2], + ... "lev2": [1, 1, 2, 1, 1, 2], + ... "lev3": [1, 2, 1, 2, 1, 2], + ... "lev4": [1, 2, 3, 4, 5, 6], + ... "values": [0, 1, 2, 3, 4, 5]}) >>> df lev1 lev2 lev3 lev4 values 0 1 1 1 1 0 @@ -14355,13 +9809,9 @@ def pivot( A ValueError is raised if there are any duplicates. - >>> df = pd.DataFrame( - ... { - ... "foo": ["one", "one", "two", "two"], - ... "bar": ["A", "A", "B", "C"], - ... "baz": [1, 2, 3, 4], - ... } - ... ) + >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], + ... "bar": ['A', 'A', 'B', 'C'], + ... "baz": [1, 2, 3, 4]}) >>> df foo bar baz 0 one A 1 @@ -14370,192 +9820,31 @@ def pivot( 3 two C 4 Notice that the first two rows are the same for our `index` - and `columns` arguments. - - >>> df.pivot(index="foo", columns="bar", values="baz") - Traceback (most recent call last): - ... 
- ValueError: Index contains duplicate entries, cannot reshape - """ - from pandas.core.reshape.pivot import pivot - - return pivot(self, index=index, columns=columns, values=values) - - _shared_docs["pivot_table"] = """ - Create a spreadsheet-style pivot table as a DataFrame. - - The levels in the pivot table will be stored in MultiIndex objects - (hierarchical indexes) on the index and columns of the result DataFrame. - - Parameters - ----------%s - values : list-like or scalar, optional - Column or columns to aggregate. - index : column, Grouper, array, or sequence of the previous - Keys to group by on the pivot table index. If a list is passed, - it can contain any of the other types (except list). If an array is - passed, it must be the same length as the data and will be used in - the same manner as column values. - columns : column, Grouper, array, or sequence of the previous - Keys to group by on the pivot table column. If a list is passed, - it can contain any of the other types (except list). If an array is - passed, it must be the same length as the data and will be used in - the same manner as column values. - aggfunc : function, list of functions, dict, default "mean" - If a list of functions is passed, the resulting pivot table will have - hierarchical columns whose top level are the function names - (inferred from the function objects themselves). - If a dict is passed, the key is column to aggregate and the value is - function or list of functions. If ``margin=True``, aggfunc will be - used to calculate the partial aggregates. - fill_value : scalar, default None - Value to replace missing values with (in the resulting pivot table, - after aggregation). - margins : bool, default False - If ``margins=True``, special ``All`` columns and rows - will be added with partial group aggregates across the categories - on the rows and columns. - dropna : bool, default True - Do not include columns whose entries are all NaN. 
If True, - - * rows with an NA value in any column will be omitted before computing - margins, - * index/column keys containing NA values will be dropped (see ``dropna`` - parameter in :meth:`DataFrame.groupby`). - - margins_name : str, default 'All' - Name of the row / column that will contain the totals - when margins is True. - observed : bool, default False - This only applies if any of the groupers are Categoricals. - If True: only show observed values for categorical groupers. - If False: show all values for categorical groupers. - - .. versionchanged:: 3.0.0 - - The default value is now ``True``. - - sort : bool, default True - Specifies if the result should be sorted. - - **kwargs : dict - Optional keyword arguments to pass to ``aggfunc``. - - Returns - ------- - DataFrame - An Excel style pivot table. - - See Also - -------- - DataFrame.pivot : Pivot without aggregation that can handle - non-numeric data. - DataFrame.melt: Unpivot a DataFrame from wide to long format, - optionally leaving identifiers set. - wide_to_long : Wide panel to long format. Less flexible but more - user-friendly than melt. - - Notes - ----- - Reference :ref:`the user guide ` for more examples. - - Examples - -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], - ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) - >>> df - A B C D E - 0 foo one small 1 2 - 1 foo one large 2 4 - 2 foo one large 2 5 - 3 foo two small 3 5 - 4 foo two small 3 6 - 5 bar one large 4 6 - 6 bar one small 5 8 - 7 bar two small 6 9 - 8 bar two large 7 9 - - This first example aggregates values by taking the sum. - - >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], - ... 
columns=['C'], aggfunc="sum") - >>> table - C large small - A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 - - We can also fill missing values using the `fill_value` parameter. - - >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc="sum", fill_value=0) - >>> table - C large small - A B - bar one 4 5 - two 7 6 - foo one 4 1 - two 0 6 - - The next example aggregates by taking the mean across multiple columns. - - >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], - ... aggfunc={'D': "mean", 'E': "mean"}) - >>> table - D E - A C - bar large 5.500000 7.500000 - small 5.500000 8.500000 - foo large 2.000000 4.500000 - small 2.333333 4.333333 - - We can also calculate multiple types of aggregations for any given - value column. - - >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], - ... aggfunc={'D': "mean", - ... 'E': ["min", "max", "mean"]}) - >>> table - D E - mean max mean min - A C - bar large 5.500000 9 7.500000 6 - small 5.500000 9 8.500000 8 - foo large 2.000000 5 4.500000 4 - small 2.333333 6 4.333333 2 - """ - - def pivot_table( - self, - values=None, - index=None, - columns=None, - aggfunc: AggFuncType = "mean", - fill_value=None, - margins: bool = False, - dropna: bool = True, - margins_name: Level = "All", - observed: bool = True, - sort: bool = True, - **kwargs, - ) -> DataFrame: + and `columns` arguments. + + >>> df.pivot(index='foo', columns='bar', values='baz') + Traceback (most recent call last): + ... + ValueError: Index contains duplicate entries, cannot reshape """ + + @Substitution("") + @Appender(_shared_docs["pivot"]) + def pivot( + self, *, columns, index=lib.no_default, values=lib.no_default + ) -> DataFrame: + from pandas.core.reshape.pivot import pivot + + return pivot(self, index=index, columns=columns, values=values) + + _shared_docs["pivot_table"] = """ Create a spreadsheet-style pivot table as a DataFrame. 
The levels in the pivot table will be stored in MultiIndex objects (hierarchical indexes) on the index and columns of the result DataFrame. Parameters - ---------- + ----------%s values : list-like or scalar, optional Column or columns to aggregate. index : column, Grouper, array, or sequence of the previous @@ -14628,45 +9917,15 @@ def pivot_table( Examples -------- - >>> df = pd.DataFrame( - ... { - ... "A": [ - ... "foo", - ... "foo", - ... "foo", - ... "foo", - ... "foo", - ... "bar", - ... "bar", - ... "bar", - ... "bar", - ... ], - ... "B": [ - ... "one", - ... "one", - ... "one", - ... "two", - ... "two", - ... "one", - ... "one", - ... "two", - ... "two", - ... ], - ... "C": [ - ... "small", - ... "large", - ... "large", - ... "small", - ... "small", - ... "large", - ... "small", - ... "small", - ... "large", - ... ], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], - ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], - ... } - ... ) + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) >>> df A B C D E 0 foo one small 1 2 @@ -14681,9 +9940,8 @@ def pivot_table( This first example aggregates values by taking the sum. - >>> table = pd.pivot_table( - ... df, values="D", index=["A", "B"], columns=["C"], aggfunc="sum" - ... ) + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc="sum") >>> table C large small A B @@ -14694,14 +9952,8 @@ def pivot_table( We can also fill missing values using the `fill_value` parameter. - >>> table = pd.pivot_table( - ... df, - ... values="D", - ... index=["A", "B"], - ... columns=["C"], - ... aggfunc="sum", - ... fill_value=0, - ... ) + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... 
columns=['C'], aggfunc="sum", fill_value=0) >>> table C large small A B @@ -14712,12 +9964,8 @@ def pivot_table( The next example aggregates by taking the mean across multiple columns. - >>> table = pd.pivot_table( - ... df, - ... values=["D", "E"], - ... index=["A", "C"], - ... aggfunc={"D": "mean", "E": "mean"}, - ... ) + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': "mean", 'E': "mean"}) >>> table D E A C @@ -14729,12 +9977,9 @@ def pivot_table( We can also calculate multiple types of aggregations for any given value column. - >>> table = pd.pivot_table( - ... df, - ... values=["D", "E"], - ... index=["A", "C"], - ... aggfunc={"D": "mean", "E": ["min", "max", "mean"]}, - ... ) + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... aggfunc={'D': "mean", + ... 'E': ["min", "max", "mean"]}) >>> table D E mean max mean min @@ -14744,6 +9989,23 @@ def pivot_table( foo large 2.000000 5 4.500000 4 small 2.333333 6 4.333333 2 """ + + @Substitution("") + @Appender(_shared_docs["pivot_table"]) + def pivot_table( + self, + values=None, + index=None, + columns=None, + aggfunc: AggFuncType = "mean", + fill_value=None, + margins: bool = False, + dropna: bool = True, + margins_name: Level = "All", + observed: bool = True, + sort: bool = True, + **kwargs, + ) -> DataFrame: from pandas.core.reshape.pivot import pivot_table return pivot_table( @@ -15313,52 +10575,19 @@ def melt( # ---------------------------------------------------------------------- # Time series-related - def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: - """ - First discrete difference of element. - - Calculates the difference of a DataFrame element compared with another - element in the DataFrame (default is element in previous row). - - Parameters - ---------- - periods : int, default 1 - Periods to shift for calculating difference, accepts negative - values. 
- axis : {0 or 'index', 1 or 'columns'}, default 0 - Take difference over rows (0) or columns (1). - - Returns - ------- - DataFrame - First differences of the Series. - - See Also - -------- - DataFrame.pct_change: Percent change over given number of periods. - DataFrame.shift: Shift index by desired number of periods with an - optional time freq. - Series.diff: First discrete difference of object. - - Notes - ----- - For boolean dtypes, this uses :meth:`operator.xor` rather than - :meth:`operator.sub`. - The result is calculated according to current dtype in DataFrame, - however dtype of the result is always float64. - - Examples - -------- - + @doc( + Series.diff, + klass="DataFrame", + extra_params="axis : {0 or 'index', 1 or 'columns'}, default 0\n " + "Take difference over rows (0) or columns (1).\n", + other_klass="Series", + examples=dedent( + """ Difference with previous row - >>> df = pd.DataFrame( - ... { - ... "a": [1, 2, 3, 4, 5, 6], - ... "b": [1, 1, 2, 3, 5, 8], - ... "c": [1, 4, 9, 16, 25, 36], - ... } - ... ) + >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], + ... 'b': [1, 1, 2, 3, 5, 8], + ... 
'c': [1, 4, 9, 16, 25, 36]}) >>> df a b c 0 1 1 1 @@ -15412,12 +10641,14 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: Overflow in input dtype - >>> df = pd.DataFrame({"a": [1, 0]}, dtype=np.uint8) + >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) >>> df.diff() a 0 NaN - 1 255.0 - """ + 1 255.0""" + ), + ) + def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: if not lib.is_integer(periods): if not (is_float(periods) and periods.is_integer()): raise ValueError("periods must be an integer") @@ -15525,110 +10756,14 @@ def _gotitem( """ ) + @doc( + _shared_docs["aggregate"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + ) def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): - """ - Aggregate using one or more operations over the specified axis. - - Parameters - ---------- - func : function, str, list or dict - Function to use for aggregating the data. If a function, must either - work when passed a DataFrame or when passed to DataFrame.apply. - - Accepted combinations are: - - - function - - string function name - - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. - axis : {0 or 'index', 1 or 'columns'}, default 0 - If 0 or 'index': apply function to each column. - If 1 or 'columns': apply function to each row. - *args - Positional arguments to pass to `func`. - **kwargs - Keyword arguments to pass to `func`. - - Returns - ------- - scalar, Series or DataFrame - - The return can be: - - * scalar : when Series.agg is called with single function - * Series : when DataFrame.agg is called with a single function - * DataFrame : when DataFrame.agg is called with several functions - - See Also - -------- - DataFrame.apply : Perform any type of operations. - DataFrame.transform : Perform transformation type operations. 
- DataFrame.groupby : Perform operations over groups. - DataFrame.resample : Perform operations over resampled bins. - DataFrame.rolling : Perform operations over rolling window. - DataFrame.expanding : Perform operations over expanding window. - core.window.ewm.ExponentialMovingWindow : Perform operation over exponential - weighted window. - - Notes - ----- - The aggregation operations are always performed over an axis, either the - index (default) or the column axis. This behavior is different from - `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, - `var`), where the default is to compute the aggregation of the flattened - array, e.g., ``numpy.mean(arr_2d)`` as opposed to - ``numpy.mean(arr_2d, axis=0)``. - - `agg` is an alias for `aggregate`. Use the alias. - - Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` - for more details. - - A passed user-defined-function will be passed a Series for evaluation. - - If ``func`` defines an index relabeling, ``axis`` must be ``0`` or ``index``. - - Examples - -------- - >>> df = pd.DataFrame( - ... [[1, 2, 3], [4, 5, 6], [7, 8, 9], [np.nan, np.nan, np.nan]], - ... columns=["A", "B", "C"], - ... ) - - Aggregate these functions over the rows. - - >>> df.agg(["sum", "min"]) - A B C - sum 12.0 15.0 18.0 - min 1.0 2.0 3.0 - - Different aggregations per column. - - >>> df.agg({"A": ["sum", "min"], "B": ["min", "max"]}) - A B - sum 12.0 NaN - min 1.0 2.0 - max NaN 8.0 - - Aggregate different functions over the columns - and rename the index of the resulting DataFrame. - - >>> df.agg(x=("A", "max"), y=("B", "min"), z=("C", "mean")) - A B C - x 7.0 NaN NaN - y NaN 2.0 NaN - z NaN NaN 6.0 - - Aggregate over the columns. 
- - >>> df.agg("mean", axis="columns") - 0 2.0 - 1 5.0 - 2 8.0 - 3 NaN - dtype: float64 - """ from pandas.core.apply import frame_apply axis = self._get_axis_number(axis) @@ -15640,147 +10775,14 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): agg = aggregate + @doc( + _shared_docs["transform"], + klass=_shared_doc_kwargs["klass"], + axis=_shared_doc_kwargs["axis"], + ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs ) -> DataFrame: - """ - Call ``func`` on self producing a DataFrame with the same axis shape as self. - - Parameters - ---------- - func : function, str, list-like or dict-like - Function to use for transforming the data. If a function, must either - work when passed a DataFrame or when passed to DataFrame.apply. If func - is both list-like and dict-like, dict-like behavior takes precedence. - - Accepted combinations are: - - - function - - string function name - - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` - - dict-like of axis labels -> functions, - function names or list-like of such. - axis : {0 or 'index', 1 or 'columns'}, default 0 - If 0 or 'index': apply function to each column. - If 1 or 'columns': apply function to each row. - *args - Positional arguments to pass to `func`. - **kwargs - Keyword arguments to pass to `func`. - - Returns - ------- - DataFrame - A DataFrame that must have the same length as self. - - Raises - ------ - ValueError : If the returned DataFrame has a different length than self. - - See Also - -------- - DataFrame.agg : Only perform aggregating type operations. - DataFrame.apply : Invoke function on a DataFrame. - - Notes - ----- - Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` - for more details. 
- - Examples - -------- - >>> df = pd.DataFrame({"A": range(3), "B": range(1, 4)}) - >>> df - A B - 0 0 1 - 1 1 2 - 2 2 3 - >>> df.transform(lambda x: x + 1) - A B - 0 1 2 - 1 2 3 - 2 3 4 - - Even though the resulting DataFrame must have the same length as the - input DataFrame, it is possible to provide several input functions: - - >>> s = pd.Series(range(3)) - >>> s - 0 0 - 1 1 - 2 2 - dtype: int64 - >>> s.transform([np.sqrt, np.exp]) - sqrt exp - 0 0.000000 1.000000 - 1 1.000000 2.718282 - 2 1.414214 7.389056 - - You can call transform on a GroupBy object: - - >>> df = pd.DataFrame( - ... { - ... "Date": [ - ... "2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... "2015-05-08", - ... "2015-05-07", - ... "2015-05-06", - ... "2015-05-05", - ... ], - ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], - ... } - ... ) - >>> df - Date Data - 0 2015-05-08 5 - 1 2015-05-07 8 - 2 2015-05-06 6 - 3 2015-05-05 1 - 4 2015-05-08 50 - 5 2015-05-07 100 - 6 2015-05-06 60 - 7 2015-05-05 120 - >>> df.groupby("Date")["Data"].transform("sum") - 0 55 - 1 108 - 2 66 - 3 121 - 4 55 - 5 108 - 6 66 - 7 121 - Name: Data, dtype: int64 - - >>> df = pd.DataFrame( - ... { - ... "c": [1, 1, 1, 2, 2, 2, 2], - ... "type": ["m", "n", "o", "m", "m", "n", "n"], - ... } - ... 
) - >>> df - c type - 0 1 m - 1 1 n - 2 1 o - 3 2 m - 4 2 m - 5 2 n - 6 2 n - >>> df["size"] = df.groupby("c")["type"].transform(len) - >>> df - c type size - 0 1 m 3 - 1 1 n 3 - 2 1 o 3 - 3 2 m 4 - 4 2 m 4 - 5 2 n 4 - 6 2 n 4 - """ from pandas.core.apply import frame_apply op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) @@ -16462,233 +11464,23 @@ def join( return joined + @Substitution("") + @Appender(_merge_doc, indents=2) def merge( self, right: DataFrame | Series, - how: MergeHow = "inner", - on: IndexLabel | AnyArrayLike | None = None, - left_on: IndexLabel | AnyArrayLike | None = None, - right_on: IndexLabel | AnyArrayLike | None = None, - left_index: bool = False, - right_index: bool = False, - sort: bool = False, - suffixes: Suffixes = ("_x", "_y"), - copy: bool | lib.NoDefault = lib.no_default, - indicator: str | bool = False, - validate: MergeValidate | None = None, - ) -> DataFrame: - """ - Merge DataFrame or named Series objects with a database-style join. - - A named Series object is treated as a DataFrame with a single named column. - - The join is done on columns or indexes. If joining columns on - columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes - on indexes or indexes on a column or columns, the index will be passed on. - When performing a cross merge, no column specifications to merge on are - allowed. - - .. warning:: - - If both key columns contain rows where the key is a null value, those - rows will be matched against each other. This is different from usual SQL - join behaviour and can lead to unexpected results. - - Parameters - ---------- - right : DataFrame or named Series - Object to merge with. - how : {'left', 'right', 'outer', 'inner', 'cross', 'left_anti', 'right_anti'}, - default 'inner' - Type of merge to be performed. - - * left: use only keys from left frame, similar to a SQL left outer join; - preserve key order. 
- * right: use only keys from right frame, similar to a SQL right outer join; - preserve key order. - * outer: use union of keys from both frames, similar to a SQL full outer - join; sort keys lexicographically. - * inner: use intersection of keys from both frames, similar to a SQL inner - join; preserve the order of the left keys. - * cross: creates the cartesian product from both frames, preserves the order - of the left keys. - * left_anti: use only keys from left frame that - are not in right frame, similar - to SQL left anti join; preserve key order. - - .. versionadded:: 3.0 - * right_anti: use only keys from right frame - that are not in left frame, similar - to SQL right anti join; preserve key order. - - .. versionadded:: 3.0 - on : Hashable or a sequence of the previous - Column or index level names to join on. These must be found in both - DataFrames. If `on` is None and not merging on indexes then this defaults - to the intersection of the columns in both DataFrames. - left_on : Hashable or a sequence of the previous, or array-like - Column or index level names to join on in the left DataFrame. Can also - be an array or list of arrays of the length of the left DataFrame. - These arrays are treated as if they are columns. - right_on : Hashable or a sequence of the previous, or array-like - Column or index level names to join on in the right DataFrame. Can also - be an array or list of arrays of the length of the right DataFrame. - These arrays are treated as if they are columns. - left_index : bool, default False - Use the index from the left DataFrame as the join key(s). If it is a - MultiIndex, the number of keys in the other DataFrame (either the index - or a number of columns) must match the number of levels. - right_index : bool, default False - Use the index from the right DataFrame as the join key. Same caveats as - left_index. - sort : bool, default False - Sort the join keys lexicographically in the result DataFrame. 
If False, - the order of the join keys depends on the join type (how keyword). - suffixes : list-like, default is ("_x", "_y") - A length-2 sequence where each element is optionally a string - indicating the suffix to add to overlapping column names in - `left` and `right` respectively. Pass a value of `None` instead - of a string to indicate that the column name from `left` or - `right` should be left as-is, with no suffix. At least one of the - values must not be None. - copy : bool, default False - This keyword is now ignored; changing its value will have no - impact on the method. - - .. deprecated:: 3.0.0 - - This keyword is ignored and will be removed in pandas 4.0. Since - pandas 3.0, this method always returns a new object using a lazy - copy mechanism that defers copies until necessary - (Copy-on-Write). See the `user guide on Copy-on-Write - `__ - for more details. - - indicator : bool or str, default False - If True, adds a column to the output DataFrame called "_merge" with - information on the source of each row. The column can be given a different - name by providing a string argument. The column will have a Categorical - type with the value of "left_only" for observations whose merge key only - appears in the left DataFrame, "right_only" for observations - whose merge key only appears in the right DataFrame, and "both" - if the observation's merge key is found in both DataFrames. - - validate : str, optional - If specified, checks if merge is of specified type. - - * "one_to_one" or "1:1": check if merge keys are unique in both - left and right datasets. - * "one_to_many" or "1:m": check if merge keys are unique in left - dataset. - * "many_to_one" or "m:1": check if merge keys are unique in right - dataset. - * "many_to_many" or "m:m": allowed, but does not result in checks. - - Returns - ------- - DataFrame - A DataFrame of the two merged objects. - - See Also - -------- - merge_ordered : Merge with optional filling/interpolation. 
- merge_asof : Merge on nearest keys. - DataFrame.join : Similar method using indices. - - Examples - -------- - >>> df1 = pd.DataFrame( - ... {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]} - ... ) - >>> df2 = pd.DataFrame( - ... {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]} - ... ) - >>> df1 - lkey value - 0 foo 1 - 1 bar 2 - 2 baz 3 - 3 foo 5 - >>> df2 - rkey value - 0 foo 5 - 1 bar 6 - 2 baz 7 - 3 foo 8 - - Merge df1 and df2 on the lkey and rkey columns. The value columns have - the default suffixes, _x and _y, appended. - - >>> df1.merge(df2, left_on="lkey", right_on="rkey") - lkey value_x rkey value_y - 0 foo 1 foo 5 - 1 foo 1 foo 8 - 2 bar 2 bar 6 - 3 baz 3 baz 7 - 4 foo 5 foo 5 - 5 foo 5 foo 8 - - Merge DataFrames df1 and df2 with specified left and right suffixes - appended to any overlapping columns. - - >>> df1.merge( - ... df2, left_on="lkey", right_on="rkey", suffixes=("_left", "_right") - ... ) - lkey value_left rkey value_right - 0 foo 1 foo 5 - 1 foo 1 foo 8 - 2 bar 2 bar 6 - 3 baz 3 baz 7 - 4 foo 5 foo 5 - 5 foo 5 foo 8 - - Merge DataFrames df1 and df2, but raise an exception if the DataFrames have - any overlapping columns. - - >>> df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=(False, False)) - Traceback (most recent call last): - ... 
- ValueError: columns overlap but no suffix specified: - Index(['value'], dtype='object') - - >>> df1 = pd.DataFrame({"a": ["foo", "bar"], "b": [1, 2]}) - >>> df2 = pd.DataFrame({"a": ["foo", "baz"], "c": [3, 4]}) - >>> df1 - a b - 0 foo 1 - 1 bar 2 - >>> df2 - a c - 0 foo 3 - 1 baz 4 - - >>> df1.merge(df2, how="inner", on="a") - a b c - 0 foo 1 3 - - >>> df1.merge(df2, how="left", on="a") - a b c - 0 foo 1 3.0 - 1 bar 2 NaN - - >>> df1 = pd.DataFrame({"left": ["foo", "bar"]}) - >>> df2 = pd.DataFrame({"right": [7, 8]}) - >>> df1 - left - 0 foo - 1 bar - >>> df2 - right - 0 7 - 1 8 - - >>> df1.merge(df2, how="cross") - left right - 0 foo 7 - 1 foo 8 - 2 bar 7 - 3 bar 8 - """ + how: MergeHow = "inner", + on: IndexLabel | AnyArrayLike | None = None, + left_on: IndexLabel | AnyArrayLike | None = None, + right_on: IndexLabel | AnyArrayLike | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool | lib.NoDefault = lib.no_default, + indicator: str | bool = False, + validate: MergeValidate | None = None, + ) -> DataFrame: self._check_copy_deprecation(copy) from pandas.core.reshape.merge import merge @@ -17499,6 +12291,7 @@ def any( **kwargs, ) -> Series | bool: ... + @doc(make_doc("any", ndim=1)) def any( self, *, @@ -17507,118 +12300,6 @@ def any( skipna: bool = True, **kwargs, ) -> Series | bool: - """ - Return whether any element is True, potentially over an axis. - - Returns False unless there is at least one element within a series or - along a Dataframe axis that is True or equivalent (e.g. non-zero or - non-empty). - - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns', None}, default 0 - Indicate which axis or axes should be reduced. For `Series` this parameter - is unused and defaults to 0. - - * 0 / 'index' : reduce the index, return a Series whose index is the - original column labels. 
- * 1 / 'columns' : reduce the columns, return a Series whose index is the - original index. - * None : reduce all axes, return a scalar. - - bool_only : bool, default False - Include only boolean columns. Not implemented for Series. - skipna : bool, default True - Exclude NA/null values. If the entire row/column is NA and skipna is - True, then the result will be False, as for an empty row/column. - If skipna is False, then NA are treated as True, because these are not - equal to zero. - **kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - Series or scalar - If axis=None, then a scalar boolean is returned. - Otherwise a Series is returned with index matching the index argument. - - See Also - -------- - numpy.any : Numpy version of this method. - Series.any : Return whether any element is True. - Series.all : Return whether all elements are True. - DataFrame.any : Return whether any element is True over requested axis. - DataFrame.all : Return whether all elements are True over requested axis. - - Examples - -------- - **Series** - - For Series input, the output is a scalar indicating whether any element - is True. - - >>> pd.Series([False, False]).any() - False - >>> pd.Series([True, False]).any() - True - >>> pd.Series([], dtype="float64").any() - False - >>> pd.Series([np.nan]).any() - False - >>> pd.Series([np.nan]).any(skipna=False) - True - - **DataFrame** - - Whether each column contains at least one True element (the default). - - >>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) - >>> df - A B C - 0 1 0 0 - 1 2 2 0 - - >>> df.any() - A True - B True - C False - dtype: bool - - Aggregating over the columns. 
- - >>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) - >>> df - A B - 0 True 1 - 1 False 2 - - >>> df.any(axis="columns") - 0 True - 1 True - dtype: bool - - >>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) - >>> df - A B - 0 True 1 - 1 False 0 - - >>> df.any(axis="columns") - 0 True - 1 False - dtype: bool - - Aggregating over the entire DataFrame with ``axis=None``. - - >>> df.any(axis=None) - True - - `any` for an empty DataFrame is an empty Series. - - >>> pd.DataFrame([]).any() - Series([], dtype: bool) - """ result = self._logical_func( "any", nanops.nanany, axis, bool_only, skipna, **kwargs ) @@ -17657,6 +12338,7 @@ def all( ) -> Series | bool: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="all") + @doc(make_doc("all", ndim=1)) def all( self, axis: Axis | None = 0, @@ -17664,91 +12346,6 @@ def all( skipna: bool = True, **kwargs, ) -> Series | bool: - """ - Return whether all elements are True, potentially over an axis. - - Returns True unless there at least one element within a series or - along a Dataframe axis that is False or equivalent (e.g. zero or - empty). - - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns', None}, default 0 - Indicate which axis or axes should be reduced. For `Series` this parameter - is unused and defaults to 0. - - * 0 / 'index' : reduce the index, return a Series whose index is the - original column labels. - * 1 / 'columns' : reduce the columns, return a Series whose index is the - original index. - * None : reduce all axes, return a scalar. - - bool_only : bool, default False - Include only boolean columns. Not implemented for Series. - skipna : bool, default True - Exclude NA/null values. If the entire row/column is NA and skipna is - True, then the result will be True, as for an empty row/column. - If skipna is False, then NA are treated as True, because these are not - equal to zero. 
- **kwargs : any, default None - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - Series or scalar - If axis=None, then a scalar boolean is returned. - Otherwise a Series is returned with index matching the index argument. - - See Also - -------- - Series.all : Return True if all elements are True. - DataFrame.any : Return True if one (or more) elements are True. - - Examples - -------- - **Series** - - >>> pd.Series([True, True]).all() - True - >>> pd.Series([True, False]).all() - False - >>> pd.Series([], dtype="float64").all() - True - >>> pd.Series([np.nan]).all() - True - >>> pd.Series([np.nan]).all(skipna=False) - True - - **DataFrames** - - Create a DataFrame from a dictionary. - - >>> df = pd.DataFrame({"col1": [True, True], "col2": [True, False]}) - >>> df - col1 col2 - 0 True True - 1 True False - - Default behaviour checks if values in each column all return True. - - >>> df.all() - col1 True - col2 False - dtype: bool - - Specify ``axis='columns'`` to check if values in each row all return True. - - >>> df.all(axis="columns") - 0 True - 1 False - dtype: bool - - Or ``axis=None`` for whether every value is True. - - >>> df.all(axis=None) - False - """ result = self._logical_func( "all", nanops.nanall, axis, bool_only, skipna, **kwargs ) @@ -17788,6 +12385,7 @@ def min( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="min") + @doc(make_doc("min", ndim=2)) def min( self, axis: Axis | None = 0, @@ -17795,67 +12393,6 @@ def min( numeric_only: bool = False, **kwargs, ) -> Series | Any: - """ - Return the minimum of the values over the requested axis. - - If you want the *index* of the minimum, use ``idxmin``. This is - the equivalent of the ``numpy.ndarray`` method ``argmin``. - - Parameters - ---------- - axis : {index (0), columns (1)} - Axis for the function to be applied on. - For `Series` this parameter is unused and defaults to 0. 
- - For DataFrames, specifying ``axis=None`` will apply the aggregation - across both axes. - - .. versionadded:: 2.0.0 - - skipna : bool, default True - Exclude NA/null values when computing the result. - numeric_only : bool, default False - Include only float, int, boolean columns. - - **kwargs - Additional keyword arguments to be passed to the function. - - Returns - ------- - Series or scalar - Value containing the calculation referenced in the description. - - See Also - -------- - Series.sum : Return the sum. - Series.min : Return the minimum. - Series.max : Return the maximum. - Series.idxmin : Return the index of the minimum. - Series.idxmax : Return the index of the maximum. - DataFrame.sum : Return the sum over the requested axis. - DataFrame.min : Return the minimum over the requested axis. - DataFrame.max : Return the maximum over the requested axis. - DataFrame.idxmin : Return the index of the minimum over the requested axis. - DataFrame.idxmax : Return the index of the maximum over the requested axis. - - Examples - -------- - >>> idx = pd.MultiIndex.from_arrays( - ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], - ... names=["blooded", "animal"], - ... ) - >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) - >>> s - blooded animal - warm dog 4 - falcon 2 - cold fish 0 - spider 8 - Name: legs, dtype: int64 - - >>> s.min() - 0 - """ result = super().min( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -17895,6 +12432,7 @@ def max( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="max") + @doc(make_doc("max", ndim=2)) def max( self, axis: Axis | None = 0, @@ -17902,67 +12440,6 @@ def max( numeric_only: bool = False, **kwargs, ) -> Series | Any: - """ - Return the maximum of the values over the requested axis. - - If you want the *index* of the maximum, use ``idxmax``. This is - the equivalent of the ``numpy.ndarray`` method ``argmax``. 
- - Parameters - ---------- - axis : {index (0), columns (1)} - Axis for the function to be applied on. - For `Series` this parameter is unused and defaults to 0. - - For DataFrames, specifying ``axis=None`` will apply the aggregation - across both axes. - - .. versionadded:: 2.0.0 - - skipna : bool, default True - Exclude NA/null values when computing the result. - numeric_only : bool, default False - Include only float, int, boolean columns. - - **kwargs - Additional keyword arguments to be passed to the function. - - Returns - ------- - Series or scalar - Value containing the calculation referenced in the description. - - See Also - -------- - Series.sum : Return the sum. - Series.min : Return the minimum. - Series.max : Return the maximum. - Series.idxmin : Return the index of the minimum. - Series.idxmax : Return the index of the maximum. - DataFrame.sum : Return the sum over the requested axis. - DataFrame.min : Return the minimum over the requested axis. - DataFrame.max : Return the maximum over the requested axis. - DataFrame.idxmin : Return the index of the minimum over the requested axis. - DataFrame.idxmax : Return the index of the maximum over the requested axis. - - Examples - -------- - >>> idx = pd.MultiIndex.from_arrays( - ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], - ... names=["blooded", "animal"], - ... ) - >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) - >>> s - blooded animal - warm dog 4 - falcon 2 - cold fish 0 - spider 8 - Name: legs, dtype: int64 - - >>> s.max() - 8 - """ result = super().max( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -18172,108 +12649,32 @@ def mean( @overload def mean( self, - *, - axis: None, - skipna: bool = ..., - numeric_only: bool = ..., - **kwargs, - ) -> Any: ... - - @overload - def mean( - self, - *, - axis: Axis | None, - skipna: bool = ..., - numeric_only: bool = ..., - **kwargs, - ) -> Series | Any: ... 
- - @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") - def mean( - self, - axis: Axis | None = 0, - skipna: bool = True, - numeric_only: bool = False, - **kwargs, - ) -> Series | Any: - """ - Return the mean of the values over the requested axis. - - Parameters - ---------- - axis : {index (0), columns (1)} - Axis for the function to be applied on. - For `Series` this parameter is unused and defaults to 0. - - For DataFrames, specifying ``axis=None`` will apply the aggregation - across both axes. - - .. versionadded:: 2.0.0 - - skipna : bool, default True - Exclude NA/null values when computing the result. - numeric_only : bool, default False - Include only float, int, boolean columns. - - **kwargs - Additional keyword arguments to be passed to the function. - - Returns - ------- - Series or scalar - Value containing the calculation referenced in the description. + *, + axis: None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Any: ... - See Also - -------- - Series.sum : Return the sum. - Series.min : Return the minimum. - Series.max : Return the maximum. - Series.idxmin : Return the index of the minimum. - Series.idxmax : Return the index of the maximum. - DataFrame.sum : Return the sum over the requested axis. - DataFrame.min : Return the minimum over the requested axis. - DataFrame.max : Return the maximum over the requested axis. - DataFrame.idxmin : Return the index of the minimum over the requested axis. - DataFrame.idxmax : Return the index of the maximum over the requested axis. + @overload + def mean( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: ... - Examples - -------- - >>> s = pd.Series([1, 2, 3]) - >>> s.mean() - 2.0 - - With a DataFrame - - >>> df = pd.DataFrame( - ... {"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"] - ... 
) - >>> df - a b - tiger 1 2 - zebra 2 3 - >>> df.mean() - a 1.5 - b 2.5 - dtype: float64 - - Using axis=1 - - >>> df.mean(axis=1) - tiger 1.5 - zebra 2.5 - dtype: float64 - - In this case, `numeric_only` should be set to `True` to avoid - getting an error. - - >>> df = pd.DataFrame( - ... {"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"] - ... ) - >>> df.mean(numeric_only=True) - a 1.5 - dtype: float64 - """ + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") + @doc(make_doc("mean", ndim=2)) + def mean( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ) -> Series | Any: result = super().mean( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -18315,6 +12716,7 @@ def median( @deprecate_nonkeyword_arguments( Pandas4Warning, allowed_args=["self"], name="median" ) + @doc(make_doc("median", ndim=2)) def median( self, axis: Axis | None = 0, @@ -18322,83 +12724,6 @@ def median( numeric_only: bool = False, **kwargs, ) -> Series | Any: - """ - Return the median of the values over the requested axis. - - Parameters - ---------- - axis : {index (0), columns (1)} - Axis for the function to be applied on. - For `Series` this parameter is unused and defaults to 0. - - For DataFrames, specifying ``axis=None`` will apply the aggregation - across both axes. - - .. versionadded:: 2.0.0 - - skipna : bool, default True - Exclude NA/null values when computing the result. - numeric_only : bool, default False - Include only float, int, boolean columns. - - **kwargs - Additional keyword arguments to be passed to the function. - - Returns - ------- - Series or scalar - Value containing the calculation referenced in the description. - - See Also - -------- - Series.sum : Return the sum. - Series.min : Return the minimum. - Series.max : Return the maximum. - Series.idxmin : Return the index of the minimum. - Series.idxmax : Return the index of the maximum. 
- DataFrame.sum : Return the sum over the requested axis. - DataFrame.min : Return the minimum over the requested axis. - DataFrame.max : Return the maximum over the requested axis. - DataFrame.idxmin : Return the index of the minimum over the requested axis. - DataFrame.idxmax : Return the index of the maximum over the requested axis. - - Examples - -------- - >>> s = pd.Series([1, 2, 3]) - >>> s.median() - 2.0 - - With a DataFrame - - >>> df = pd.DataFrame( - ... {"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"] - ... ) - >>> df - a b - tiger 1 2 - zebra 2 3 - >>> df.median() - a 1.5 - b 2.5 - dtype: float64 - - Using axis=1 - - >>> df.median(axis=1) - tiger 1.5 - zebra 2.5 - dtype: float64 - - In this case, `numeric_only` should be set to `True` - to avoid getting an error. - - >>> df = pd.DataFrame( - ... {"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"] - ... ) - >>> df.median(numeric_only=True) - a 1.5 - dtype: float64 - """ result = super().median( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -19020,6 +13345,7 @@ def kurt( kurtosis = kurt # type: ignore[assignment] product = prod + @doc(make_doc("cummin", ndim=2)) def cummin( self, axis: Axis = 0, @@ -19028,107 +13354,10 @@ def cummin( *args, **kwargs, ) -> Self: - """ - Return cumulative minimum over a DataFrame or Series axis. - - Returns a DataFrame or Series of the same size containing the cumulative - minimum. - - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns'}, default 0 - The index or the name of the axis. 0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. - skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - numeric_only : bool, default False - Include only float, int, boolean columns. - *args, **kwargs - Additional keywords have no effect but might be accepted for - compatibility with NumPy. 
- - Returns - ------- - Series or DataFrame - Return cumulative minimum of Series or DataFrame. - - See Also - -------- - core.window.expanding.Expanding.min : Similar functionality - but ignores ``NaN`` values. - DataFrame.min : Return the minimum over - DataFrame axis. - DataFrame.cummax : Return cumulative maximum over DataFrame axis. - DataFrame.cummin : Return cumulative minimum over DataFrame axis. - DataFrame.cumsum : Return cumulative sum over DataFrame axis. - DataFrame.cumprod : Return cumulative product over DataFrame axis. - - Examples - -------- - **Series** - - >>> s = pd.Series([2, np.nan, 5, -1, 0]) - >>> s - 0 2.0 - 1 NaN - 2 5.0 - 3 -1.0 - 4 0.0 - dtype: float64 - - By default, NA values are ignored. - - >>> s.cummin() - 0 2.0 - 1 NaN - 2 2.0 - 3 -1.0 - 4 -1.0 - dtype: float64 - - To include NA values in the operation, use ``skipna=False`` - - >>> s.cummin(skipna=False) - 0 2.0 - 1 NaN - 2 NaN - 3 NaN - 4 NaN - dtype: float64 - - **DataFrame** - - >>> df = pd.DataFrame( - ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") - ... ) - >>> df - A B - 0 2.0 1.0 - 1 3.0 NaN - 2 1.0 0.0 - - By default, iterates over rows and finds the minimum - in each column. This is equivalent to ``axis=None`` or ``axis='index'``. - - >>> df.cummin() - A B - 0 2.0 1.0 - 1 2.0 NaN - 2 1.0 0.0 - - To iterate over columns and find the minimum in each row, - use ``axis=1`` - - >>> df.cummin(axis=1) - A B - 0 2.0 1.0 - 1 3.0 NaN - 2 1.0 0.0 - """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cummin(data, axis, skipna, *args, **kwargs) + @doc(make_doc("cummax", ndim=2)) def cummax( self, axis: Axis = 0, @@ -19137,107 +13366,10 @@ def cummax( *args, **kwargs, ) -> Self: - """ - Return cumulative maximum over a DataFrame or Series axis. - - Returns a DataFrame or Series of the same size containing the cumulative - maximum. - - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns'}, default 0 - The index or the name of the axis. 
0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. - skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - numeric_only : bool, default False - Include only float, int, boolean columns. - *args, **kwargs - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - Series or DataFrame - Return cumulative maximum of Series or DataFrame. - - See Also - -------- - core.window.expanding.Expanding.max : Similar functionality - but ignores ``NaN`` values. - DataFrame.max : Return the maximum over - DataFrame axis. - DataFrame.cummax : Return cumulative maximum over DataFrame axis. - DataFrame.cummin : Return cumulative minimum over DataFrame axis. - DataFrame.cumsum : Return cumulative sum over DataFrame axis. - DataFrame.cumprod : Return cumulative product over DataFrame axis. - - Examples - -------- - **Series** - - >>> s = pd.Series([2, np.nan, 5, -1, 0]) - >>> s - 0 2.0 - 1 NaN - 2 5.0 - 3 -1.0 - 4 0.0 - dtype: float64 - - By default, NA values are ignored. - - >>> s.cummax() - 0 2.0 - 1 NaN - 2 5.0 - 3 5.0 - 4 5.0 - dtype: float64 - - To include NA values in the operation, use ``skipna=False`` - - >>> s.cummax(skipna=False) - 0 2.0 - 1 NaN - 2 NaN - 3 NaN - 4 NaN - dtype: float64 - - **DataFrame** - - >>> df = pd.DataFrame( - ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") - ... ) - >>> df - A B - 0 2.0 1.0 - 1 3.0 NaN - 2 1.0 0.0 - - By default, iterates over rows and finds the maximum - in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 
- - >>> df.cummax() - A B - 0 2.0 1.0 - 1 3.0 NaN - 2 3.0 1.0 - - To iterate over columns and find the maximum in each row, - use ``axis=1`` - - >>> df.cummax(axis=1) - A B - 0 2.0 2.0 - 1 3.0 NaN - 2 1.0 1.0 - """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cummax(data, axis, skipna, *args, **kwargs) + @doc(make_doc("cumsum", ndim=2)) def cumsum( self, axis: Axis = 0, @@ -19246,107 +13378,10 @@ def cumsum( *args, **kwargs, ) -> Self: - """ - Return cumulative sum over a DataFrame or Series axis. - - Returns a DataFrame or Series of the same size containing the cumulative - sum. - - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns'}, default 0 - The index or the name of the axis. 0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. - skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - numeric_only : bool, default False - Include only float, int, boolean columns. - *args, **kwargs - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - Series or DataFrame - Return cumulative sum of Series or DataFrame. - - See Also - -------- - core.window.expanding.Expanding.sum : Similar functionality - but ignores ``NaN`` values. - DataFrame.sum : Return the sum over - DataFrame axis. - DataFrame.cummax : Return cumulative maximum over DataFrame axis. - DataFrame.cummin : Return cumulative minimum over DataFrame axis. - DataFrame.cumsum : Return cumulative sum over DataFrame axis. - DataFrame.cumprod : Return cumulative product over DataFrame axis. - - Examples - -------- - **Series** - - >>> s = pd.Series([2, np.nan, 5, -1, 0]) - >>> s - 0 2.0 - 1 NaN - 2 5.0 - 3 -1.0 - 4 0.0 - dtype: float64 - - By default, NA values are ignored. 
- - >>> s.cumsum() - 0 2.0 - 1 NaN - 2 7.0 - 3 6.0 - 4 6.0 - dtype: float64 - - To include NA values in the operation, use ``skipna=False`` - - >>> s.cumsum(skipna=False) - 0 2.0 - 1 NaN - 2 NaN - 3 NaN - 4 NaN - dtype: float64 - - **DataFrame** - - >>> df = pd.DataFrame( - ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") - ... ) - >>> df - A B - 0 2.0 1.0 - 1 3.0 NaN - 2 1.0 0.0 - - By default, iterates over rows and finds the sum - in each column. This is equivalent to ``axis=None`` or ``axis='index'``. - - >>> df.cumsum() - A B - 0 2.0 1.0 - 1 5.0 NaN - 2 6.0 1.0 - - To iterate over columns and find the sum in each row, - use ``axis=1`` - - >>> df.cumsum(axis=1) - A B - 0 2.0 3.0 - 1 3.0 NaN - 2 1.0 1.0 - """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cumsum(data, axis, skipna, *args, **kwargs) + @doc(make_doc("cumprod", 2)) def cumprod( self, axis: Axis = 0, @@ -19355,104 +13390,6 @@ def cumprod( *args, **kwargs, ) -> Self: - """ - Return cumulative product over a DataFrame or Series axis. - - Returns a DataFrame or Series of the same size containing the cumulative - product. - - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns'}, default 0 - The index or the name of the axis. 0 is equivalent to None or 'index'. - For `Series` this parameter is unused and defaults to 0. - skipna : bool, default True - Exclude NA/null values. If an entire row/column is NA, the result - will be NA. - numeric_only : bool, default False - Include only float, int, boolean columns. - *args, **kwargs - Additional keywords have no effect but might be accepted for - compatibility with NumPy. - - Returns - ------- - Series or DataFrame - Return cumulative product of Series or DataFrame. - - See Also - -------- - core.window.expanding.Expanding.prod : Similar functionality - but ignores ``NaN`` values. - DataFrame.prod : Return the product over - DataFrame axis. - DataFrame.cummax : Return cumulative maximum over DataFrame axis. 
- DataFrame.cummin : Return cumulative minimum over DataFrame axis. - DataFrame.cumsum : Return cumulative sum over DataFrame axis. - DataFrame.cumprod : Return cumulative product over DataFrame axis. - - Examples - -------- - **Series** - - >>> s = pd.Series([2, np.nan, 5, -1, 0]) - >>> s - 0 2.0 - 1 NaN - 2 5.0 - 3 -1.0 - 4 0.0 - dtype: float64 - - By default, NA values are ignored. - - >>> s.cumprod() - 0 2.0 - 1 NaN - 2 10.0 - 3 -10.0 - 4 -0.0 - dtype: float64 - - To include NA values in the operation, use ``skipna=False`` - - >>> s.cumprod(skipna=False) - 0 2.0 - 1 NaN - 2 NaN - 3 NaN - 4 NaN - dtype: float64 - - **DataFrame** - - >>> df = pd.DataFrame( - ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") - ... ) - >>> df - A B - 0 2.0 1.0 - 1 3.0 NaN - 2 1.0 0.0 - - By default, iterates over rows and finds the product - in each column. This is equivalent to ``axis=None`` or ``axis='index'``. - - >>> df.cumprod() - A B - 0 2.0 1.0 - 1 6.0 NaN - 2 6.0 0.0 - - To iterate over columns and find the product in each row, - use ``axis=1`` - - >>> df.cumprod(axis=1) - A B - 0 2.0 2.0 - 1 3.0 NaN - 2 1.0 0.0 - """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cumprod(data, axis, skipna, *args, **kwargs) From d9504afcbf5d9cddb0e992f37257b35efadc41f1 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Sun, 14 Dec 2025 23:50:57 +0800 Subject: [PATCH 08/10] alignment fixed --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1738c6f1011d9..f96ddc5b23253 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2658,7 +2658,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): ... 
) >>> grouped = df.groupby("A")[["C", "D"]] >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) - C D + C D 0 -1.154701 -0.577350 1 0.577350 0.000000 2 0.577350 1.154701 @@ -2678,7 +2678,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): 5 3.0 8.0 >>> grouped.transform("mean") - C D + C D 0 3.666667 4.0 1 4.000000 5.0 2 3.666667 4.0 @@ -2690,7 +2690,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): for example: >>> grouped.transform(lambda x: x.astype(int).max()) - C D + C D 0 5 8 1 5 9 2 5 8 From 632e00bebdcd08eb82cce0d94ca6264aabffdae5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=8D=9A=E9=97=BB?= <83496317+zhangbowen-coder@users.noreply.github.com> Date: Mon, 15 Dec 2025 09:57:50 +0800 Subject: [PATCH 09/10] update Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f96ddc5b23253..c03886a253e84 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2678,7 +2678,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): 5 3.0 8.0 >>> grouped.transform("mean") - C D + C D 0 3.666667 4.0 1 4.000000 5.0 2 3.666667 4.0 From 7f7c3fb803663526f9f30b09f52946b4fc821c75 Mon Sep 17 00:00:00 2001 From: zhangbowen-coder <2439796518@qq.com> Date: Mon, 15 Dec 2025 10:01:13 +0800 Subject: [PATCH 10/10] aligned adjusted --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c03886a253e84..07168331b9ac7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2658,7 +2658,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): ... 
) >>> grouped = df.groupby("A")[["C", "D"]] >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) - C D + C D 0 -1.154701 -0.577350 1 0.577350 0.000000 2 0.577350 1.154701 @@ -2669,7 +2669,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): Broadcast result of the transformation >>> grouped.transform(lambda x: x.max() - x.min()) - C D + C D 0 4.0 6.0 1 3.0 8.0 2 4.0 6.0