@@ -498,6 +498,21 @@ def window(self, *exprs: Expr) -> DataFrame:
498498
499499 Returns:
500500 DataFrame with new window function columns appended.
501+
502+ Examples:
503+ Add a row number within each group:
504+
505+ >>> import datafusion.functions as f
506+ >>> from datafusion import col
507+ >>> ctx = dfn.SessionContext()
508+ >>> df = ctx.from_pydict({"a": [1, 2, 3], "b": ["x", "x", "y"]})
509+ >>> df = df.window(
510+ ... f.row_number(
511+ ... partition_by=[col("b")], order_by=[col("a")]
512+ ... ).alias("rn")
513+ ... )
514+ >>> "rn" in df.schema().names
515+ True
501516 """
502517 raw = expr_list_to_raw_expr_list (exprs )
503518 return DataFrame (self .df .window (* raw ))
@@ -967,6 +982,18 @@ def explain(
967982 analyze: If ``True``, the plan will run and metrics will be reported.
968983 format: Output format for the plan. Defaults to
969984 :py:attr:`ExplainFormat.INDENT`.
985+
986+ Examples:
987+ Show the plan in tree format:
988+
989+ >>> from datafusion import ExplainFormat
990+ >>> ctx = dfn.SessionContext()
991+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
992+ >>> df.explain(format=ExplainFormat.TREE) # doctest: +SKIP
993+
994+ Run the plan and show it with runtime metrics:
995+
996+ >>> df.explain(analyze=True) # doctest: +SKIP
970997 """
971998 fmt = format .value if format is not None else None
972999 self .df .explain (verbose , analyze , fmt )
@@ -1092,6 +1119,15 @@ def except_distinct(self, other: DataFrame) -> DataFrame:
10921119
10931120 Returns:
10941121 DataFrame after set difference with deduplication.
1122+
1123+ Examples:
1124+ Remove rows present in ``df2`` and deduplicate:
1125+
1126+ >>> ctx = dfn.SessionContext()
1127+ >>> df1 = ctx.from_pydict({"a": [1, 2, 3, 1], "b": [10, 20, 30, 10]})
1128+ >>> df2 = ctx.from_pydict({"a": [1, 2], "b": [10, 20]})
1129+ >>> df1.except_distinct(df2).sort("a").to_pydict()
1130+ {'a': [3], 'b': [30]}
10951131 """
10961132 return DataFrame (self .df .except_distinct (other .df ))
10971133
@@ -1108,6 +1144,15 @@ def intersect_distinct(self, other: DataFrame) -> DataFrame:
11081144
11091145 Returns:
11101146 DataFrame after intersection with deduplication.
1147+
1148+ Examples:
1149+ Find rows common to both DataFrames:
1150+
1151+ >>> ctx = dfn.SessionContext()
1152+ >>> df1 = ctx.from_pydict({"a": [1, 2, 3], "b": [10, 20, 30]})
1153+ >>> df2 = ctx.from_pydict({"a": [1, 4], "b": [10, 40]})
1154+ >>> df1.intersect_distinct(df2).to_pydict()
1155+ {'a': [1], 'b': [10]}
11111156 """
11121157 return DataFrame (self .df .intersect_distinct (other .df ))
11131158
@@ -1123,6 +1168,15 @@ def union_by_name(self, other: DataFrame) -> DataFrame:
11231168
11241169 Returns:
11251170 DataFrame after union by name.
1171+
1172+ Examples:
1173+ Combine DataFrames with different column orders:
1174+
1175+ >>> ctx = dfn.SessionContext()
1176+ >>> df1 = ctx.from_pydict({"a": [1], "b": [10]})
1177+ >>> df2 = ctx.from_pydict({"b": [20], "a": [2]})
1178+ >>> df1.union_by_name(df2).sort("a").to_pydict()
1179+ {'a': [1, 2], 'b': [10, 20]}
11261180 """
11271181 return DataFrame (self .df .union_by_name (other .df ))
11281182
@@ -1136,6 +1190,15 @@ def union_by_name_distinct(self, other: DataFrame) -> DataFrame:
11361190
11371191 Returns:
11381192 DataFrame after union by name with deduplication.
1193+
1194+ Examples:
1195+ Union by name and remove duplicate rows:
1196+
1197+ >>> ctx = dfn.SessionContext()
1198+ >>> df1 = ctx.from_pydict({"a": [1, 1], "b": [10, 10]})
1199+ >>> df2 = ctx.from_pydict({"b": [10], "a": [1]})
1200+ >>> df1.union_by_name_distinct(df2).to_pydict()
1201+ {'a': [1], 'b': [10]}
11391202 """
11401203 return DataFrame (self .df .union_by_name_distinct (other .df ))
11411204
@@ -1158,6 +1221,19 @@ def distinct_on(
11581221
11591222 Returns:
11601223 DataFrame after deduplication.
1224+
1225+ Examples:
1226+ Keep the row with the smallest ``b`` for each unique ``a``:
1227+
1228+ >>> from datafusion import col
1229+ >>> ctx = dfn.SessionContext()
1230+ >>> df = ctx.from_pydict({"a": [1, 1, 2, 2], "b": [10, 20, 30, 40]})
1231+ >>> df.distinct_on(
1232+ ... [col("a")],
1233+ ... [col("a"), col("b")],
1234+ ... [col("a").sort(ascending=True), col("b").sort(ascending=True)],
1235+ ... ).sort("a").to_pydict()
1236+ {'a': [1, 2], 'b': [10, 30]}
11611237 """
11621238 on_raw = expr_list_to_raw_expr_list (on_expr )
11631239 select_raw = expr_list_to_raw_expr_list (select_expr )
@@ -1176,6 +1252,14 @@ def sort_by(self, *exprs: Expr | str) -> DataFrame:
11761252
11771253 Returns:
11781254 DataFrame after sorting.
1255+
1256+ Examples:
1257+ Sort by a single column:
1258+
1259+ >>> ctx = dfn.SessionContext()
1260+ >>> df = ctx.from_pydict({"a": [3, 1, 2]})
1261+ >>> df.sort_by("a").to_pydict()
1262+ {'a': [1, 2, 3]}
11791263 """
11801264 exprs = [self .parse_sql_expr (e ) if isinstance (e , str ) else e for e in exprs ]
11811265 raw = expr_list_to_raw_expr_list (exprs )
@@ -1472,6 +1556,19 @@ def unnest_columns(
14721556
14731557 Returns:
14741558 A DataFrame with the columns expanded.
1559+
1560+ Examples:
1561+ Unnest an array column:
1562+
1563+ >>> ctx = dfn.SessionContext()
1564+ >>> df = ctx.from_pydict({"a": [[1, 2], [3]], "b": ["x", "y"]})
1565+ >>> df.unnest_columns("a").to_pydict()
1566+ {'a': [1, 2, 3], 'b': ['x', 'x', 'y']}
1567+
1568+ With explicit recursion depth:
1569+
1570+ >>> df.unnest_columns("a", recursions=[("a", "a", 1)]).to_pydict()
1571+ {'a': [1, 2, 3], 'b': ['x', 'x', 'y']}
14751572 """
14761573 columns = list (columns )
14771574 return DataFrame (
0 commit comments