Skip to content

Commit cff9b7c

Browse files
committed
test: add coalesce function tests for handling default values
1 parent 106555e commit cff9b7c

File tree

2 files changed: 44 additions and 1 deletion.

python/tests/test_dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1207,7 +1207,7 @@ def test_fill_null(df):
12071207
assert result["d"] == [0, 0, 0]
12081208

12091209
# Test filling nulls with string value
1210-
df_with_nulls = df.with_column("d", literal(None).cast(pa.int64()))
1210+
df_with_nulls = df.with_column("d", literal(None).cast(pa.string()))
12111211
df_filled = df_with_nulls.fill_null("missing")
12121212
result = df_filled.to_pydict()
12131213
assert result["e"] == ["missing", "missing", "missing"]

python/tests/test_functions.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,3 +1173,46 @@ def test_between_default(df):
11731173

11741174
actual = df.collect()[0].to_pydict()
11751175
assert actual == expected
1176+
1177+
def test_coalesce(df):
1178+
# Create a DataFrame with null values
1179+
ctx = SessionContext()
1180+
batch = pa.RecordBatch.from_arrays(
1181+
[
1182+
pa.array(["Hello", None, "!"]), # string column with null
1183+
pa.array([4, None, 6]), # integer column with null
1184+
pa.array(["hello ", None, " !"]), # string column with null
1185+
pa.array([datetime(2022, 12, 31), None, datetime(2020, 7, 2)]), # datetime with null
1186+
pa.array([False, None, True]), # boolean column with null
1187+
],
1188+
names=["a", "b", "c", "d", "e"],
1189+
)
1190+
df_with_nulls = ctx.create_dataframe([[batch]])
1191+
1192+
# Test coalesce with different data types
1193+
result_df = df_with_nulls.select(
1194+
f.coalesce(column("a"), literal("default")).alias("a_coalesced"),
1195+
f.coalesce(column("b"), literal(0)).alias("b_coalesced"),
1196+
f.coalesce(column("c"), literal("default")).alias("c_coalesced"),
1197+
f.coalesce(column("d"), literal(datetime(2000, 1, 1))).alias("d_coalesced"),
1198+
f.coalesce(column("e"), literal(False)).alias("e_coalesced"),
1199+
)
1200+
1201+
result = result_df.collect()[0]
1202+
1203+
# Verify results
1204+
assert result.column(0) == pa.array(["Hello", "default", "!"], type=pa.string_view())
1205+
assert result.column(1) == pa.array([4, 0, 6], type=pa.int64())
1206+
assert result.column(2) == pa.array(["hello ", "default", " !"], type=pa.string_view())
1207+
assert result.column(3) == pa.array(
1208+
[datetime(2022, 12, 31), datetime(2000, 1, 1), datetime(2020, 7, 2)],
1209+
type=pa.timestamp("us"),
1210+
)
1211+
assert result.column(4) == pa.array([False, False, True], type=pa.bool_())
1212+
1213+
# Test multiple arguments
1214+
result_df = df_with_nulls.select(
1215+
f.coalesce(column("a"), literal(None), literal("fallback")).alias("multi_coalesce")
1216+
)
1217+
result = result_df.collect()[0]
1218+
assert result.column(0) == pa.array(["Hello", "fallback", "!"], type=pa.string_view())

0 commit comments

Comments
 (0)