@@ -1173,3 +1173,46 @@ def test_between_default(df):
11731173
11741174 actual = df .collect ()[0 ].to_pydict ()
11751175 assert actual == expected
1176+
def test_coalesce(df):
    """Exercise ``f.coalesce`` on nullable columns of several Arrow types.

    A three-row batch is built in which the middle row of every column is
    null. Each column is coalesced with a literal fallback and the first
    collected batch is compared, column by column, with the expected array.
    A second query checks a three-argument call whose middle argument is a
    null literal.

    Args:
        df: Fixture argument; it is not referenced inside this test, which
            builds its own DataFrame instead.
    """
    session = SessionContext()

    # Source columns: every one of them carries a null in the middle row.
    source_names = ["a", "b", "c", "d", "e"]
    source_arrays = [
        pa.array(["Hello", None, "!"]),
        pa.array([4, None, 6]),
        pa.array(["hello ", None, " !"]),
        pa.array([datetime(2022, 12, 31), None, datetime(2020, 7, 2)]),
        pa.array([False, None, True]),
    ]
    record_batch = pa.RecordBatch.from_arrays(source_arrays, names=source_names)
    df_with_nulls = session.create_dataframe([[record_batch]])

    # (input column, fallback literal value, output alias), in select order.
    coalesce_specs = [
        ("a", "default", "a_coalesced"),
        ("b", 0, "b_coalesced"),
        ("c", "default", "c_coalesced"),
        ("d", datetime(2000, 1, 1), "d_coalesced"),
        ("e", False, "e_coalesced"),
    ]
    coalesced_exprs = [
        f.coalesce(column(name), literal(fallback)).alias(alias)
        for name, fallback, alias in coalesce_specs
    ]
    first_batch = df_with_nulls.select(*coalesced_exprs).collect()[0]

    # Expected output per position: the null row is replaced by the fallback.
    expected_columns = [
        pa.array(["Hello", "default", "!"], type=pa.string_view()),
        pa.array([4, 0, 6], type=pa.int64()),
        pa.array(["hello ", "default", " !"], type=pa.string_view()),
        pa.array(
            [datetime(2022, 12, 31), datetime(2000, 1, 1), datetime(2020, 7, 2)],
            type=pa.timestamp("us"),
        ),
        pa.array([False, False, True], type=pa.bool_()),
    ]
    for position, expected in enumerate(expected_columns):
        assert first_batch.column(position) == expected

    # More than two arguments: a null literal sits between the column and
    # the fallback that is expected to fill the null row.
    multi_expr = f.coalesce(column("a"), literal(None), literal("fallback"))
    multi_batch = df_with_nulls.select(multi_expr.alias("multi_coalesce")).collect()[0]
    expected_multi = pa.array(["Hello", "fallback", "!"], type=pa.string_view())
    assert multi_batch.column(0) == expected_multi
0 commit comments