@@ -1196,3 +1196,72 @@ def test_dataframe_repr_html(df) -> None:
11961196
11971197 # Ignore whitespace just to make this test look cleaner
11981198 assert output .replace (" " , "" ) == ref_html .replace (" " , "" )
1199+
1200+
1201+
def test_fill_null(df) -> None:
    """Exercise ``DataFrame.fill_null`` with and without a column subset.

    Null columns are added with an explicit type (``int64`` for "d",
    ``string`` for "e") so that a given fill value is compatible with one
    column but not the other.  Two identically typed null columns could
    never be filled differently by the same value, so untyped
    ``literal(None)`` columns cannot back these assertions.

    NOTE(review): the "cast fails" cases assume ``fill_null`` leaves a column
    untouched when the fill value cannot be converted to the column's type --
    confirm against the ``fill_null`` implementation.
    """
    null_int = literal(None).cast(pa.int64())
    null_str = literal(None).cast(pa.string())

    # Test filling nulls with integer value
    df_with_nulls = df.with_column("d", null_int)
    result = df_with_nulls.fill_null(0).to_pydict()
    assert result["d"] == [0, 0, 0]

    # Test filling nulls with string value.  The column under test must be
    # the one that is asserted on, and it must be string-typed to accept
    # the "missing" fill value.
    df_with_nulls = df.with_column("e", null_str)
    result = df_with_nulls.fill_null("missing").to_pydict()
    assert result["e"] == ["missing", "missing", "missing"]

    # Test filling nulls with value that cannot be cast to column type
    df_with_nulls = df.with_column("d", null_int)
    result = df_with_nulls.fill_null("invalid").to_pydict()
    assert result["d"] == [None, None, None]

    # The remaining cases all start from the same frame: an int64 null
    # column "d" next to a string null column "e".  Each fill_null call
    # returns a new frame, so the base frame can be reused.
    df_with_nulls = df.with_columns(
        null_int.alias("d"),
        null_str.alias("e"),
    )

    # Test filling nulls with subset of columns
    result = df_with_nulls.fill_null("missing", subset=["e"]).to_pydict()
    assert result["d"] == [None, None, None]
    assert result["e"] == ["missing", "missing", "missing"]

    # Test filling nulls with value that can be cast to some columns but not others
    result = df_with_nulls.fill_null(0).to_pydict()
    assert result["d"] == [0, 0, 0]
    assert result["e"] == [None, None, None]

    # Test filling nulls with subset of columns where some casts fail
    result = df_with_nulls.fill_null(0, subset=["d", "e"]).to_pydict()
    assert result["d"] == [0, 0, 0]
    assert result["e"] == [None, None, None]

    # Test filling nulls with subset of columns where all casts succeed
    result = df_with_nulls.fill_null("missing", subset=["e"]).to_pydict()
    assert result["d"] == [None, None, None]
    assert result["e"] == ["missing", "missing", "missing"]

    # Test filling nulls with subset of columns where some columns do not exist
    with pytest.raises(ValueError, match="Column 'f' not found in DataFrame"):
        df_with_nulls.fill_null("missing", subset=["e", "f"])
0 commit comments