@@ -117,6 +117,22 @@ def clean_formatter_state():
117117 reset_formatter ()
118118
119119
@pytest.fixture
def null_df():
    """Build a four-row DataFrame in which every column contains nulls.

    The columns are ``int_col`` (int64), ``float_col`` (float64),
    ``str_col`` (string) and ``bool_col`` (bool).
    """
    # Keep each column name next to its data so the two cannot drift apart.
    column_arrays = {
        "int_col": pa.array([1, None, 3, None], type=pa.int64()),
        "float_col": pa.array([4.5, 6.7, None, None], type=pa.float64()),
        "str_col": pa.array(["a", None, "c", None], type=pa.string()),
        "bool_col": pa.array([True, None, False, None], type=pa.bool_()),
    }
    record_batch = pa.RecordBatch.from_arrays(
        list(column_arrays.values()),
        names=list(column_arrays),
    )

    session = SessionContext()
    return session.create_dataframe([[record_batch]])
134+
135+
120136def test_select (df ):
121137 df_1 = df .select (
122138 column ("a" ) + column ("b" ),
@@ -1642,8 +1658,112 @@ def test_html_formatter_manual_format_html(clean_formatter_state):
16421658 local_formatter = DataFrameHtmlFormatter (use_shared_styles = False )
16431659
16441660 # Both calls should include styles
1661+
16451662 local_html_1 = local_formatter .format_html ([batch ], batch .schema )
16461663 local_html_2 = local_formatter .format_html ([batch ], batch .schema )
16471664
16481665 assert "<style>" in local_html_1
16491666 assert "<style>" in local_html_2
1667+
1668+
def test_fill_null_basic(null_df):
    """Test that ``fill_null(0)`` replaces the nulls of every column."""
    first_batch = null_df.fill_null(0).collect()[0]

    # Expected contents per column position; the fill value 0 shows up as
    # 0 / 0.0 / "0" / False depending on the column type.
    expected_columns = [
        [1, 0, 3, 0],
        [4.5, 6.7, 0.0, 0.0],
        ["a", "0", "c", "0"],
        [True, False, False, False],
    ]
    for position, expected in enumerate(expected_columns):
        assert first_batch.column(position).to_pylist() == expected
1683+
1684+
def test_fill_null_subset(null_df):
    """Test that ``subset`` limits null filling to the named columns.

    The numeric columns listed in ``subset`` must have their nulls replaced
    by 0, while the string and boolean columns must come back exactly as the
    ``null_df`` fixture built them.
    """
    filled_df = null_df.fill_null(0, subset=["int_col", "float_col"])

    result = filled_df.collect()[0]

    # Columns named in ``subset`` have their nulls replaced.
    assert result.column(0).to_pylist() == [1, 0, 3, 0]
    assert result.column(1).to_pylist() == [4.5, 6.7, 0.0, 0.0]
    # Columns outside ``subset`` are compared in full rather than with
    # ``None in ...``, so a partially filled column cannot slip through.
    assert result.column(2).to_pylist() == ["a", None, "c", None]
    assert result.column(3).to_pylist() == [True, None, False, None]
1698+
1699+
def test_fill_null_specific_types(null_df):
    """Test ``fill_null`` with a value that only fits the string column.

    ``fill_null`` casts the fill value to each column's type and leaves a
    column unchanged when that cast is not possible. The string "missing"
    cannot be cast to int64, float64 or bool, so only ``str_col`` is filled
    and the other columns keep their nulls.
    """
    filled_df = null_df.fill_null("missing")

    result = filled_df.collect()[0]

    # Non-string columns: the cast of "missing" fails, nulls are preserved.
    assert result.column(0).to_pylist() == [1, None, 3, None]
    assert result.column(1).to_pylist() == [4.5, 6.7, None, None]
    # String column: nulls are replaced by the fill value.
    assert result.column(2).to_pylist() == ["a", "missing", "c", "missing"]
    assert result.column(3).to_pylist() == [True, None, False, None]
1712+
1713+
def test_fill_null_immutability(null_df):
    """Test that ``fill_null`` returns a new DataFrame and leaves the source alone.

    The null count of ``int_col`` in ``null_df`` is sampled before and after
    calling ``fill_null``; it must be non-zero and identical both times,
    while the returned DataFrame must contain no nulls in that column.
    """
    nulls_before = null_df.collect()[0].column(0).to_pylist().count(None)
    # Guard against a fixture without nulls, which would make the test vacuous.
    assert nulls_before > 0

    filled_df = null_df.fill_null(0)

    # The returned frame is the one that carries the filled values ...
    filled_nulls = filled_df.collect()[0].column(0).to_pylist().count(None)
    assert filled_nulls == 0

    # ... and re-collecting the source frame still shows the original nulls.
    nulls_after = null_df.collect()[0].column(0).to_pylist().count(None)
    assert nulls_after == nulls_before
1729+
1730+
def test_fill_null_empty_df(ctx):
    """Test that ``fill_null`` on a zero-row DataFrame keeps it empty.

    The frame has an int64 column ``a`` and a string column ``b``; after
    filling, both columns must still be empty and keep their names.
    """
    column_names = ["a", "b"]
    empty_arrays = [
        pa.array([], type=pa.int64()),
        pa.array([], type=pa.string()),
    ]
    empty_batch = pa.RecordBatch.from_arrays(empty_arrays, names=column_names)
    empty_df = ctx.create_dataframe([[empty_batch]])

    # The call itself must not raise on an empty input.
    result = empty_df.fill_null(0).collect()[0]

    # Row counts first, then field names, mirroring the schema order.
    for position in range(len(column_names)):
        assert len(result.column(position)) == 0
    for position, expected_name in enumerate(column_names):
        assert result.schema.field(position).name == expected_name
1749+
1750+
def test_fill_null_all_null_column(ctx):
    """Test ``fill_null`` on a string column that consists only of nulls."""
    int_values = pa.array([1, 2, 3])
    null_strings = pa.array([None, None, None], type=pa.string())
    source_batch = pa.RecordBatch.from_arrays(
        [int_values, null_strings],
        names=["a", "b"],
    )
    all_null_df = ctx.create_dataframe([[source_batch]])

    filled_batch = all_null_df.fill_null("filled").collect()[0]

    # Every entry of the all-null column is replaced by the fill value.
    assert filled_batch.column(1).to_pylist() == ["filled", "filled", "filled"]

    # The source frame still reports all three nulls when collected again.
    untouched_batch = all_null_df.collect()[0]
    assert untouched_batch.column(1).null_count == 3
0 commit comments