Skip to content

Commit ced084c

Browse files
committed
Add fixture for empty DataFrame and parameterize repr_html test
- Introduced the `empty_df` fixture, which creates an empty RecordBatch with the same schema as the existing `df` fixture.
- Parameterized `test_dataframe_repr_html` to test multiple DataFrame scenarios, including the new empty DataFrame.
- Updated assertions to ensure the HTML representation is correct for both empty and non-empty DataFrames.
1 parent d034685 commit ced084c

File tree

2 files changed

+223
-127
lines changed

2 files changed

+223
-127
lines changed

python/tests/test_dataframe.py

Lines changed: 36 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
from datafusion import functions as f
3232
from datafusion.expr import Window
3333
from pyarrow.csv import write_csv
34+
from tests.generic import data
3435

3536

3637
@pytest.fixture
@@ -51,6 +52,23 @@ def df():
5152
return ctx.from_arrow(batch)
5253

5354

55+
@pytest.fixture
56+
def empty_df():
57+
ctx = SessionContext()
58+
59+
# Create an empty RecordBatch with the same schema as df
60+
batch = pa.RecordBatch.from_arrays(
61+
[
62+
pa.array([], type=pa.int64()),
63+
pa.array([], type=pa.int64()),
64+
pa.array([], type=pa.int64()),
65+
],
66+
names=["a", "b", "c"],
67+
)
68+
69+
return ctx.from_arrow(batch)
70+
71+
5472
@pytest.fixture
5573
def struct_df():
5674
ctx = SessionContext()
@@ -1189,7 +1207,12 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
11891207
assert result["new_col"] == [3 for _i in range(3)]
11901208

11911209

1192-
def test_dataframe_repr_html(df) -> None:
1210+
@pytest.mark.parametrize(
1211+
"dataframe_fixture",
1212+
["empty_df", "df", "nested_df", "struct_df", "partitioned_df", "aggregate_df"],
1213+
)
1214+
def test_dataframe_repr_html(request, dataframe_fixture) -> None:
1215+
df = request.getfixturevalue(dataframe_fixture)
11931216
output = df._repr_html_()
11941217

11951218
# Since we've added a fair bit of processing to the html output, let's just verify
@@ -1202,7 +1225,15 @@ def test_dataframe_repr_html(df) -> None:
12021225
header_pattern = "(.*?)".join(headers)
12031226
assert len(re.findall(header_pattern, output, re.DOTALL)) == 1
12041227

1205-
body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]]
1206-
body_lines = [f"<td(.*?)>{v}</td>" for inner in body_data for v in inner]
1207-
body_pattern = "(.*?)".join(body_lines)
1208-
assert len(re.findall(body_pattern, output, re.DOTALL)) == 1
1228+
if dataframe_fixture == "empty_df":
1229+
# For an empty dataframe, make sure we don't have any data cells but do have a header
1230+
assert "<tbody>" in output
1231+
assert "</tbody>" in output
1232+
# Check there are no data cells
1233+
assert len(re.findall(r"<td.*?>.*?</td>", output, re.DOTALL)) == 0
1234+
else:
1235+
# For non-empty dataframe, verify data rows
1236+
body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]]
1237+
body_lines = [f"<td(.*?)>{v}</td>" for inner in body_data for v in inner]
1238+
body_pattern = "(.*?)".join(body_lines)
1239+
assert len(re.findall(body_pattern, output, re.DOTALL)) == 1

0 commit comments

Comments
 (0)