Skip to content

Commit 52091ce

Browse files
committed
fix: Update record batch display logic to use min_table_rows from config
1 parent 727914d commit 52091ce

File tree

2 files changed

Lines changed: 10 additions & 52 deletions

2 files changed

Lines changed: 10 additions & 52 deletions

python/tests/test_dataframe.py

Lines changed: 4 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,63 +1478,16 @@ def _count_lines_in_str(repr_str):
14781478
Returns:
14791479
Number of rows that appear in the string representation.
14801480
"""
1481-
# Find all lines that match the pattern of a number at the beginning of a row
1482-
# This is more robust than checking for specific numbers
1481+
# DataFrame tables are formatted with | value | patterns
1482+
# Count lines that match actual data rows (not headers or separators)
14831483
value_lines = 0
14841484
for line in repr_str.split("\n"):
1485-
# Look for lines that contain numeric values (row data)
1486-
if re.search(r"^\s*\d+\s", line):
1485+
# Look for lines like "| 0 |", "| 1 |", etc.
1486+
if re.search(r"\|\s*\d+\s*\|", line):
14871487
value_lines += 1
14881488
return value_lines
14891489

14901490

1491-
def test_display_config_integrated(ctx):
1492-
"""Test all display config options together in an integrated test."""
1493-
# Create a dataframe with:
1494-
# - Many rows (to test min_table_rows)
1495-
# - Large data (to test max_table_bytes)
1496-
# - Long strings (to test max_cell_length)
1497-
rows = 50
1498-
ids = list(range(rows))
1499-
# Generate strings of increasing length
1500-
texts = [f"{'A' * i}" for i in range(1, rows + 1)]
1501-
1502-
batch = pa.RecordBatch.from_arrays(
1503-
[pa.array(ids), pa.array(texts)], names=["id", "text"]
1504-
)
1505-
1506-
df = ctx.create_dataframe([[batch]])
1507-
1508-
# Set custom display configuration
1509-
df.configure_display(
1510-
max_table_bytes=2000, # Limit bytes to display
1511-
min_table_rows=15, # Show at least 15 rows
1512-
max_cell_length=10, # Truncate cells longer than 10 chars
1513-
)
1514-
1515-
# Get HTML representation
1516-
html_output = df._repr_html_()
1517-
1518-
# Check row count
1519-
row_count = html_output.count("<tr>") - 1 # subtract header
1520-
assert row_count >= 15, f"Should display at least 15 rows, got {row_count}"
1521-
1522-
# Check for truncation
1523-
assert "expandable-container" in html_output
1524-
assert "expand-btn" in html_output
1525-
1526-
# Should be truncated (not all rows displayed)
1527-
assert "Data truncated" in html_output
1528-
1529-
# Now with default settings
1530-
df.reset_display_config()
1531-
default_html = df._repr_html_()
1532-
default_row_count = default_html.count("<tr>") - 1
1533-
1534-
# Default settings should show more data
1535-
assert default_row_count > row_count
1536-
1537-
15381491
def _create_numeric_test_df(ctx, rows):
15391492
"""Create a test dataframe with numeric values from 0 to rows-1.
15401493

src/dataframe.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,12 @@ impl PyDataFrame {
162162
fn __repr__(&self, py: Python) -> PyDataFusionResult<String> {
163163
let (batches, has_more) = wait_for_future(
164164
py,
165-
collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10, &self.config),
165+
collect_record_batches_to_display(
166+
self.df.as_ref().clone(),
167+
self.config.min_table_rows,
168+
10,
169+
&self.config,
170+
),
166171
)?;
167172
if batches.is_empty() {
168173
// This should not be reached, but do it for safety since we index into the vector below

0 commit comments

Comments
 (0)