@@ -1433,40 +1433,59 @@ def test_max_cell_length_display(ctx):
def test_display_config_repr_string(ctx):
    """Check that the string repr of a DataFrame honours ``min_table_rows``.

    Two scenarios are exercised, each on a freshly built single-column frame:

    1. 7 rows with ``min_table_rows=5``: the number of rendered rows must lie
       between the configured minimum and the total row count.
    2. 11 rows with ``min_table_rows=7``: the output must carry the
       "Data truncated" marker and render more rows than scenario 1.
    """
    # --- Scenario 1: small frame -------------------------------------------
    # __repr__ is expected to render at most 10 rows, so start below that cap.
    small_total = 7
    small_df = _create_numeric_test_df(ctx, small_total)

    small_minimum = 5
    small_df.configure_display(min_table_rows=small_minimum)

    small_repr = repr(small_df)
    small_shown = _count_lines_in_str(small_repr)

    # Rendered rows can never exceed what exists, and must reach the minimum.
    assert small_minimum <= small_shown <= small_total

    # --- Scenario 2: larger frame, higher minimum --------------------------
    # A new frame is built rather than reconfiguring the old one so that no
    # state from the first scenario carries over.
    large_minimum = 7
    large_total = 11
    large_df = _create_numeric_test_df(ctx, large_total)
    large_df.configure_display(min_table_rows=large_minimum)

    large_repr = repr(large_df)

    # 11 rows is above the repr cap, so the truncation marker must be present.
    assert "Data truncated" in large_repr

    large_shown = _count_lines_in_str(large_repr)

    # Raising the minimum (and the data size) must surface more rows.
    assert large_shown > small_shown
    assert large_shown >= large_minimum
1472+ def _count_lines_in_str (repr_str ):
1473+ """Count the number of rows displayed in a string representation.
1474+
1475+ Args:
1476+ repr_str: String representation of the DataFrame.
1477+
1478+ Returns:
1479+ Number of rows that appear in the string representation.
1480+ """
1481+ # Find all lines that match the pattern of a number at the beginning of a row
1482+ # This is more robust than checking for specific numbers
1483+ value_lines = 0
1484+ for line in repr_str .split ("\n " ):
1485+ # Look for lines that contain numeric values (row data)
1486+ if re .search (r"^\s*\d+\s" , line ):
1487+ value_lines += 1
1488+ return value_lines
14701489
14711490
14721491def test_display_config_integrated (ctx ):
@@ -1514,3 +1533,18 @@ def test_display_config_integrated(ctx):
15141533
15151534 # Default settings should show more data
15161535 assert default_row_count > row_count
1536+
1537+
def _create_numeric_test_df(ctx, rows):
    """Build a one-column test dataframe holding the integers ``0 .. rows-1``.

    Args:
        ctx: Context object whose ``create_dataframe`` method builds the frame.
        rows: Number of rows to generate.

    Returns:
        The dataframe returned by ``ctx.create_dataframe``; its only column is
        named "values" and contains ``0, 1, ..., rows - 1`` in order.
    """
    values_column = pa.array(list(range(rows)))
    record_batch = pa.RecordBatch.from_arrays([values_column], names=["values"])
    # create_dataframe is handed a list of partitions, each a list of batches;
    # here there is a single partition with a single batch.
    partitions = [[record_batch]]
    return ctx.create_dataframe(partitions)
0 commit comments