Skip to content

Commit 6581b4f

Browse files
timsaucer and claude committed
Consolidate array/list function tests using pytest parametrize
Reduce 26 individual tests to 14 test functions with parametrized cases, eliminating boilerplate while maintaining full coverage.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 84d7393 commit 6581b4f

File tree

1 file changed

+61
-112
lines changed

1 file changed

+61
-112
lines changed

python/tests/test_functions.py

Lines changed: 61 additions & 112 deletions
Original file line number | Diff line number | Diff line change
@@ -1481,94 +1481,62 @@ def test_array_any_value():
14811481
assert values[2] == 1
14821482

14831483

1484-
def test_list_any_value():
1484+
@pytest.mark.parametrize("func", [f.array_any_value, f.list_any_value])
1485+
def test_any_value_aliases(func):
14851486
ctx = SessionContext()
14861487
df = ctx.from_pydict({"a": [[None, 5]]})
1487-
result = df.select(f.list_any_value(column("a")).alias("v")).collect()
1488+
result = df.select(func(column("a")).alias("v")).collect()
14881489
assert result[0].column(0)[0].as_py() == 5
14891490

14901491

1491-
def test_array_distance():
1492+
@pytest.mark.parametrize("func", [f.array_distance, f.list_distance])
1493+
def test_array_distance_aliases(func):
14921494
ctx = SessionContext()
14931495
df = ctx.from_pydict({"a": [[1.0, 2.0]], "b": [[1.0, 4.0]]})
1494-
result = df.select(f.array_distance(column("a"), column("b")).alias("v")).collect()
1496+
result = df.select(func(column("a"), column("b")).alias("v")).collect()
14951497
assert result[0].column(0)[0].as_py() == pytest.approx(2.0)
14961498

14971499

1498-
def test_list_distance():
1499-
ctx = SessionContext()
1500-
df = ctx.from_pydict({"a": [[3.0, 0.0]], "b": [[0.0, 4.0]]})
1501-
result = df.select(f.list_distance(column("a"), column("b")).alias("v")).collect()
1502-
assert result[0].column(0)[0].as_py() == pytest.approx(5.0)
1503-
1504-
1505-
def test_array_max():
1506-
ctx = SessionContext()
1507-
df = ctx.from_pydict({"a": [[1, 5, 3], [10, 2]]})
1508-
result = df.select(f.array_max(column("a")).alias("v")).collect()
1509-
values = [row.as_py() for row in result[0].column(0)]
1510-
assert values == [5, 10]
1511-
1512-
1513-
def test_list_max():
1514-
ctx = SessionContext()
1515-
df = ctx.from_pydict({"a": [[7, 2, 9]]})
1516-
result = df.select(f.list_max(column("a")).alias("v")).collect()
1517-
assert result[0].column(0)[0].as_py() == 9
1518-
1519-
1520-
def test_array_min():
1500+
@pytest.mark.parametrize(
1501+
("func", "expected"),
1502+
[
1503+
(f.array_max, [5, 10]),
1504+
(f.list_max, [5, 10]),
1505+
(f.array_min, [1, 2]),
1506+
(f.list_min, [1, 2]),
1507+
],
1508+
)
1509+
def test_array_min_max(func, expected):
15211510
ctx = SessionContext()
15221511
df = ctx.from_pydict({"a": [[1, 5, 3], [10, 2]]})
1523-
result = df.select(f.array_min(column("a")).alias("v")).collect()
1512+
result = df.select(func(column("a")).alias("v")).collect()
15241513
values = [row.as_py() for row in result[0].column(0)]
1525-
assert values == [1, 2]
1514+
assert values == expected
15261515

15271516

1528-
def test_list_min():
1529-
ctx = SessionContext()
1530-
df = ctx.from_pydict({"a": [[7, 2, 9]]})
1531-
result = df.select(f.list_min(column("a")).alias("v")).collect()
1532-
assert result[0].column(0)[0].as_py() == 2
1533-
1534-
1535-
def test_array_reverse():
1517+
@pytest.mark.parametrize("func", [f.array_reverse, f.list_reverse])
1518+
def test_array_reverse_aliases(func):
15361519
ctx = SessionContext()
15371520
df = ctx.from_pydict({"a": [[1, 2, 3], [4, 5]]})
1538-
result = df.select(f.array_reverse(column("a")).alias("v")).collect()
1521+
result = df.select(func(column("a")).alias("v")).collect()
15391522
values = [row.as_py() for row in result[0].column(0)]
15401523
assert values == [[3, 2, 1], [5, 4]]
15411524

15421525

1543-
def test_list_reverse():
1544-
ctx = SessionContext()
1545-
df = ctx.from_pydict({"a": [[10, 20, 30]]})
1546-
result = df.select(f.list_reverse(column("a")).alias("v")).collect()
1547-
assert result[0].column(0)[0].as_py() == [30, 20, 10]
1548-
1549-
1550-
def test_arrays_zip():
1551-
ctx = SessionContext()
1552-
df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
1553-
result = df.select(f.arrays_zip(column("a"), column("b")).alias("v")).collect()
1554-
values = result[0].column(0)[0].as_py()
1555-
assert values == [{"c0": 1, "c1": 3}, {"c0": 2, "c1": 4}]
1556-
1557-
1558-
def test_list_zip():
1526+
@pytest.mark.parametrize("func", [f.arrays_zip, f.list_zip])
1527+
def test_arrays_zip_aliases(func):
15591528
ctx = SessionContext()
15601529
df = ctx.from_pydict({"a": [[1, 2]], "b": [[3, 4]]})
1561-
result = df.select(f.list_zip(column("a"), column("b")).alias("v")).collect()
1530+
result = df.select(func(column("a"), column("b")).alias("v")).collect()
15621531
values = result[0].column(0)[0].as_py()
15631532
assert values == [{"c0": 1, "c1": 3}, {"c0": 2, "c1": 4}]
15641533

15651534

1566-
def test_string_to_array():
1535+
@pytest.mark.parametrize("func", [f.string_to_array, f.string_to_list])
1536+
def test_string_to_array_aliases(func):
15671537
ctx = SessionContext()
15681538
df = ctx.from_pydict({"a": ["hello,world,foo"]})
1569-
result = df.select(
1570-
f.string_to_array(column("a"), literal(",")).alias("v")
1571-
).collect()
1539+
result = df.select(func(column("a"), literal(",")).alias("v")).collect()
15721540
assert result[0].column(0)[0].as_py() == ["hello", "world", "foo"]
15731541

15741542

@@ -1582,17 +1550,11 @@ def test_string_to_array_with_null_string():
15821550
assert values == ["hello", None, "world"]
15831551

15841552

1585-
def test_string_to_list():
1586-
ctx = SessionContext()
1587-
df = ctx.from_pydict({"a": ["a-b-c"]})
1588-
result = df.select(f.string_to_list(column("a"), literal("-")).alias("v")).collect()
1589-
assert result[0].column(0)[0].as_py() == ["a", "b", "c"]
1590-
1591-
1592-
def test_gen_series():
1553+
@pytest.mark.parametrize("func", [f.gen_series, f.generate_series])
1554+
def test_gen_series_aliases(func):
15931555
ctx = SessionContext()
15941556
df = ctx.from_pydict({"a": [0]})
1595-
result = df.select(f.gen_series(literal(1), literal(5)).alias("v")).collect()
1557+
result = df.select(func(literal(1), literal(5)).alias("v")).collect()
15961558
assert result[0].column(0)[0].as_py() == [1, 2, 3, 4, 5]
15971559

15981560

@@ -1605,25 +1567,37 @@ def test_gen_series_with_step():
16051567
assert result[0].column(0)[0].as_py() == [1, 4, 7, 10]
16061568

16071569

1608-
def test_generate_series():
1570+
@pytest.mark.parametrize(
1571+
("func", "element", "expected"),
1572+
[
1573+
(f.array_contains, literal(2), True),
1574+
(f.list_contains, literal(99), False),
1575+
(f.list_has, literal(2), True),
1576+
],
1577+
)
1578+
def test_element_containment(func, element, expected):
16091579
ctx = SessionContext()
1610-
df = ctx.from_pydict({"a": [0]})
1611-
result = df.select(f.generate_series(literal(1), literal(3)).alias("v")).collect()
1612-
assert result[0].column(0)[0].as_py() == [1, 2, 3]
1580+
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1581+
result = df.select(func(column("a"), element).alias("v")).collect()
1582+
assert result[0].column(0)[0].as_py() is expected
16131583

16141584

1615-
def test_array_contains():
1585+
def test_list_has_all():
16161586
ctx = SessionContext()
16171587
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1618-
result = df.select(f.array_contains(column("a"), literal(2)).alias("v")).collect()
1588+
result = df.select(
1589+
f.list_has_all(column("a"), f.make_array(literal(1), literal(2))).alias("v")
1590+
).collect()
16191591
assert result[0].column(0)[0].as_py() is True
16201592

16211593

1622-
def test_list_contains():
1594+
def test_list_has_any():
16231595
ctx = SessionContext()
16241596
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1625-
result = df.select(f.list_contains(column("a"), literal(99)).alias("v")).collect()
1626-
assert result[0].column(0)[0].as_py() is False
1597+
result = df.select(
1598+
f.list_has_any(column("a"), f.make_array(literal(5), literal(2))).alias("v")
1599+
).collect()
1600+
assert result[0].column(0)[0].as_py() is True
16271601

16281602

16291603
def test_list_empty():
@@ -1634,40 +1608,15 @@ def test_list_empty():
16341608
assert values == [True, False]
16351609

16361610

1637-
def test_list_pop_back():
1638-
ctx = SessionContext()
1639-
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1640-
result = df.select(f.list_pop_back(column("a")).alias("v")).collect()
1641-
assert result[0].column(0)[0].as_py() == [1, 2]
1642-
1643-
1644-
def test_list_pop_front():
1645-
ctx = SessionContext()
1646-
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1647-
result = df.select(f.list_pop_front(column("a")).alias("v")).collect()
1648-
assert result[0].column(0)[0].as_py() == [2, 3]
1649-
1650-
1651-
def test_list_has():
1652-
ctx = SessionContext()
1653-
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1654-
result = df.select(f.list_has(column("a"), literal(2)).alias("v")).collect()
1655-
assert result[0].column(0)[0].as_py() is True
1656-
1657-
1658-
def test_list_has_all():
1659-
ctx = SessionContext()
1660-
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1661-
result = df.select(
1662-
f.list_has_all(column("a"), f.make_array(literal(1), literal(2))).alias("v")
1663-
).collect()
1664-
assert result[0].column(0)[0].as_py() is True
1665-
1666-
1667-
def test_list_has_any():
1611+
@pytest.mark.parametrize(
1612+
("func", "expected"),
1613+
[
1614+
(f.list_pop_back, [1, 2]),
1615+
(f.list_pop_front, [2, 3]),
1616+
],
1617+
)
1618+
def test_list_pop(func, expected):
16681619
ctx = SessionContext()
16691620
df = ctx.from_pydict({"a": [[1, 2, 3]]})
1670-
result = df.select(
1671-
f.list_has_any(column("a"), f.make_array(literal(5), literal(2))).alias("v")
1672-
).collect()
1673-
assert result[0].column(0)[0].as_py() is True
1621+
result = df.select(func(column("a")).alias("v")).collect()
1622+
assert result[0].column(0)[0].as_py() == expected

0 commit comments

Comments (0)