Add unit test to check field inputs

timsaucer · timsaucer · commit f1cf7a185fe2 · 2026-02-04T07:44:29.000-05:00
diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py
@@ -17,7 +17,7 @@
 
 import pyarrow as pa
 import pytest
-from datafusion import column, udf
+from datafusion import SessionContext, column, udf
 from datafusion import functions as f
 
 
@@ -148,3 +148,63 @@ def uuid_version(uuid):
     )
 
     assert results[0][0].to_pylist() == [4, 4, 4, 4, 4]
+
+
+def test_udf_with_nullability(ctx: SessionContext) -> None:
+    import pyarrow.compute as pc
+
+    field_nullable_i64 = pa.field("with_nulls", type=pa.int64(), nullable=True)
+    field_non_nullable_i64 = pa.field("no_nulls", type=pa.int64(), nullable=False)
+
+    @udf([field_nullable_i64], field_nullable_i64, "stable")
+    def nullable_abs(input_col):
+        return pc.abs(input_col)
+
+    @udf([field_non_nullable_i64], field_non_nullable_i64, "stable")
+    def non_nullable_abs(input_col):
+        return pc.abs(input_col)
+
+    batch = pa.record_batch(
+        {
+            "with_nulls": pa.array([-2, None, 0, 1, 2]),
+            "no_nulls": pa.array([-2, -1, 0, 1, 2]),
+        },
+        schema=pa.schema(
+            [
+                field_nullable_i64,
+                field_non_nullable_i64,
+            ]
+        ),
+    )
+    ctx.register_record_batches("t", [[batch]])
+    df = ctx.table("t")
+
+    # Input matches expected, nullable
+    df_result = df.select(nullable_abs(column("with_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert returned_field.nullable
+    results = df_result.collect()
+    assert results[0][0].to_pylist() == [2, None, 0, 1, 2]
+
+    # Input coercible to expected, nullable
+    df_result = df.select(nullable_abs(column("no_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert returned_field.nullable
+    results = df_result.collect()
+    assert results[0][0].to_pylist() == [2, 1, 0, 1, 2]
+
+    # Input matches expected, no nulls
+    df_result = df.select(non_nullable_abs(column("no_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert not returned_field.nullable
+    results = df_result.collect()
+    assert results[0][0].to_pylist() == [2, 1, 0, 1, 2]
+
+    # Invalid - requires non-nullable input but that is not possible
+    df_result = df.select(non_nullable_abs(column("with_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert not returned_field.nullable
+
+    with pytest.raises(Exception) as e_info:
+        _results = df_result.collect()
+    assert "InvalidArgumentError" in str(e_info)