From adb7b5d07d0fe637f5190cafbed261c3703f30fb Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 11:05:10 -0800
Subject: [PATCH 01/20] feat: add convenience functions zeros!, ones!, similar!

- Add zeros!(pool, [T], dims...) for zero-initialized arrays
- Add ones!(pool, [T], dims...) for one-initialized arrays
- Add similar!(pool, array, [T], [dims...]) for template-based allocation
- Update macros.jl with _impl! transformation for macro optimization
- Support typed checkpoint extraction for convenience functions
---
 src/AdaptiveArrayPools.jl |   4 +
 src/convenience.jl        | 218 ++++++++++++++++++++++++++++++++++++++
 src/macros.jl             | 123 ++++++++++++++++-----
 3 files changed, 319 insertions(+), 26 deletions(-)
 create mode 100644 src/convenience.jl

diff --git a/src/AdaptiveArrayPools.jl b/src/AdaptiveArrayPools.jl
index a23970b..c02afdf 100644
--- a/src/AdaptiveArrayPools.jl
+++ b/src/AdaptiveArrayPools.jl
@@ -5,6 +5,7 @@ using Printf
 # Public API
 export AdaptiveArrayPool, acquire!, unsafe_acquire!, pool_stats, get_task_local_pool
 export acquire_view!, acquire_array!  # Explicit naming aliases
+export zeros!, ones!, similar!  # Convenience functions
 export @with_pool, @maybe_with_pool
 export USE_POOLING, MAYBE_POOLING_ENABLED, POOL_DEBUG
 export checkpoint!, rewind!, reset!
@@ -24,6 +25,9 @@ include("utils.jl")
 # Acquisition operations: get_view!, acquire!, unsafe_acquire!, aliases
 include("acquire.jl")
 
+# Convenience functions: zeros!, ones!, similar!
+include("convenience.jl")
+
 # State management: checkpoint!, rewind!, reset!, empty!
 include("state.jl")
 
diff --git a/src/convenience.jl b/src/convenience.jl
new file mode 100644
index 0000000..421db6b
--- /dev/null
+++ b/src/convenience.jl
@@ -0,0 +1,218 @@
+# ==============================================================================
+# Convenience Functions (zeros!, ones!, similar!)
+# ==============================================================================
+
+# ==============================================================================
+# zeros! - Acquire zero-initialized arrays from pool
+# ==============================================================================
+
+"""
+    zeros!(pool, dims...) -> view
+    zeros!(pool, T, dims...) -> view
+    zeros!(pool, dims::Tuple) -> view
+    zeros!(pool, T, dims::Tuple) -> view
+
+Acquire a zero-initialized array from the pool.
+
+Equivalent to `acquire!(pool, T, dims...)` followed by `fill!(arr, zero(T))`.
+Default element type is `Float64` when not specified.
+
+## Example
+```julia
+@with_pool pool begin
+    v = zeros!(pool, 100)              # Vector{Float64} view, all zeros
+    m = zeros!(pool, Float32, 10, 10)  # Matrix{Float32} view, all zeros
+end
+```
+
+See also: [`ones!`](@ref), [`similar!`](@ref), [`acquire!`](@ref)
+"""
+@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    _mark_untracked!(pool)
+    _zeros_impl!(pool, T, dims...)
+end
+
+@inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+    _mark_untracked!(pool)
+    _zeros_impl!(pool, Float64, dims...)
+end
+
+@inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+    _mark_untracked!(pool)
+    _zeros_impl!(pool, T, dims...)
+end
+
+@inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+    _mark_untracked!(pool)
+    _zeros_impl!(pool, Float64, dims...)
+end
+
+# Internal implementation (for macro transformation)
+@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    arr = _acquire_impl!(pool, T, dims...)
+    fill!(arr, zero(T))
+    arr
+end
+
+@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+    _zeros_impl!(pool, T, dims...)
+end
+
+# Default type (Float64) overloads for macro transformation
+@inline function _zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+    _zeros_impl!(pool, Float64, dims...)
+end
+
+@inline function _zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+    _zeros_impl!(pool, Float64, dims...)
+end
+
+# Nothing fallback (pooling disabled)
+@inline zeros!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
+@inline zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(Float64, dims...)
+@inline zeros!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
+@inline zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(Float64, dims...)
+
+# ==============================================================================
+# ones! - Acquire one-initialized arrays from pool
+# ==============================================================================
+
+"""
+    ones!(pool, dims...) -> view
+    ones!(pool, T, dims...) -> view
+    ones!(pool, dims::Tuple) -> view
+    ones!(pool, T, dims::Tuple) -> view
+
+Acquire a one-initialized array from the pool.
+
+Equivalent to `acquire!(pool, T, dims...)` followed by `fill!(arr, one(T))`.
+Default element type is `Float64` when not specified.
+
+## Example
+```julia
+@with_pool pool begin
+    v = ones!(pool, 100)              # Vector{Float64} view, all ones
+    m = ones!(pool, Float32, 10, 10)  # Matrix{Float32} view, all ones
+end
+```
+
+See also: [`zeros!`](@ref), [`similar!`](@ref), [`acquire!`](@ref)
+"""
+@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    _mark_untracked!(pool)
+    _ones_impl!(pool, T, dims...)
+end
+
+@inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+    _mark_untracked!(pool)
+    _ones_impl!(pool, Float64, dims...)
+end
+
+@inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+    _mark_untracked!(pool)
+    _ones_impl!(pool, T, dims...)
+end
+
+@inline function ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+    _mark_untracked!(pool)
+    _ones_impl!(pool, Float64, dims...)
+end
+
+# Internal implementation (for macro transformation)
+@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    arr = _acquire_impl!(pool, T, dims...)
+    fill!(arr, one(T))
+    arr
+end
+
+@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+    _ones_impl!(pool, T, dims...)
+end
+
+# Default type (Float64) overloads for macro transformation
+@inline function _ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+    _ones_impl!(pool, Float64, dims...)
+end
+
+@inline function _ones_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+    _ones_impl!(pool, Float64, dims...)
+end
+
+# Nothing fallback (pooling disabled)
+@inline ones!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
+@inline ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(Float64, dims...)
+@inline ones!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
+@inline ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(Float64, dims...)
+
+# ==============================================================================
+# similar! - Acquire arrays with same type/size as template
+# ==============================================================================
+
+"""
+    similar!(pool, array) -> view
+    similar!(pool, array, T) -> view
+    similar!(pool, array, dims...) -> view
+    similar!(pool, array, T, dims...) -> view
+
+Acquire an uninitialized array from the pool, using a template array for defaults.
+
+- `similar!(pool, A)`: same element type and size as `A`
+- `similar!(pool, A, T)`: element type `T`, same size as `A`
+- `similar!(pool, A, dims...)`: same element type as `A`, specified dimensions
+- `similar!(pool, A, T, dims...)`: element type `T`, specified dimensions
+
+## Example
+```julia
+A = rand(10, 10)
+@with_pool pool begin
+    B = similar!(pool, A)              # Same type and size
+    C = similar!(pool, A, Float32)     # Float32, same size
+    D = similar!(pool, A, 5, 5)        # Same type, different size
+    E = similar!(pool, A, Int, 20)     # Int, 1D
+end
+```
+
+See also: [`zeros!`](@ref), [`ones!`](@ref), [`acquire!`](@ref)
+"""
+@inline function similar!(pool::AbstractArrayPool, x::AbstractArray)
+    _mark_untracked!(pool)
+    _similar_impl!(pool, x)
+end
+
+@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
+    _mark_untracked!(pool)
+    _similar_impl!(pool, x, T)
+end
+
+@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
+    _mark_untracked!(pool)
+    _similar_impl!(pool, x, dims...)
+end
+
+@inline function similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    _mark_untracked!(pool)
+    _similar_impl!(pool, x, T, dims...)
+end
+
+# Internal implementation (for macro transformation)
+@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray)
+    _acquire_impl!(pool, eltype(x), size(x))
+end
+
+@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
+    _acquire_impl!(pool, T, size(x))
+end
+
+@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
+    _acquire_impl!(pool, eltype(x), dims...)
+end
+
+@inline function _similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    _acquire_impl!(pool, T, dims...)
+end
+
+# Nothing fallback (pooling disabled)
+@inline similar!(::Nothing, x::AbstractArray) = similar(x)
+@inline similar!(::Nothing, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
+@inline similar!(::Nothing, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
+@inline similar!(::Nothing, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
diff --git a/src/macros.jl b/src/macros.jl
index 1b47fe1..8f824dc 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -561,46 +561,81 @@ end
 """
     _extract_acquire_types(expr, target_pool) -> Set{Any}
 
-Extract type arguments from acquire function calls in an expression.
+Extract type arguments from acquire/convenience function calls in an expression.
 Only extracts types from calls where the first argument matches `target_pool`.
 This prevents AST pollution when multiple pools are used in the same block.
 
 Supported functions:
 - `acquire!` and its alias `acquire_view!`
 - `unsafe_acquire!` and its alias `acquire_array!`
-
-Handles two forms:
-- `[unsafe_]acquire!(pool, Type, dims...)` (3+ func args): extracts Type (2nd arg) directly
-- `acquire!(pool, x)` (2 func args): generates `eltype(x)` expression for the array
-  (Note: `unsafe_acquire!` / `acquire_array!` does not have the 2-arg form)
+- `zeros!`, `ones!`, `similar!`
+
+Handles various forms:
+- `[unsafe_]acquire!(pool, Type, dims...)`: extracts Type directly
+- `acquire!(pool, x)`: generates `eltype(x)` expression
+- `zeros!(pool, dims...)` / `ones!(pool, dims...)`: Float64 (default)
+- `zeros!(pool, Type, dims...)` / `ones!(pool, Type, dims...)`: extracts Type
+- `similar!(pool, x)` / `similar!(pool, x, dims...)`: generates `eltype(x)` expression
+- `similar!(pool, x, Type, ...)`: extracts Type
 """
 function _extract_acquire_types(expr, target_pool, types=Set{Any}())
     if expr isa Expr
-        # Match: acquire!/acquire_view!/unsafe_acquire!/acquire_array!(pool, ...)
+        # Match: function calls with pool argument
         if expr.head == :call && length(expr.args) >= 3
             fn = expr.args[1]
-            # All acquire function names (including aliases)
-            acquire_names = (:acquire!, :unsafe_acquire!, :acquire_view!, :acquire_array!)
-            acquire_quotenodes = (QuoteNode(:acquire!), QuoteNode(:unsafe_acquire!),
-                                  QuoteNode(:acquire_view!), QuoteNode(:acquire_array!))
-            is_acquire = fn in acquire_names ||
-                         (fn isa Expr && fn.head == :. && length(fn.args) >= 2 &&
-                          fn.args[end] in acquire_quotenodes)
-            if is_acquire
-                # Check if the pool argument matches our target pool
-                pool_arg = expr.args[2]
-                if pool_arg == target_pool
-                    nargs = length(expr.args)
+            pool_arg = expr.args[2]
+
+            # Only process if pool argument matches our target pool
+            if pool_arg == target_pool
+                # All acquire function names (including aliases)
+                acquire_names = (:acquire!, :unsafe_acquire!, :acquire_view!, :acquire_array!)
+
+                # Get function name (handle qualified names)
+                fn_name = fn
+                if fn isa Expr && fn.head == :. && length(fn.args) >= 2
+                    qn = fn.args[end]
+                    if qn isa QuoteNode
+                        fn_name = qn.value
+                    end
+                end
+
+                nargs = length(expr.args)
+
+                # acquire!/unsafe_acquire!/acquire_view!/acquire_array!
+                if fn in acquire_names || fn_name in acquire_names
                     if nargs >= 4
                         # acquire!(pool, Type, dims...) - traditional form
-                        type_arg = expr.args[3]
-                        push!(types, type_arg)
+                        push!(types, expr.args[3])
                     elseif nargs == 3
                         # acquire!(pool, x) - similar-style form
-                        # Type is eltype of the array argument
-                        array_arg = expr.args[3]
-                        type_expr = Expr(:call, :eltype, array_arg)
-                        push!(types, type_expr)
+                        push!(types, Expr(:call, :eltype, expr.args[3]))
+                    end
+                # zeros!/ones!
+                elseif fn == :zeros! || fn == :ones! || fn_name == :zeros! || fn_name == :ones!
+                    if nargs >= 3
+                        third_arg = expr.args[3]
+                        # Check if third arg looks like a type (Symbol starting with uppercase or curly)
+                        if _looks_like_type(third_arg)
+                            push!(types, third_arg)
+                        else
+                            # No type specified, default is Float64
+                            push!(types, :Float64)
+                        end
+                    end
+                # similar!
+                elseif fn == :similar! || fn_name == :similar!
+                    if nargs == 3
+                        # similar!(pool, x) - same type as x
+                        push!(types, Expr(:call, :eltype, expr.args[3]))
+                    elseif nargs >= 4
+                        fourth_arg = expr.args[4]
+                        if _looks_like_type(fourth_arg)
+                            # similar!(pool, x, Type, ...) - explicit type
+                            push!(types, fourth_arg)
+                        else
+                            # similar!(pool, x, dims...) - same type as x
+                            push!(types, Expr(:call, :eltype, expr.args[3]))
+                        end
                     end
                 end
             end
@@ -613,6 +648,24 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}())
     return types
 end
 
+"""
+    _looks_like_type(expr) -> Bool
+
+Purely lexical heuristic: true for Symbols whose first character is uppercase (Float64, Int),
+for curly expressions (Vector{T}), and for any GlobalRef; false for everything else.
+"""
+function _looks_like_type(expr)
+    if expr isa Symbol
+        s = string(expr)
+        return !isempty(s) && isuppercase(first(s))  # NOTE(review): also true for a variable like `N`
+    elseif expr isa Expr && expr.head == :curly
+        return true
+    elseif expr isa GlobalRef
+        return true  # no check that the reference actually names a type
+    end
+    return false  # includes qualified names such as `Base.Float64` (head `:.`)
+end
+
 """
     _uses_local_var(expr, local_vars) -> Bool
 
@@ -733,7 +786,7 @@ end
 """
     _transform_acquire_calls(expr, pool_name) -> Expr
 
-Transform acquire!/unsafe_acquire! calls to their _impl! counterparts.
+Transform acquire!/unsafe_acquire!/convenience function calls to their _impl! counterparts.
 Only transforms calls where the first argument matches `pool_name`.
 
 This allows macro-transformed code to bypass the untracked marking overhead,
@@ -744,11 +797,17 @@ Transformation rules:
 - `acquire_view!(pool, ...)` → `_acquire_impl!(pool, ...)`
 - `unsafe_acquire!(pool, ...)` → `_unsafe_acquire_impl!(pool, ...)`
 - `acquire_array!(pool, ...)` → `_unsafe_acquire_impl!(pool, ...)`
+- `zeros!(pool, ...)` → `_zeros_impl!(pool, ...)`
+- `ones!(pool, ...)` → `_ones_impl!(pool, ...)`
+- `similar!(pool, ...)` → `_similar_impl!(pool, ...)`
 """
 # Module-qualified references for transformed acquire calls
 # Using GlobalRef ensures the function is looked up in AdaptiveArrayPools, not the caller's module
 const _ACQUIRE_IMPL_REF = GlobalRef(@__MODULE__, :_acquire_impl!)
 const _UNSAFE_ACQUIRE_IMPL_REF = GlobalRef(@__MODULE__, :_unsafe_acquire_impl!)
+const _ZEROS_IMPL_REF = GlobalRef(@__MODULE__, :_zeros_impl!)
+const _ONES_IMPL_REF = GlobalRef(@__MODULE__, :_ones_impl!)
+const _SIMILAR_IMPL_REF = GlobalRef(@__MODULE__, :_similar_impl!)
 
 function _transform_acquire_calls(expr, pool_name)
     if expr isa Expr
@@ -764,6 +823,12 @@ function _transform_acquire_calls(expr, pool_name)
                     expr = Expr(:call, _ACQUIRE_IMPL_REF, expr.args[2:end]...)
                 elseif fn == :unsafe_acquire! || fn == :acquire_array!
                     expr = Expr(:call, _UNSAFE_ACQUIRE_IMPL_REF, expr.args[2:end]...)
+                elseif fn == :zeros!
+                    expr = Expr(:call, _ZEROS_IMPL_REF, expr.args[2:end]...)
+                elseif fn == :ones!
+                    expr = Expr(:call, _ONES_IMPL_REF, expr.args[2:end]...)
+                elseif fn == :similar!
+                    expr = Expr(:call, _SIMILAR_IMPL_REF, expr.args[2:end]...)
                 elseif fn isa Expr && fn.head == :. && length(fn.args) >= 2
                     # Qualified name: AdaptiveArrayPools.acquire! etc.
                     qn = fn.args[end]
@@ -771,6 +836,12 @@ function _transform_acquire_calls(expr, pool_name)
                         expr = Expr(:call, _ACQUIRE_IMPL_REF, expr.args[2:end]...)
                     elseif qn == QuoteNode(:unsafe_acquire!) || qn == QuoteNode(:acquire_array!)
                         expr = Expr(:call, _UNSAFE_ACQUIRE_IMPL_REF, expr.args[2:end]...)
+                    elseif qn == QuoteNode(:zeros!)
+                        expr = Expr(:call, _ZEROS_IMPL_REF, expr.args[2:end]...)
+                    elseif qn == QuoteNode(:ones!)
+                        expr = Expr(:call, _ONES_IMPL_REF, expr.args[2:end]...)
+                    elseif qn == QuoteNode(:similar!)
+                        expr = Expr(:call, _SIMILAR_IMPL_REF, expr.args[2:end]...)
                     end
                 end
             end

From 138f863a2bf02b216b54cc79e45134cb0f09906d Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 11:14:37 -0800
Subject: [PATCH 02/20] refactor(convenience): remove unreachable NTuple _impl!
 overloads

User-facing functions always splat tuples into Vararg before calling
_impl!, making the NTuple versions dead code. Removed:
- _zeros_impl!(pool, T, dims::NTuple)
- _zeros_impl!(pool, dims::NTuple)
- _ones_impl!(pool, T, dims::NTuple)
- _ones_impl!(pool, dims::NTuple)
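---
Review note (text between this marker and the diffstat is dropped by `git am`):
the "dead code" premise holds only for direct calls. `_transform_acquire_calls`
(PATCH 01) rewrites `zeros!(pool, ...)` / `ones!(pool, ...)` inside `@with_pool`
to `_zeros_impl!` / `_ones_impl!` with the argument list passed through
unchanged, so `zeros!(pool, dims::Tuple)` and `zeros!(pool, T, dims::Tuple)`
written in a macro body reach `_impl!` with a tuple argument. As far as the
visible patches show, no remaining `_impl!` method accepts that shape once these
overloads are removed, and the tuple-form tests in PATCH 03 only exercise calls
made outside the macro. Please confirm with a tuple-form test inside
`@with_pool` before applying.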
 src/convenience.jl | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/src/convenience.jl b/src/convenience.jl
index 421db6b..0b757f7 100644
--- a/src/convenience.jl
+++ b/src/convenience.jl
@@ -54,19 +54,11 @@ end
     arr
 end
 
-@inline function _zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
-    _zeros_impl!(pool, T, dims...)
-end
-
-# Default type (Float64) overloads for macro transformation
+# Default type (Float64) overload for macro transformation
 @inline function _zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
     _zeros_impl!(pool, Float64, dims...)
 end
 
-@inline function _zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
-    _zeros_impl!(pool, Float64, dims...)
-end
-
 # Nothing fallback (pooling disabled)
 @inline zeros!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
 @inline zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(Float64, dims...)
@@ -125,19 +117,11 @@ end
     arr
 end
 
-@inline function _ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
-    _ones_impl!(pool, T, dims...)
-end
-
-# Default type (Float64) overloads for macro transformation
+# Default type (Float64) overload for macro transformation
 @inline function _ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
     _ones_impl!(pool, Float64, dims...)
 end
 
-@inline function _ones_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
-    _ones_impl!(pool, Float64, dims...)
-end
-
 # Nothing fallback (pooling disabled)
 @inline ones!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
 @inline ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(Float64, dims...)

From 493ade9047aa4636028089bad234017cafebb79d Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 11:15:08 -0800
Subject: [PATCH 03/20] test: add comprehensive tests for convenience functions

Add test_convenience.jl with tests for zeros!, ones!, similar!:
- All API signatures (explicit type, default Float64, NTuple)
- Nothing fallbacks for disabled pooling
- Integration with @with_pool macro
- Pool state management verification
---
 test/runtests.jl         |   1 +
 test/test_convenience.jl | 254 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 255 insertions(+)
 create mode 100644 test/test_convenience.jl

diff --git a/test/runtests.jl b/test/runtests.jl
index 36d1d17..4782a8a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -24,6 +24,7 @@ else
     include("test_nway_cache.jl")
     include("test_fixed_slots.jl")
     include("test_backend_macro_expansion.jl")
+    include("test_convenience.jl")
 
     # CUDA extension tests (auto-detect, skip with TEST_CUDA=false)
     if get(ENV, "TEST_CUDA", "true") != "false"
diff --git a/test/test_convenience.jl b/test/test_convenience.jl
new file mode 100644
index 0000000..af87231
--- /dev/null
+++ b/test/test_convenience.jl
@@ -0,0 +1,254 @@
+@testset "Convenience Functions" begin
+
+    @testset "zeros!" begin
+        pool = AdaptiveArrayPool()
+
+        @testset "with explicit type" begin
+            v = zeros!(pool, Float64, 10)
+            @test length(v) == 10
+            @test eltype(v) == Float64
+            @test all(v .== 0.0)
+
+            v32 = zeros!(pool, Float32, 5)
+            @test length(v32) == 5
+            @test eltype(v32) == Float32
+            @test all(v32 .== 0.0f0)
+
+            vi = zeros!(pool, Int64, 8)
+            @test length(vi) == 8
+            @test eltype(vi) == Int64
+            @test all(vi .== 0)
+        end
+
+        @testset "default type (Float64)" begin
+            v = zeros!(pool, 10)
+            @test length(v) == 10
+            @test eltype(v) == Float64
+            @test all(v .== 0.0)
+        end
+
+        @testset "multi-dimensional" begin
+            m = zeros!(pool, Float64, 3, 4)
+            @test size(m) == (3, 4)
+            @test eltype(m) == Float64
+            @test all(m .== 0.0)
+
+            m32 = zeros!(pool, Float32, 2, 3, 4)
+            @test size(m32) == (2, 3, 4)
+            @test all(m32 .== 0.0f0)
+        end
+
+        @testset "tuple form" begin
+            dims = (5, 6)
+            m = zeros!(pool, dims)
+            @test size(m) == dims
+            @test eltype(m) == Float64
+            @test all(m .== 0.0)
+
+            m32 = zeros!(pool, Float32, dims)
+            @test size(m32) == dims
+            @test eltype(m32) == Float32
+        end
+
+        @testset "Nothing fallback" begin
+            v = zeros!(nothing, Float64, 10)
+            @test v isa Array{Float64}
+            @test length(v) == 10
+            @test all(v .== 0.0)
+
+            v2 = zeros!(nothing, 5, 5)
+            @test v2 isa Matrix{Float64}
+            @test size(v2) == (5, 5)
+
+            # NTuple fallbacks
+            dims = (3, 4)
+            v3 = zeros!(nothing, Float32, dims)
+            @test v3 isa Array{Float32}
+            @test size(v3) == dims
+
+            v4 = zeros!(nothing, dims)
+            @test v4 isa Array{Float64}
+            @test size(v4) == dims
+        end
+    end
+
+    @testset "ones!" begin
+        pool = AdaptiveArrayPool()
+
+        @testset "with explicit type" begin
+            v = ones!(pool, Float64, 10)
+            @test length(v) == 10
+            @test eltype(v) == Float64
+            @test all(v .== 1.0)
+
+            v32 = ones!(pool, Float32, 5)
+            @test length(v32) == 5
+            @test eltype(v32) == Float32
+            @test all(v32 .== 1.0f0)
+
+            vi = ones!(pool, Int64, 8)
+            @test length(vi) == 8
+            @test eltype(vi) == Int64
+            @test all(vi .== 1)
+        end
+
+        @testset "default type (Float64)" begin
+            v = ones!(pool, 10)
+            @test length(v) == 10
+            @test eltype(v) == Float64
+            @test all(v .== 1.0)
+        end
+
+        @testset "multi-dimensional" begin
+            m = ones!(pool, Float64, 3, 4)
+            @test size(m) == (3, 4)
+            @test eltype(m) == Float64
+            @test all(m .== 1.0)
+        end
+
+        @testset "tuple form" begin
+            dims = (5, 6)
+            m = ones!(pool, dims)
+            @test size(m) == dims
+            @test eltype(m) == Float64
+            @test all(m .== 1.0)
+
+            # NTuple with explicit type
+            m32 = ones!(pool, Float32, dims)
+            @test size(m32) == dims
+            @test eltype(m32) == Float32
+            @test all(m32 .== 1.0f0)
+        end
+
+        @testset "Nothing fallback" begin
+            v = ones!(nothing, Float64, 10)
+            @test v isa Array{Float64}
+            @test length(v) == 10
+            @test all(v .== 1.0)
+
+            # Vararg without type
+            v2 = ones!(nothing, 5, 5)
+            @test v2 isa Matrix{Float64}
+            @test size(v2) == (5, 5)
+
+            # NTuple fallbacks
+            dims = (3, 4)
+            v3 = ones!(nothing, Float32, dims)
+            @test v3 isa Array{Float32}
+            @test size(v3) == dims
+
+            v4 = ones!(nothing, dims)
+            @test v4 isa Array{Float64}
+            @test size(v4) == dims
+        end
+    end
+
+    @testset "similar!" begin
+        pool = AdaptiveArrayPool()
+        template = rand(Float64, 10, 10)
+
+        @testset "same type and size" begin
+            v = similar!(pool, template)
+            @test size(v) == size(template)
+            @test eltype(v) == eltype(template)
+        end
+
+        @testset "different type" begin
+            v = similar!(pool, template, Float32)
+            @test size(v) == size(template)
+            @test eltype(v) == Float32
+        end
+
+        @testset "different size" begin
+            v = similar!(pool, template, 5, 5)
+            @test size(v) == (5, 5)
+            @test eltype(v) == eltype(template)
+        end
+
+        @testset "different type and size" begin
+            v = similar!(pool, template, Int32, 3, 4)
+            @test size(v) == (3, 4)
+            @test eltype(v) == Int32
+        end
+
+        @testset "1D template" begin
+            template1d = rand(20)
+            v = similar!(pool, template1d)
+            @test length(v) == 20
+            @test eltype(v) == Float64
+        end
+
+        @testset "Nothing fallback" begin
+            v = similar!(nothing, template)
+            @test v isa Array{Float64}
+            @test size(v) == size(template)
+
+            v2 = similar!(nothing, template, Int64)
+            @test v2 isa Array{Int64}
+            @test size(v2) == size(template)
+
+            # Vararg with different size (same type)
+            v3 = similar!(nothing, template, 5, 5)
+            @test v3 isa Array{Float64}
+            @test size(v3) == (5, 5)
+
+            # Vararg with different type and size
+            v4 = similar!(nothing, template, Int32, 3, 4)
+            @test v4 isa Array{Int32}
+            @test size(v4) == (3, 4)
+        end
+    end
+
+    @testset "Integration with @with_pool" begin
+        @testset "zeros! in macro" begin
+            result = @with_pool pool begin
+                v = zeros!(pool, Float64, 100)
+                v .+= 1.0
+                sum(v)
+            end
+            @test result == 100.0
+        end
+
+        @testset "ones! in macro" begin
+            result = @with_pool pool begin
+                v = ones!(pool, Float64, 50)
+                sum(v)
+            end
+            @test result == 50.0
+        end
+
+        @testset "similar! in macro" begin
+            template = rand(10)
+            result = @with_pool pool begin
+                v = similar!(pool, template)
+                v .= 2.0
+                sum(v)
+            end
+            @test result == 20.0
+        end
+
+        @testset "mixed usage" begin
+            result = @with_pool pool begin
+                a = zeros!(pool, 10)
+                b = ones!(pool, 10)
+                c = acquire!(pool, Float64, 10)
+                c .= a .+ b
+                sum(c)
+            end
+            @test result == 10.0
+        end
+    end
+
+    @testset "Pool state management" begin
+        pool = AdaptiveArrayPool()
+        checkpoint!(pool)
+
+        v1 = zeros!(pool, Float64, 10)
+        v2 = ones!(pool, Float64, 10)
+        @test pool.float64.n_active == 2
+
+        rewind!(pool)
+        @test pool.float64.n_active == 0
+    end
+
+end # Convenience Functions

From 8fc550b756ac9749a7aeb635834394fa77fdaf27 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 11:19:24 -0800
Subject: [PATCH 04/20] test: add type extraction tests for convenience
 functions

Add _extract_acquire_types tests for zeros!, ones!, similar!:
- zeros!/ones! with default type (Float64) and explicit type
- similar! with same type as template (nargs == 3)
- similar! with explicit type (nargs >= 4, type arg)
- similar! with dims only (nargs >= 4, dims only)
- Mixed convenience functions integration test
---
 test/test_macro_internals.jl | 84 ++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl
index 9b8de5c..557ddf6 100644
--- a/test/test_macro_internals.jl
+++ b/test/test_macro_internals.jl
@@ -1053,6 +1053,90 @@ import AdaptiveArrayPools: _extract_local_assignments, _filter_static_types, _ex
                 @test length(static_types) == 5
                 @test !has_dynamic
             end
+
+            # ==================================================================
+            # Convenience functions (zeros!, ones!, similar!)
+            # ==================================================================
+
+            @testset "zeros! default type (Float64)" begin
+                call_expr = :(v = zeros!(pool, 10))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                @test :Float64 in found
+            end
+
+            @testset "zeros! explicit type" begin
+                call_expr = :(v = zeros!(pool, Float32, 10, 10))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                @test :Float32 in found
+            end
+
+            @testset "ones! default type (Float64)" begin
+                call_expr = :(v = ones!(pool, 10))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                @test :Float64 in found
+            end
+
+            @testset "ones! explicit type" begin
+                call_expr = :(v = ones!(pool, Int64, 5, 5))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                @test :Int64 in found
+            end
+
+            @testset "similar! same type as template (nargs == 3)" begin
+                call_expr = :(v = similar!(pool, template))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                elty = first(found)  # expected to be the expression `eltype(template)`
+                @test elty isa Expr
+                @test elty.head == :call
+                @test elty.args[1] == :eltype
+                @test elty.args[2] == :template
+            end
+
+            @testset "similar! explicit type (nargs >= 4, type arg)" begin
+                call_expr = :(v = similar!(pool, template, Float32))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                @test :Float32 in found
+            end
+
+            @testset "similar! explicit type with dims (nargs >= 4, type + dims)" begin
+                call_expr = :(v = similar!(pool, template, Int64, 10, 10))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                @test :Int64 in found
+            end
+
+            @testset "similar! same type with different dims (nargs >= 4, dims only)" begin
+                call_expr = :(v = similar!(pool, template, 5, 5))
+                found = _extract_acquire_types(call_expr, :pool)
+                @test length(found) == 1
+                elty = first(found)  # expected to be the expression `eltype(template)`
+                @test elty isa Expr
+                @test elty.head == :call
+                @test elty.args[1] == :eltype
+                @test elty.args[2] == :template
+            end
+
+            @testset "mixed convenience functions" begin
+                block = quote
+                    v1 = zeros!(pool, Float64, 10)
+                    v2 = ones!(pool, Float32, 5)
+                    v3 = similar!(pool, template)
+                    v4 = similar!(pool, template, Int64)
+                end
+                found = _extract_acquire_types(block, :pool)
+                is_eltype_call(t) = t isa Expr && t.head == :call && t.args[1] == :eltype
+                @test length(found) == 4
+                @test :Float64 in found
+                @test :Float32 in found
+                @test :Int64 in found
+                @test any(is_eltype_call, found)
+            end
         end
 
     end

From 9ed60704f1be262af4b530e502fb54daf997f213 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 11:23:44 -0800
Subject: [PATCH 05/20] test: add tests for convenience function
 transformations in macro internals

---
 test/test_macro_internals.jl | 62 ++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl
index 557ddf6..6e62f29 100644
--- a/test/test_macro_internals.jl
+++ b/test/test_macro_internals.jl
@@ -1304,14 +1304,76 @@ import AdaptiveArrayPools: _extract_local_assignments, _filter_static_types, _ex
             end
         end
 
+        @testset "convenience function transformation" begin
+            using AdaptiveArrayPools: _ZEROS_IMPL_REF, _ONES_IMPL_REF, _SIMILAR_IMPL_REF
+
+            @testset "zeros! → _zeros_impl!" begin
+                call = :(zeros!(pool, Float64, 10))
+                rewritten = _transform_acquire_calls(call, :pool)
+                @test rewritten.args[1] == _ZEROS_IMPL_REF
+                @test rewritten.args[2] == :pool
+                @test rewritten.args[3] == :Float64
+            end
+
+            @testset "ones! → _ones_impl!" begin
+                call = :(ones!(pool, Int64, 5, 5))
+                rewritten = _transform_acquire_calls(call, :pool)
+                @test rewritten.args[1] == _ONES_IMPL_REF
+                @test rewritten.args[2] == :pool
+                @test rewritten.args[3] == :Int64
+            end
+
+            @testset "similar! → _similar_impl!" begin
+                call = :(similar!(pool, template))
+                rewritten = _transform_acquire_calls(call, :pool)
+                @test rewritten.args[1] == _SIMILAR_IMPL_REF
+                @test rewritten.args[2] == :pool
+                @test rewritten.args[3] == :template
+            end
+
+            @testset "qualified zeros! → _zeros_impl!" begin
+                call = :(AAP.zeros!(pool, Float32, 10))
+                rewritten = _transform_acquire_calls(call, :pool)
+                @test rewritten.args[1] == _ZEROS_IMPL_REF
+                @test rewritten.args[2] == :pool
+            end
+
+            @testset "qualified ones! → _ones_impl!" begin
+                call = :(AAP.ones!(pool, Int32, 5))
+                rewritten = _transform_acquire_calls(call, :pool)
+                @test rewritten.args[1] == _ONES_IMPL_REF
+                @test rewritten.args[2] == :pool
+            end
+
+            @testset "qualified similar! → _similar_impl!" begin
+                call = :(AAP.similar!(pool, arr, Float64))
+                rewritten = _transform_acquire_calls(call, :pool)
+                @test rewritten.args[1] == _SIMILAR_IMPL_REF
+                @test rewritten.args[2] == :pool
+            end
+        end
+
         @testset "GlobalRef verification" begin
+            using AdaptiveArrayPools: _ZEROS_IMPL_REF, _ONES_IMPL_REF, _SIMILAR_IMPL_REF
+
             # Verify that GlobalRef points to AdaptiveArrayPools module
             @test _ACQUIRE_IMPL_REF isa GlobalRef
             @test _UNSAFE_ACQUIRE_IMPL_REF isa GlobalRef
+            @test _ZEROS_IMPL_REF isa GlobalRef  # rewrite targets for zeros!/ones!/similar! calls
+            @test _ONES_IMPL_REF isa GlobalRef
+            @test _SIMILAR_IMPL_REF isa GlobalRef
+
             @test _ACQUIRE_IMPL_REF.mod == AdaptiveArrayPools
             @test _UNSAFE_ACQUIRE_IMPL_REF.mod == AdaptiveArrayPools
+            @test _ZEROS_IMPL_REF.mod == AdaptiveArrayPools
+            @test _ONES_IMPL_REF.mod == AdaptiveArrayPools
+            @test _SIMILAR_IMPL_REF.mod == AdaptiveArrayPools
+
             @test _ACQUIRE_IMPL_REF.name == :_acquire_impl!
             @test _UNSAFE_ACQUIRE_IMPL_REF.name == :_unsafe_acquire_impl!
+            @test _ZEROS_IMPL_REF.name == :_zeros_impl!
+            @test _ONES_IMPL_REF.name == :_ones_impl!
+            @test _SIMILAR_IMPL_REF.name == :_similar_impl!
         end
     end
 

From 50daf7bc8610f87b41aa939202a28b37e759f8a9 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 11:26:09 -0800
Subject: [PATCH 06/20] test: add tests for _get_pool_for_backend function

---
 test/test_macro_internals.jl | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl
index 6e62f29..73c4201 100644
--- a/test/test_macro_internals.jl
+++ b/test/test_macro_internals.jl
@@ -1377,4 +1377,33 @@ import AdaptiveArrayPools: _extract_local_assignments, _filter_static_types, _ex
         end
     end
 
+    # ==========================================================================
+    # _get_pool_for_backend Tests
+    # ==========================================================================
+
+    @testset "_get_pool_for_backend" begin
+        using AdaptiveArrayPools: _get_pool_for_backend
+
+        @testset "CPU backend returns task-local pool" begin
+            pool = _get_pool_for_backend(Val(:cpu))
+            @test pool isa AdaptiveArrayPool
+            # Should return same instance (task-local)
+            pool2 = _get_pool_for_backend(Val(:cpu))
+            @test pool === pool2
+        end
+
+        @testset "Unknown backend throws error" begin
+            @test_throws ErrorException _get_pool_for_backend(Val(:unknown_backend))
+            @test_throws ErrorException _get_pool_for_backend(Val(:rocm))
+
+            # Capture the exception as a value so the message checks always run;
+            # asserting inside `catch` would pass vacuously if nothing were thrown.
+            err = try _get_pool_for_backend(Val(:foo)); nothing catch e; e end
+            @test err isa ErrorException
+            msg = err isa ErrorException ? err.msg : ""
+            @test occursin("foo", msg)
+            @test occursin("not available", msg)
+        end
+    end
+
 end # Macro Internals
\ No newline at end of file

From e5f385db16c9905e758eb9ddab6787ad5d49d65c Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 11:26:56 -0800
Subject: [PATCH 07/20] refactor(types): remove unused storage type accessor
 functions

---
 src/types.jl | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/types.jl b/src/types.jl
index 3e03625..06bee76 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -79,10 +79,6 @@ Abstract base for multi-type array pools.
 """
 abstract type AbstractArrayPool end
 
-# Storage type accessor
-storage_type(::AbstractTypedPool{T,V}) where {T,V} = V
-storage_type(::Type{<:AbstractTypedPool{T,V}}) where {T,V} = V
-
 # ==============================================================================
 # Core Data Structures
 # ==============================================================================

From 6c608edc208b2e2d94ca1ffc0cbb666983274491 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 12:09:10 -0800
Subject: [PATCH 08/20] feat(cuda): add convenience functions with Float32
 default

- Add zeros!, ones! for CuAdaptiveArrayPool with Float32 as default type
  (matching CUDA.zeros() behavior)
- Clean up redundant imports in extension module
  (each sub-file handles its own imports)
- Add comprehensive CUDA convenience function tests
---
 .../AdaptiveArrayPoolsCUDAExt.jl              |  7 +-
 ext/AdaptiveArrayPoolsCUDAExt/convenience.jl  | 41 ++++++++
 test/cuda/runtests.jl                         |  1 +
 test/cuda/test_convenience.jl                 | 94 +++++++++++++++++++
 4 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
 create mode 100644 test/cuda/test_convenience.jl

diff --git a/ext/AdaptiveArrayPoolsCUDAExt/AdaptiveArrayPoolsCUDAExt.jl b/ext/AdaptiveArrayPoolsCUDAExt/AdaptiveArrayPoolsCUDAExt.jl
index 7ea911e..238b0e2 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/AdaptiveArrayPoolsCUDAExt.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/AdaptiveArrayPoolsCUDAExt.jl
@@ -9,9 +9,7 @@ Loaded automatically when `using CUDA` with AdaptiveArrayPools.
 module AdaptiveArrayPoolsCUDAExt
 
 using AdaptiveArrayPools
-using AdaptiveArrayPools: AbstractTypedPool, AbstractArrayPool, CACHE_WAYS,
-                          allocate_vector, wrap_array, get_typed_pool!, get_view!,
-                          foreach_fixed_slot, _get_pool_for_backend
+using AdaptiveArrayPools: AbstractTypedPool, AbstractArrayPool, CACHE_WAYS
 using CUDA
 
 # Type definitions
@@ -35,6 +33,9 @@ include("utils.jl")
 # Macro support (@with_pool :cuda)
 include("macros.jl")
 
+# Convenience functions (Float32 default for zeros!/ones!)
+include("convenience.jl")
+
 # Exports (types only - functions are exported from main module)
 export CuTypedPool, CuAdaptiveArrayPool
 export GPU_FIXED_SLOT_FIELDS
diff --git a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
new file mode 100644
index 0000000..bd99fff
--- /dev/null
+++ b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
@@ -0,0 +1,41 @@
+# ==============================================================================
+# CUDA Convenience Functions (Float32 default)
+# ==============================================================================
+# Override default-type versions only; explicit type versions use base AbstractArrayPool methods.
+# This matches CUDA.zeros() behavior which defaults to Float32.
+
+using AdaptiveArrayPools: _mark_untracked!, _zeros_impl!, _ones_impl!
+
+# ==============================================================================
+# zeros! - Float32 default for CUDA (instead of Float64)
+# ==============================================================================
+
+@inline function AdaptiveArrayPools.zeros!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
+    _mark_untracked!(pool)
+    return _zeros_impl!(pool, Float32, dims...)
+end
+
+@inline function AdaptiveArrayPools.zeros!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
+    # Tuple form: splat into the varargs method above, which marks the pool untracked.
+    return AdaptiveArrayPools.zeros!(pool, dims...)
+end
+
+# ==============================================================================
+# ones! - Float32 default for CUDA (instead of Float64)
+# ==============================================================================
+
+@inline function AdaptiveArrayPools.ones!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
+    _mark_untracked!(pool)
+    return _ones_impl!(pool, Float32, dims...)
+end
+
+@inline function AdaptiveArrayPools.ones!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
+    # Tuple form: splat into the varargs method above, which marks the pool untracked.
+    return AdaptiveArrayPools.ones!(pool, dims...)
+end
+
+# ==============================================================================
+# similar! - No override needed
+# ==============================================================================
+# similar! uses eltype(template_array) as default, which is backend-agnostic.
+# The base AbstractArrayPool methods work correctly for CUDA pools.
diff --git a/test/cuda/runtests.jl b/test/cuda/runtests.jl
index 2be6590..1bfab0c 100644
--- a/test/cuda/runtests.jl
+++ b/test/cuda/runtests.jl
@@ -39,4 +39,5 @@ else
     include("test_allocation.jl")
     include("test_nway_cache.jl")
     include("test_display.jl")
+    include("test_convenience.jl")
 end
diff --git a/test/cuda/test_convenience.jl b/test/cuda/test_convenience.jl
new file mode 100644
index 0000000..eef479a
--- /dev/null
+++ b/test/cuda/test_convenience.jl
@@ -0,0 +1,94 @@
+@testset "CUDA Convenience Functions" begin
+    pool = get_task_local_cuda_pool()
+    checkpoint!(pool)
+
+    @testset "zeros! default type is Float32" begin
+        vec32 = zeros!(pool, 10)
+        @test vec32 isa CuArray{Float32}
+        @test length(vec32) == 10
+        @test all(vec32 .== 0.0f0)
+
+        mat32 = zeros!(pool, 3, 4)
+        @test mat32 isa CuArray{Float32,2}
+        @test size(mat32) == (3, 4)
+        @test all(mat32 .== 0.0f0)
+
+        # Dimensions passed as a single tuple
+        shape = (2, 3)
+        from_tuple = zeros!(pool, shape)
+        @test from_tuple isa CuArray{Float32,2}
+        @test size(from_tuple) == shape
+    end
+
+    @testset "zeros! explicit type" begin
+        z64 = zeros!(pool, Float64, 10)
+        @test z64 isa CuArray{Float64}
+        @test all(z64 .== 0.0)
+
+        z16 = zeros!(pool, Float16, 5)
+        @test z16 isa CuArray{Float16}
+
+        zi32 = zeros!(pool, Int32, 8)
+        @test zi32 isa CuArray{Int32}
+        @test all(zi32 .== 0)
+    end
+
+    @testset "ones! default type is Float32" begin
+        vec32 = ones!(pool, 10)
+        @test vec32 isa CuArray{Float32}
+        @test length(vec32) == 10
+        @test all(vec32 .== 1.0f0)
+
+        mat32 = ones!(pool, 3, 4)
+        @test mat32 isa CuArray{Float32,2}
+        @test size(mat32) == (3, 4)
+        @test all(mat32 .== 1.0f0)
+
+        # Dimensions passed as a single tuple
+        shape = (2, 3)
+        from_tuple = ones!(pool, shape)
+        @test from_tuple isa CuArray{Float32,2}
+        @test size(from_tuple) == shape
+    end
+
+    @testset "ones! explicit type" begin
+        o64 = ones!(pool, Float64, 10)
+        @test o64 isa CuArray{Float64}
+        @test all(o64 .== 1.0)
+
+        oi32 = ones!(pool, Int32, 8)
+        @test oi32 isa CuArray{Int32}
+        @test all(oi32 .== 1)
+    end
+
+    @testset "similar!" begin
+        # Template with Float32 elements
+        template32 = CUDA.rand(Float32, 5, 5)
+        like32 = similar!(pool, template32)
+        @test like32 isa CuArray{Float32,2}
+        @test size(like32) == (5, 5)
+
+        # Template with Float64 elements
+        template64 = CUDA.rand(Float64, 3, 4)
+        like64 = similar!(pool, template64)
+        @test like64 isa CuArray{Float64,2}
+        @test size(like64) == (3, 4)
+
+        # Element type overridden, size taken from the template
+        v_int = similar!(pool, template32, Int32)
+        @test v_int isa CuArray{Int32,2}
+        @test size(v_int) == (5, 5)
+
+        # Size overridden, element type taken from the template
+        v_dims = similar!(pool, template32, 10)
+        @test v_dims isa CuArray{Float32,1}
+        @test length(v_dims) == 10
+
+        # Different type and dims
+        v_both = similar!(pool, template32, Float64, 2, 3)
+        @test v_both isa CuArray{Float64,2}
+        @test size(v_both) == (2, 3)
+    end
+
+    rewind!(pool)
+end

From a4cd29754a6ed4c76f05156c49e21655fc35708b Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 12:22:40 -0800
Subject: [PATCH 09/20] feat: add unsafe_zeros!, unsafe_ones!, unsafe_similar!
 convenience functions

Add raw-array (non-view) variants of initialized convenience functions:

- unsafe_zeros!(pool, [T,] dims...): zero-initialized raw arrays
- unsafe_ones!(pool, [T,] dims...): one-initialized raw arrays
- unsafe_similar!(pool, template, [T,] [dims...]): raw arrays from template

Implementation includes:
- Full macro transformation support (_impl! functions)
- CUDA Float32 defaults (matching zeros!/ones! behavior)
- Nothing fallbacks for disabled pooling
- Comprehensive test coverage for CPU and CUDA backends
---
 ext/AdaptiveArrayPoolsCUDAExt/convenience.jl |  34 +++-
 src/AdaptiveArrayPools.jl                    |   1 +
 src/convenience.jl                           | 198 +++++++++++++++++++
 src/macros.jl                                |  24 ++-
 test/cuda/test_convenience.jl                |  88 +++++++++
 test/test_convenience.jl                     | 168 ++++++++++++++++
 6 files changed, 506 insertions(+), 7 deletions(-)

diff --git a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
index bd99fff..86bb4a2 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
@@ -4,7 +4,7 @@
 # Override default-type versions only; explicit type versions use base AbstractArrayPool methods.
 # This matches CUDA.zeros() behavior which defaults to Float32.
 
-using AdaptiveArrayPools: _mark_untracked!, _zeros_impl!, _ones_impl!
+using AdaptiveArrayPools: _mark_untracked!, _zeros_impl!, _ones_impl!, _unsafe_zeros_impl!, _unsafe_ones_impl!
 
 # ==============================================================================
 # zeros! - Float32 default for CUDA (instead of Float64)
@@ -35,7 +35,35 @@ end
 end
 
 # ==============================================================================
-# similar! - No override needed
+# unsafe_zeros! - Float32 default for CUDA (instead of Float64)
 # ==============================================================================
-# similar! uses eltype(template_array) as default, which is backend-agnostic.
+
+@inline function AdaptiveArrayPools.unsafe_zeros!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
+    _mark_untracked!(pool)
+    return _unsafe_zeros_impl!(pool, Float32, dims...)
+end
+# NOTE(review): macro-rewritten calls hit _unsafe_zeros_impl! directly, skipping this Float32 default; confirm for :cuda.
+@inline function AdaptiveArrayPools.unsafe_zeros!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
+    # Tuple form: splat into the varargs method above, which marks the pool untracked.
+    return AdaptiveArrayPools.unsafe_zeros!(pool, dims...)
+end
+
+# ==============================================================================
+# unsafe_ones! - Float32 default for CUDA (instead of Float64)
+# ==============================================================================
+
+@inline function AdaptiveArrayPools.unsafe_ones!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
+    _mark_untracked!(pool)
+    return _unsafe_ones_impl!(pool, Float32, dims...)
+end
+# NOTE(review): macro-rewritten calls hit _unsafe_ones_impl! directly, skipping this Float32 default; confirm for :cuda.
+@inline function AdaptiveArrayPools.unsafe_ones!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
+    # Tuple form: splat into the varargs method above, which marks the pool untracked.
+    return AdaptiveArrayPools.unsafe_ones!(pool, dims...)
+end
+
+# ==============================================================================
+# similar! / unsafe_similar! - No override needed
+# ==============================================================================
+# These functions use eltype(template_array) as default, which is backend-agnostic.
 # The base AbstractArrayPool methods work correctly for CUDA pools.
diff --git a/src/AdaptiveArrayPools.jl b/src/AdaptiveArrayPools.jl
index c02afdf..6e9b3ec 100644
--- a/src/AdaptiveArrayPools.jl
+++ b/src/AdaptiveArrayPools.jl
@@ -6,6 +6,7 @@ using Printf
 export AdaptiveArrayPool, acquire!, unsafe_acquire!, pool_stats, get_task_local_pool
 export acquire_view!, acquire_array!  # Explicit naming aliases
 export zeros!, ones!, similar!  # Convenience functions
+export unsafe_zeros!, unsafe_ones!, unsafe_similar!  # Unsafe convenience functions
 export @with_pool, @maybe_with_pool
 export USE_POOLING, MAYBE_POOLING_ENABLED, POOL_DEBUG
 export checkpoint!, rewind!, reset!
diff --git a/src/convenience.jl b/src/convenience.jl
index 0b757f7..0c8eb86 100644
--- a/src/convenience.jl
+++ b/src/convenience.jl
@@ -200,3 +200,201 @@ end
 @inline similar!(::Nothing, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
 @inline similar!(::Nothing, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
 @inline similar!(::Nothing, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
+
+# ==============================================================================
+# unsafe_zeros! - Acquire zero-initialized raw arrays from pool
+# ==============================================================================
+
+"""
+    unsafe_zeros!(pool, dims...) -> Array
+    unsafe_zeros!(pool, T, dims...) -> Array
+    unsafe_zeros!(pool, dims::Tuple) -> Array
+    unsafe_zeros!(pool, T, dims::Tuple) -> Array
+
+Return a raw array (not a view) taken from `pool` with every element set to `zero(T)`.
+
+Behaves like `unsafe_acquire!(pool, T, dims...)` followed by `fill!(arr, zero(T))`.
+When `T` is omitted the element type is `Float64`.
+
+## Example
+```julia
+@with_pool pool begin
+    v = unsafe_zeros!(pool, 100)              # Array{Float64}, all zeros
+    m = unsafe_zeros!(pool, Float32, 10, 10)  # Array{Float32}, all zeros
+end
+```
+
+See also: [`unsafe_ones!`](@ref), [`zeros!`](@ref), [`unsafe_acquire!`](@ref)
+"""
+@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    _mark_untracked!(pool)
+    return _unsafe_zeros_impl!(pool, T, dims...)
+end
+
+@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+    _mark_untracked!(pool)
+    return _unsafe_zeros_impl!(pool, Float64, dims...)
+end
+
+@inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+    _mark_untracked!(pool)
+    return _unsafe_zeros_impl!(pool, T, dims...)
+end
+
+@inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+    _mark_untracked!(pool)
+    return _unsafe_zeros_impl!(pool, Float64, dims...)
+end
+
+# Internal implementation (macro transformation target; does not mark the pool untracked)
+@inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    arr = _unsafe_acquire_impl!(pool, T, dims...)
+    fill!(arr, zero(T))
+    return arr
+end
+
+# Default-type (Float64) and tuple-dims overloads: the macro forwards call arguments unchanged.
+@inline _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} = _unsafe_zeros_impl!(pool, Float64, dims...)
+@inline _unsafe_zeros_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = _unsafe_zeros_impl!(pool, T, dims...)
+@inline _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} = _unsafe_zeros_impl!(pool, Float64, dims...)
+
+# Pooling disabled (`pool` is `nothing`): allocate a fresh zero-filled array via Base
+@inline unsafe_zeros!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims)
+@inline unsafe_zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(Float64, dims)
+@inline unsafe_zeros!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims)
+@inline unsafe_zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(Float64, dims)
+
+# ==============================================================================
+# unsafe_ones! - Acquire one-initialized raw arrays from pool
+# ==============================================================================
+
+"""
+    unsafe_ones!(pool, dims...) -> Array
+    unsafe_ones!(pool, T, dims...) -> Array
+    unsafe_ones!(pool, dims::Tuple) -> Array
+    unsafe_ones!(pool, T, dims::Tuple) -> Array
+
+Return a raw array (not a view) taken from `pool` with every element set to `one(T)`.
+
+Behaves like `unsafe_acquire!(pool, T, dims...)` followed by `fill!(arr, one(T))`.
+When `T` is omitted the element type is `Float64`.
+
+## Example
+```julia
+@with_pool pool begin
+    v = unsafe_ones!(pool, 100)              # Array{Float64}, all ones
+    m = unsafe_ones!(pool, Float32, 10, 10)  # Array{Float32}, all ones
+end
+```
+
+See also: [`unsafe_zeros!`](@ref), [`ones!`](@ref), [`unsafe_acquire!`](@ref)
+"""
+@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    _mark_untracked!(pool)
+    return _unsafe_ones_impl!(pool, T, dims...)
+end
+
+@inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
+    _mark_untracked!(pool)
+    return _unsafe_ones_impl!(pool, Float64, dims...)
+end
+
+@inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
+    _mark_untracked!(pool)
+    return _unsafe_ones_impl!(pool, T, dims...)
+end
+
+@inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
+    _mark_untracked!(pool)
+    return _unsafe_ones_impl!(pool, Float64, dims...)
+end
+
+# Internal implementation (macro transformation target; does not mark the pool untracked)
+@inline function _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    arr = _unsafe_acquire_impl!(pool, T, dims...)
+    fill!(arr, one(T))
+    return arr
+end
+
+# Default-type (Float64) and tuple-dims overloads: the macro forwards call arguments unchanged.
+@inline _unsafe_ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N} = _unsafe_ones_impl!(pool, Float64, dims...)
+@inline _unsafe_ones_impl!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = _unsafe_ones_impl!(pool, T, dims...)
+@inline _unsafe_ones_impl!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N} = _unsafe_ones_impl!(pool, Float64, dims...)
+
+# Pooling disabled (`pool` is `nothing`): allocate a fresh one-filled array via Base
+@inline unsafe_ones!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims)
+@inline unsafe_ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(Float64, dims)
+@inline unsafe_ones!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims)
+@inline unsafe_ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(Float64, dims)
+
+# ==============================================================================
+# unsafe_similar! - Acquire raw arrays with same type/size as template
+# ==============================================================================
+
+"""
+    unsafe_similar!(pool, array) -> Array
+    unsafe_similar!(pool, array, T) -> Array
+    unsafe_similar!(pool, array, dims...) -> Array
+    unsafe_similar!(pool, array, T, dims...) -> Array
+
+Return an uninitialized raw array (not a view) from `pool`; missing arguments default to the template.
+
+- `unsafe_similar!(pool, A)`: element type and size both taken from `A`
+- `unsafe_similar!(pool, A, T)`: element type `T`, size taken from `A`
+- `unsafe_similar!(pool, A, dims...)`: element type taken from `A`, size `dims`
+- `unsafe_similar!(pool, A, T, dims...)`: element type `T`, size `dims`
+
+## Example
+```julia
+A = rand(10, 10)
+@with_pool pool begin
+    B = unsafe_similar!(pool, A)              # Same type and size, raw array
+    C = unsafe_similar!(pool, A, Float32)     # Float32, same size
+    D = unsafe_similar!(pool, A, 5, 5)        # Same type, different size
+end
+```
+
+See also: [`similar!`](@ref), [`unsafe_acquire!`](@ref)
+"""
+@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray)
+    _mark_untracked!(pool)
+    return _unsafe_similar_impl!(pool, x)
+end
+
+@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
+    _mark_untracked!(pool)
+    return _unsafe_similar_impl!(pool, x, T)
+end
+
+@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
+    _mark_untracked!(pool)
+    return _unsafe_similar_impl!(pool, x, dims...)
+end
+
+@inline function unsafe_similar!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    _mark_untracked!(pool)
+    return _unsafe_similar_impl!(pool, x, T, dims...)
+end
+
+# Internal implementation (macro transformation target): template supplies missing type/size
+@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray)
+    return _unsafe_similar_impl!(pool, x, eltype(x))
+end
+
+@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}) where {T}
+    return _unsafe_acquire_impl!(pool, T, size(x))
+end
+
+@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, dims::Vararg{Int,N}) where {N}
+    return _unsafe_acquire_impl!(pool, eltype(x), dims...)
+end
+
+@inline function _unsafe_similar_impl!(pool::AbstractArrayPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N}
+    return _unsafe_acquire_impl!(pool, T, dims...)
+end
+
+# Pooling disabled (`pool` is `nothing`): defer to Base `similar` on the template
+@inline unsafe_similar!(::Nothing, x::AbstractArray) = similar(x)
+@inline unsafe_similar!(::Nothing, x::AbstractArray, ::Type{E}) where {E} = similar(x, E)
+@inline unsafe_similar!(::Nothing, x::AbstractArray, dims::Vararg{Int,D}) where {D} = similar(x, dims...)
+@inline unsafe_similar!(::Nothing, x::AbstractArray, ::Type{E}, dims::Vararg{Int,D}) where {E,D} = similar(x, E, dims...)
diff --git a/src/macros.jl b/src/macros.jl
index 8f824dc..39db98c 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -569,6 +569,7 @@ Supported functions:
 - `acquire!` and its alias `acquire_view!`
 - `unsafe_acquire!` and its alias `acquire_array!`
 - `zeros!`, `ones!`, `similar!`
+- `unsafe_zeros!`, `unsafe_ones!`, `unsafe_similar!`
 
 Handles various forms:
 - `[unsafe_]acquire!(pool, Type, dims...)`: extracts Type directly
@@ -610,8 +611,8 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}())
                         # acquire!(pool, x) - similar-style form
                         push!(types, Expr(:call, :eltype, expr.args[3]))
                     end
-                # zeros!/ones!
-                elseif fn == :zeros! || fn == :ones! || fn_name == :zeros! || fn_name == :ones!
+                # zeros!/ones!/unsafe_zeros!/unsafe_ones!
+                elseif fn in (:zeros!, :ones!, :unsafe_zeros!, :unsafe_ones!) || fn_name in (:zeros!, :ones!, :unsafe_zeros!, :unsafe_ones!)
                     if nargs >= 3
                         third_arg = expr.args[3]
                         # Check if third arg looks like a type (Symbol starting with uppercase or curly)
@@ -622,8 +623,8 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}())
                             push!(types, :Float64)
                         end
                     end
-                # similar!
-                elseif fn == :similar! || fn_name == :similar!
+                # similar!/unsafe_similar!
+                elseif fn in (:similar!, :unsafe_similar!) || fn_name in (:similar!, :unsafe_similar!)
                     if nargs == 3
                         # similar!(pool, x) - same type as x
                         push!(types, Expr(:call, :eltype, expr.args[3]))
@@ -808,6 +809,9 @@ const _UNSAFE_ACQUIRE_IMPL_REF = GlobalRef(@__MODULE__, :_unsafe_acquire_impl!)
 const _ZEROS_IMPL_REF = GlobalRef(@__MODULE__, :_zeros_impl!)
 const _ONES_IMPL_REF = GlobalRef(@__MODULE__, :_ones_impl!)
 const _SIMILAR_IMPL_REF = GlobalRef(@__MODULE__, :_similar_impl!)
+const _UNSAFE_ZEROS_IMPL_REF = GlobalRef(@__MODULE__, :_unsafe_zeros_impl!)
+const _UNSAFE_ONES_IMPL_REF = GlobalRef(@__MODULE__, :_unsafe_ones_impl!)
+const _UNSAFE_SIMILAR_IMPL_REF = GlobalRef(@__MODULE__, :_unsafe_similar_impl!)
 
 function _transform_acquire_calls(expr, pool_name)
     if expr isa Expr
@@ -829,6 +833,12 @@ function _transform_acquire_calls(expr, pool_name)
                     expr = Expr(:call, _ONES_IMPL_REF, expr.args[2:end]...)
                 elseif fn == :similar!
                     expr = Expr(:call, _SIMILAR_IMPL_REF, expr.args[2:end]...)
+                elseif fn == :unsafe_zeros!
+                    expr = Expr(:call, _UNSAFE_ZEROS_IMPL_REF, expr.args[2:end]...)
+                elseif fn == :unsafe_ones!
+                    expr = Expr(:call, _UNSAFE_ONES_IMPL_REF, expr.args[2:end]...)
+                elseif fn == :unsafe_similar!
+                    expr = Expr(:call, _UNSAFE_SIMILAR_IMPL_REF, expr.args[2:end]...)
                 elseif fn isa Expr && fn.head == :. && length(fn.args) >= 2
                     # Qualified name: AdaptiveArrayPools.acquire! etc.
                     qn = fn.args[end]
@@ -842,6 +852,12 @@ function _transform_acquire_calls(expr, pool_name)
                         expr = Expr(:call, _ONES_IMPL_REF, expr.args[2:end]...)
                     elseif qn == QuoteNode(:similar!)
                         expr = Expr(:call, _SIMILAR_IMPL_REF, expr.args[2:end]...)
+                    elseif qn == QuoteNode(:unsafe_zeros!)
+                        expr = Expr(:call, _UNSAFE_ZEROS_IMPL_REF, expr.args[2:end]...)
+                    elseif qn == QuoteNode(:unsafe_ones!)
+                        expr = Expr(:call, _UNSAFE_ONES_IMPL_REF, expr.args[2:end]...)
+                    elseif qn == QuoteNode(:unsafe_similar!)
+                        expr = Expr(:call, _UNSAFE_SIMILAR_IMPL_REF, expr.args[2:end]...)
                     end
                 end
             end
diff --git a/test/cuda/test_convenience.jl b/test/cuda/test_convenience.jl
index eef479a..8bb7858 100644
--- a/test/cuda/test_convenience.jl
+++ b/test/cuda/test_convenience.jl
@@ -90,5 +90,93 @@
         @test size(v_both) == (2, 3)
     end
 
+    @testset "unsafe_zeros! default type is Float32" begin
+        v = unsafe_zeros!(pool, 10)
+        @test v isa CuArray{Float32,1}
+        @test !(v isa SubArray)  # Raw array, not view
+        @test length(v) == 10
+        @test all(v .== 0.0f0)
+
+        m = unsafe_zeros!(pool, 3, 4)
+        @test m isa CuArray{Float32,2}
+        @test !(m isa SubArray)
+        @test size(m) == (3, 4)
+        @test all(m .== 0.0f0)
+
+        # Tuple form
+        dims = (2, 3)
+        t = unsafe_zeros!(pool, dims)
+        @test t isa CuArray{Float32,2}
+        @test size(t) == dims
+    end
+
+    @testset "unsafe_zeros! explicit type" begin
+        v64 = unsafe_zeros!(pool, Float64, 10)
+        @test v64 isa CuArray{Float64}
+        @test !(v64 isa SubArray)
+        @test all(v64 .== 0.0)
+    end
+
+    @testset "unsafe_ones! default type is Float32" begin
+        v = unsafe_ones!(pool, 10)
+        @test v isa CuArray{Float32,1}
+        @test !(v isa SubArray)
+        @test length(v) == 10
+        @test all(v .== 1.0f0)
+
+        m = unsafe_ones!(pool, 3, 4)
+        @test m isa CuArray{Float32,2}
+        @test !(m isa SubArray)
+        @test size(m) == (3, 4)
+        @test all(m .== 1.0f0)
+
+        # Tuple form
+        dims = (2, 3)
+        t = unsafe_ones!(pool, dims)
+        @test t isa CuArray{Float32,2}
+        @test size(t) == dims
+    end
+
+    @testset "unsafe_ones! explicit type" begin
+        v64 = unsafe_ones!(pool, Float64, 10)
+        @test v64 isa CuArray{Float64}
+        @test !(v64 isa SubArray)
+        @test all(v64 .== 1.0)
+    end
+
+    @testset "unsafe_similar!" begin
+        # Float32 template
+        template32 = CUDA.rand(Float32, 5, 5)
+        v = unsafe_similar!(pool, template32)
+        @test v isa CuArray{Float32,2}
+        @test !(v isa SubArray)
+        @test size(v) == (5, 5)
+
+        # Float64 template
+        template64 = CUDA.rand(Float64, 3, 4)
+        v64 = unsafe_similar!(pool, template64)
+        @test v64 isa CuArray{Float64,2}
+        @test !(v64 isa SubArray)
+        @test size(v64) == (3, 4)
+
+        # Different type
+        v_int = unsafe_similar!(pool, template32, Int32)
+        @test v_int isa CuArray{Int32,2}
+        @test !(v_int isa SubArray)
+        @test size(v_int) == (5, 5)
+
+        # Different dims
+        v_dims = unsafe_similar!(pool, template32, 10)
+        @test v_dims isa CuArray{Float32,1}
+        @test !(v_dims isa SubArray)
+        @test length(v_dims) == 10
+
+        # Different type and dims
+        v_both = unsafe_similar!(pool, template32, Float64, 2, 3)
+        @test v_both isa CuArray{Float64,2}
+        @test !(v_both isa SubArray)
+        @test size(v_both) == (2, 3)
+    end
+
     rewind!(pool)
 end
diff --git a/test/test_convenience.jl b/test/test_convenience.jl
index af87231..b0b50e4 100644
--- a/test/test_convenience.jl
+++ b/test/test_convenience.jl
@@ -251,4 +251,172 @@
         @test pool.float64.n_active == 0
     end
 
+    @testset "unsafe_zeros!" begin
+        pool = AdaptiveArrayPool()
+
+        @testset "returns raw array (not view)" begin
+            v = unsafe_zeros!(pool, Float64, 10)
+            @test v isa Array{Float64,1}
+            @test !(v isa SubArray)
+            @test length(v) == 10
+            @test all(v .== 0.0)
+        end
+
+        @testset "default type (Float64)" begin
+            v = unsafe_zeros!(pool, 10)
+            @test v isa Array{Float64,1}
+            @test !(v isa SubArray)
+            @test eltype(v) == Float64
+            @test all(v .== 0.0)
+        end
+
+        @testset "multi-dimensional" begin
+            m = unsafe_zeros!(pool, Float64, 3, 4)
+            @test m isa Array{Float64,2}
+            @test !(m isa SubArray)
+            @test size(m) == (3, 4)
+            @test all(m .== 0.0)
+        end
+
+        @testset "tuple form" begin
+            dims = (5, 6)
+            m = unsafe_zeros!(pool, dims)
+            @test size(m) == dims
+            @test !(m isa SubArray)
+
+            m32 = unsafe_zeros!(pool, Float32, dims)
+            @test size(m32) == dims
+            @test eltype(m32) == Float32
+        end
+
+        @testset "Nothing fallback" begin
+            v = unsafe_zeros!(nothing, Float64, 10)
+            @test v isa Array{Float64}
+            @test all(v .== 0.0)
+        end
+    end
+
+    @testset "unsafe_ones!" begin
+        pool = AdaptiveArrayPool()
+
+        @testset "returns raw array (not view)" begin
+            v = unsafe_ones!(pool, Float64, 10)
+            @test v isa Array{Float64,1}
+            @test !(v isa SubArray)
+            @test length(v) == 10
+            @test all(v .== 1.0)
+        end
+
+        @testset "default type (Float64)" begin
+            v = unsafe_ones!(pool, 10)
+            @test v isa Array{Float64,1}
+            @test !(v isa SubArray)
+            @test eltype(v) == Float64
+            @test all(v .== 1.0)
+        end
+
+        @testset "multi-dimensional" begin
+            m = unsafe_ones!(pool, Float64, 3, 4)
+            @test m isa Array{Float64,2}
+            @test !(m isa SubArray)
+            @test size(m) == (3, 4)
+            @test all(m .== 1.0)
+        end
+
+        @testset "tuple form" begin
+            dims = (5, 6)
+            m = unsafe_ones!(pool, dims)
+            @test size(m) == dims
+            @test !(m isa SubArray)
+
+            m32 = unsafe_ones!(pool, Float32, dims)
+            @test size(m32) == dims
+            @test eltype(m32) == Float32
+            @test all(m32 .== 1.0f0)
+        end
+
+        @testset "Nothing fallback" begin
+            v = unsafe_ones!(nothing, Float64, 10)
+            @test v isa Array{Float64}
+            @test all(v .== 1.0)
+        end
+    end
+
+    @testset "unsafe_similar!" begin
+        pool = AdaptiveArrayPool()
+        template = rand(Float64, 10, 10)
+
+        @testset "returns raw array (not view)" begin
+            v = unsafe_similar!(pool, template)
+            @test v isa Array{Float64,2}
+            @test !(v isa SubArray)
+            @test size(v) == size(template)
+        end
+
+        @testset "different type" begin
+            v = unsafe_similar!(pool, template, Float32)
+            @test v isa Array{Float32,2}
+            @test !(v isa SubArray)
+            @test size(v) == size(template)
+        end
+
+        @testset "different size" begin
+            v = unsafe_similar!(pool, template, 5, 5)
+            @test v isa Array{Float64,2}
+            @test !(v isa SubArray)
+            @test size(v) == (5, 5)
+        end
+
+        @testset "different type and size" begin
+            v = unsafe_similar!(pool, template, Int32, 3, 4)
+            @test v isa Array{Int32,2}
+            @test !(v isa SubArray)
+            @test size(v) == (3, 4)
+        end
+
+        @testset "Nothing fallback" begin
+            v = unsafe_similar!(nothing, template)
+            @test v isa Array{Float64}
+            @test size(v) == size(template)
+
+            v2 = unsafe_similar!(nothing, template, Int64)
+            @test v2 isa Array{Int64}
+        end
+    end
+
+    @testset "Integration unsafe functions with @with_pool" begin
+        @testset "unsafe_zeros! in macro" begin
+            result = @with_pool pool begin
+                v = unsafe_zeros!(pool, Float64, 100)
+                @test v isa Array{Float64,1}
+                @test !(v isa SubArray)
+                v .+= 1.0
+                sum(v)
+            end
+            @test result == 100.0
+        end
+
+        @testset "unsafe_ones! in macro" begin
+            result = @with_pool pool begin
+                v = unsafe_ones!(pool, Float64, 50)
+                @test v isa Array{Float64,1}
+                @test !(v isa SubArray)
+                sum(v)
+            end
+            @test result == 50.0
+        end
+
+        @testset "unsafe_similar! in macro" begin
+            template = rand(10)
+            result = @with_pool pool begin
+                v = unsafe_similar!(pool, template)
+                @test v isa Array{Float64,1}
+                @test !(v isa SubArray)
+                v .= 2.0
+                sum(v)
+            end
+            @test result == 20.0
+        end
+    end
+
 end # Convenience Functions

From b2e6c4e898bd6dc5aa66dd0d0ae1a324e613c5f3 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 12:32:08 -0800
Subject: [PATCH 10/20] docs: add convenience functions documentation and
 simplify README

- Add Convenience Functions subsection to README with zeros!/ones!/similar!
- Simplify thread-safety section to one line with link to multi-threading docs
- Document all convenience functions in api.md (view-returning and array-returning)
- Note CUDA Float32 default behavior matching CUDA.zeros()
---
 README.md   | 21 ++++++++++++---------
 docs/api.md | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 0f20461..0cd9ef5 100644
--- a/README.md
+++ b/README.md
@@ -75,16 +75,19 @@ This automatic checkpoint/rewind cycle is what enables zero allocation on repeat
 
 > **Note**: Keeping acquired arrays inside the scope is your responsibility. Return computed values (scalars, copies), not the arrays themselves. See [Safety Guide](docs/safety.md).
 
-**Thread-safe by design**: Each Julia Task gets its own independent pool, so `@with_pool` inside threaded code is automatically safe:
+**Thread-safe by design**: Each Julia Task gets its own independent pool—no locks needed. See [Multi-Threading](docs/multi-threading.md) for patterns.
 
-```julia
-Threads.@threads for i in 1:N
-    @with_pool pool begin
-        a = acquire!(pool, Float64, 100)
-        # each thread has its own pool — no race conditions
-    end
-end
-```
+### Convenience Functions
+
+Common initialization patterns have convenience functions:
+
+| Function | Equivalent to |
+|----------|---------------|
+| `zeros!(pool, 10)` | `acquire!` + `fill!(0)` |
+| `ones!(pool, Float32, 3, 3)` | `acquire!` + `fill!(1)` |
+| `similar!(pool, A)` | `acquire!` matching `eltype(A)`, `size(A)` |
+
+These return views like `acquire!`. For raw `Array` types, use `unsafe_acquire!` or its convenience variants (`unsafe_zeros!`, `unsafe_ones!`, `unsafe_similar!`). See [API Reference](docs/api.md#convenience-functions).
 
 ## Installation
 
diff --git a/docs/api.md b/docs/api.md
index 798e4b6..734c8f1 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -27,6 +27,33 @@
 | `get_task_local_pool()` | Returns the task-local pool instance. |
 | `empty!(pool)` | Clears all internal storage, releasing all memory. |
 
+## Convenience Functions
+
+Shortcuts for common `acquire!` + initialization patterns. Default element type is `Float64` (CPU) or `Float32` (CUDA).
+
+### View-returning (like `acquire!`)
+
+| Function | Description |
+|----------|-------------|
+| `zeros!(pool, [T,] dims...)` | Zero-initialized view. Equivalent to `acquire!` + `fill!(0)`. |
+| `ones!(pool, [T,] dims...)` | One-initialized view. Equivalent to `acquire!` + `fill!(1)`. |
+| `similar!(pool, A)` | View matching `eltype(A)` and `size(A)`. |
+| `similar!(pool, A, T)` | View with type `T`, size from `A`. |
+| `similar!(pool, A, dims...)` | View with `eltype(A)`, specified dimensions. |
+| `similar!(pool, A, T, dims...)` | View with type `T`, specified dimensions. |
+
+### Array-returning (like `unsafe_acquire!`)
+
+| Function | Description |
+|----------|-------------|
+| `unsafe_zeros!(pool, [T,] dims...)` | Zero-initialized raw array (`Array` on CPU, `CuArray` on CUDA). |
+| `unsafe_ones!(pool, [T,] dims...)` | One-initialized raw array (`Array` on CPU, `CuArray` on CUDA). |
+| `unsafe_similar!(pool, A, ...)` | Raw array (not a view); accepts the same signatures as `similar!`. |
+
+All convenience functions support tuple dimensions: `zeros!(pool, (3, 4))`.
+
+**CUDA note**: Default type is `Float32` to match `CUDA.zeros()` behavior.
+
 ## Types
 
 | Type | Description |

From 95a6f2a61a9ad81a5916faab23bbf8d1c476baac Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 12:53:53 -0800
Subject: [PATCH 11/20] docs(readme): update benchmarks and expand problem
 section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix benchmark numbers: 91 MiB → 2.75 GiB (90k allocations, 31% GC)
- Expand why manual buffer passing is impractical:
  - API pollution, nested calls, dynamic shapes, package boundaries
- Update solution example to use similar! convenience function
- Simplify comparison table (Naive vs AdaptiveArrayPools)
- Add emoji annotations for visual emphasis (⚠️/✅)
---
 README.md | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 0cd9ef5..0bddf40 100644
--- a/README.md
+++ b/README.md
@@ -24,23 +24,28 @@ function compute_naive(n)
 end
 
 for i in 1:10_000
-    compute_naive(100)  # 91 MiB total, 17% GC time
+    compute_naive(100)  # ⚠️ 90k allocations, 2.75 GiB, 31% GC time
 end
 ```
 
-The traditional fix—passing pre-allocated buffers through your call stack—works but requires invasive refactoring and clutters your APIs.
+The traditional fix—passing pre-allocated buffers—works for simple cases but quickly becomes impractical:
+
+- **API pollution**: Every function needs extra buffer arguments, breaking clean interfaces
+- **Nested calls**: Buffers must be threaded through entire call stacks, even third-party code
+- **Dynamic shapes**: Hard to pre-allocate when array sizes depend on runtime values
+- **Package boundaries**: You can't easily pass buffers into library functions you don't control
 
 ## The Solution
 
-Wrap your function with `@with_pool` and use `acquire!` instead of allocation:
+Wrap your function with `@with_pool` and replace allocations with `acquire!` or convenience functions:
 
 ```julia
 using AdaptiveArrayPools, LinearAlgebra, Random
 
 @with_pool pool function compute_pooled(n)
     A = acquire!(pool, Float64, n, n)  # reuses memory from pool
-    B = acquire!(pool, Float64, n, n)
-    C = acquire!(pool, Float64, n, n)
+    B = similar!(pool, A)
+    C = similar!(pool, A)
 
     rand!(A); rand!(B)
     mul!(C, A, B)
@@ -49,15 +54,15 @@ end
 
 compute_pooled(100)  # warmup
 for i in 1:10_000
-    compute_pooled(100)  # 0 bytes, 0% GC
+    compute_pooled(100) # ✅ Zero allocations, 0% GC
 end
 ```
 
-| Approach | Memory | GC Time | Code Complexity |
-|----------|--------|---------|-----------------|
-| Naive allocation | 91 MiB | 17% | Simple |
-| Manual buffer passing | 0 | 0% | Complex, invasive refactor |
-| **AdaptiveArrayPools** | **0** | **0%** | **Minimal change** |
+| | Naive | AdaptiveArrayPools |
+|-------------|-------|---------------------|
+| **Time** | 787 ms | 525 ms |
+| **Allocations** | 90k (2.75 GiB) | 0 |
+| **GC Time** | 31% | 0% |
 
 > **CUDA support**: Same API—just use `@with_pool :cuda pool`. See [CUDA Backend](docs/cuda.md).
 

From ce9f11d32d7174bfe8829fc4b2d24d5f0c0cf0f0 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 13:41:05 -0800
Subject: [PATCH 12/20] refactor(convenience): introduce default_eltype for
 backend-flexible type dispatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add default_eltype(pool) function that returns the default element type for
convenience functions when type is not specified:
- CPU (AbstractArrayPool): Float64
- CUDA (CuAdaptiveArrayPool): Float32

This fixes a bug where the macro transformation bypassed CUDA's Float32
default: it rewrote zeros!(pool, 10) into _zeros_impl!(pool, 10), and the only
dims-only _zeros_impl! method lived in the CPU module with Float64 hardcoded.

Changes:
- Add default_eltype to src/convenience.jl with AbstractArrayPool → Float64
- Override in CUDA extension with CuAdaptiveArrayPool → Float32
- Update _*_impl! functions to use default_eltype(pool) instead of Float64
- Simplify CUDA extension from 69 lines to 14 lines (remove redundant overrides)
- Update macro type extraction to generate default_eltype(pool) expressions
- Add _filter_static_types handling for default_eltype expressions
- Export default_eltype from main module
- Add macroexpand tests for convenience function expansion
- Update test_macro_internals to expect default_eltype(pool) expressions

Zero performance overhead verified via LLVM IR analysis - default_eltype(pool)
is fully constant-folded at compile time.
---
 ext/AdaptiveArrayPoolsCUDAExt/convenience.jl | 75 +++-----------------
 src/AdaptiveArrayPools.jl                    |  2 +-
 src/convenience.jl                           | 30 +++++---
 src/macros.jl                                | 15 +++-
 test/test_macro_expansion.jl                 | 73 +++++++++++++++++++
 test/test_macro_internals.jl                 | 18 +++--
 6 files changed, 133 insertions(+), 80 deletions(-)

diff --git a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
index 86bb4a2..041510a 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
@@ -1,69 +1,14 @@
 # ==============================================================================
-# CUDA Convenience Functions (Float32 default)
+# CUDA Default Element Type
 # ==============================================================================
-# Override default-type versions only; explicit type versions use base AbstractArrayPool methods.
-# This matches CUDA.zeros() behavior which defaults to Float32.
+# CUDA pools default to Float32 (matching CUDA.zeros() behavior).
+# All convenience functions (zeros!, ones!, etc.) dispatch through _*_impl!
+# which calls default_eltype(pool) for the default type.
 
-using AdaptiveArrayPools: _mark_untracked!, _zeros_impl!, _ones_impl!, _unsafe_zeros_impl!, _unsafe_ones_impl!
+"""
+    default_eltype(::CuAdaptiveArrayPool) -> Type
 
-# ==============================================================================
-# zeros! - Float32 default for CUDA (instead of Float64)
-# ==============================================================================
-
-@inline function AdaptiveArrayPools.zeros!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
-    _mark_untracked!(pool)
-    _zeros_impl!(pool, Float32, dims...)
-end
-
-@inline function AdaptiveArrayPools.zeros!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
-    _mark_untracked!(pool)
-    _zeros_impl!(pool, Float32, dims...)
-end
-
-# ==============================================================================
-# ones! - Float32 default for CUDA (instead of Float64)
-# ==============================================================================
-
-@inline function AdaptiveArrayPools.ones!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
-    _mark_untracked!(pool)
-    _ones_impl!(pool, Float32, dims...)
-end
-
-@inline function AdaptiveArrayPools.ones!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
-    _mark_untracked!(pool)
-    _ones_impl!(pool, Float32, dims...)
-end
-
-# ==============================================================================
-# unsafe_zeros! - Float32 default for CUDA (instead of Float64)
-# ==============================================================================
-
-@inline function AdaptiveArrayPools.unsafe_zeros!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
-    _mark_untracked!(pool)
-    _unsafe_zeros_impl!(pool, Float32, dims...)
-end
-
-@inline function AdaptiveArrayPools.unsafe_zeros!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
-    _mark_untracked!(pool)
-    _unsafe_zeros_impl!(pool, Float32, dims...)
-end
-
-# ==============================================================================
-# unsafe_ones! - Float32 default for CUDA (instead of Float64)
-# ==============================================================================
-
-@inline function AdaptiveArrayPools.unsafe_ones!(pool::CuAdaptiveArrayPool, dims::Vararg{Int})
-    _mark_untracked!(pool)
-    _unsafe_ones_impl!(pool, Float32, dims...)
-end
-
-@inline function AdaptiveArrayPools.unsafe_ones!(pool::CuAdaptiveArrayPool, dims::Tuple{Vararg{Int}})
-    _mark_untracked!(pool)
-    _unsafe_ones_impl!(pool, Float32, dims...)
-end
-
-# ==============================================================================
-# similar! / unsafe_similar! - No override needed
-# ==============================================================================
-# These functions use eltype(template_array) as default, which is backend-agnostic.
-# The base AbstractArrayPool methods work correctly for CUDA pools.
+Returns `Float32` as the default element type for CUDA pools.
+This matches `CUDA.zeros()` behavior.
+"""
+AdaptiveArrayPools.default_eltype(::CuAdaptiveArrayPool) = Float32
diff --git a/src/AdaptiveArrayPools.jl b/src/AdaptiveArrayPools.jl
index 6e9b3ec..cf3cdee 100644
--- a/src/AdaptiveArrayPools.jl
+++ b/src/AdaptiveArrayPools.jl
@@ -5,7 +5,7 @@ using Printf
 # Public API
 export AdaptiveArrayPool, acquire!, unsafe_acquire!, pool_stats, get_task_local_pool
 export acquire_view!, acquire_array!  # Explicit naming aliases
-export zeros!, ones!, similar!  # Convenience functions
+export zeros!, ones!, similar!, default_eltype  # Convenience functions
 export unsafe_zeros!, unsafe_ones!, unsafe_similar!  # Unsafe convenience functions
 export @with_pool, @maybe_with_pool
 export USE_POOLING, MAYBE_POOLING_ENABLED, POOL_DEBUG
diff --git a/src/convenience.jl b/src/convenience.jl
index 0c8eb86..796de64 100644
--- a/src/convenience.jl
+++ b/src/convenience.jl
@@ -2,6 +2,20 @@
 # Convenience Functions (zeros!, ones!, similar!)
 # ==============================================================================
 
+# ==============================================================================
+# Default Element Type
+# ==============================================================================
+
+"""
+    default_eltype(pool) -> Type
+
+Default element type for convenience functions when type is not specified.
+CPU pools default to `Float64`, CUDA pools to `Float32`.
+
+Backends can override this to provide appropriate defaults.
+"""
+default_eltype(::AbstractArrayPool) = Float64
+
 # ==============================================================================
 # zeros! - Acquire zero-initialized arrays from pool
 # ==============================================================================
@@ -54,9 +68,9 @@ end
     arr
 end
 
-# Default type (Float64) overload for macro transformation
+# Default type overload for macro transformation (uses default_eltype for backend flexibility)
 @inline function _zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _zeros_impl!(pool, Float64, dims...)
+    _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Nothing fallback (pooling disabled)
@@ -117,9 +131,9 @@ end
     arr
 end
 
-# Default type (Float64) overload for macro transformation
+# Default type overload for macro transformation (uses default_eltype for backend flexibility)
 @inline function _ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _ones_impl!(pool, Float64, dims...)
+    _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Nothing fallback (pooling disabled)
@@ -253,9 +267,9 @@ end
     arr
 end
 
-# Default type (Float64) overload for macro transformation
+# Default type overload for macro transformation (uses default_eltype for backend flexibility)
 @inline function _unsafe_zeros_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _unsafe_zeros_impl!(pool, Float64, dims...)
+    _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Nothing fallback (pooling disabled)
@@ -316,9 +330,9 @@ end
     arr
 end
 
-# Default type (Float64) overload for macro transformation
+# Default type overload for macro transformation (uses default_eltype for backend flexibility)
 @inline function _unsafe_ones_impl!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
-    _unsafe_ones_impl!(pool, Float64, dims...)
+    _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Nothing fallback (pooling disabled)
diff --git a/src/macros.jl b/src/macros.jl
index 39db98c..aa4d64b 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -619,8 +619,9 @@ function _extract_acquire_types(expr, target_pool, types=Set{Any}())
                         if _looks_like_type(third_arg)
                             push!(types, third_arg)
                         else
-                            # No type specified, default is Float64
-                            push!(types, :Float64)
+                            # No type specified, use default_eltype(pool) - resolved at compile time
+                            # CPU: Float64, CUDA: Float32 (via default_eltype dispatch)
+                            push!(types, Expr(:call, :default_eltype, target_pool))
                         end
                     end
                 # similar!/unsafe_similar!
@@ -737,6 +738,16 @@ function _filter_static_types(types, local_vars=Set{Symbol}())
                     # x is external (function param, global, etc.) - safe to use
                     push!(static_types, t)
                 end
+            elseif t.head == :call && length(t.args) >= 2 && t.args[1] == :default_eltype
+                # default_eltype(pool) expression from zeros!(pool, 10) etc.
+                # Resolves to a compile-time constant (Float64 for CPU, Float32 for CUDA),
+                # so it is static unless the pool argument is a local defined in the block
+                inner_arg = t.args[2]
+                if _uses_local_var(inner_arg, local_vars)
+                    has_dynamic = true
+                else
+                    push!(static_types, t)
+                end
             else
                 # Other expressions - treat as dynamic
                 has_dynamic = true
diff --git a/test/test_macro_expansion.jl b/test/test_macro_expansion.jl
index b25489d..520597c 100644
--- a/test/test_macro_expansion.jl
+++ b/test/test_macro_expansion.jl
@@ -250,4 +250,77 @@
 
     end
 
+    @testset "Convenience functions expansion" begin
+
+        @testset "zeros! default type uses default_eltype(pool)" begin
+            expr = @macroexpand @with_pool pool begin
+                v = zeros!(pool, 10)
+            end
+
+            expr_str = string(expr)
+
+            # Should contain default_eltype(pool) for backend-flexible type detection
+            @test occursin("default_eltype", expr_str)
+            @test occursin("pool", expr_str)
+        end
+
+        @testset "zeros! explicit type uses that type" begin
+            expr = @macroexpand @with_pool pool begin
+                v = zeros!(pool, Float32, 10)
+            end
+
+            expr_str = string(expr)
+
+            # Should contain Float32 directly (not default_eltype)
+            @test occursin("Float32", expr_str)
+        end
+
+        @testset "ones! default type uses default_eltype(pool)" begin
+            expr = @macroexpand @with_pool pool begin
+                v = ones!(pool, 10)
+            end
+
+            expr_str = string(expr)
+
+            @test occursin("default_eltype", expr_str)
+        end
+
+        @testset "unsafe_zeros! default type uses default_eltype(pool)" begin
+            expr = @macroexpand @with_pool pool begin
+                v = unsafe_zeros!(pool, 10)
+            end
+
+            expr_str = string(expr)
+
+            @test occursin("default_eltype", expr_str)
+        end
+
+        @testset "unsafe_ones! default type uses default_eltype(pool)" begin
+            expr = @macroexpand @with_pool pool begin
+                v = unsafe_ones!(pool, 10)
+            end
+
+            expr_str = string(expr)
+
+            @test occursin("default_eltype", expr_str)
+        end
+
+        @testset "mixed convenience with explicit and default types" begin
+            expr = @macroexpand @with_pool pool begin
+                v1 = zeros!(pool, Float64, 10)  # explicit
+                v2 = ones!(pool, 5)              # default
+                v3 = zeros!(pool, Float32, 3)   # explicit
+            end
+
+            expr_str = string(expr)
+
+            # Explicit types present
+            @test occursin("Float64", expr_str)
+            @test occursin("Float32", expr_str)
+            # default_eltype for untyped ones!
+            @test occursin("default_eltype", expr_str)
+        end
+
+    end
+
 end # Macro Expansion Details
\ No newline at end of file
diff --git a/test/test_macro_internals.jl b/test/test_macro_internals.jl
index 73c4201..9156afe 100644
--- a/test/test_macro_internals.jl
+++ b/test/test_macro_internals.jl
@@ -1058,11 +1058,16 @@ import AdaptiveArrayPools: _extract_local_assignments, _filter_static_types, _ex
             # Convenience functions (zeros!, ones!, similar!)
             # ==================================================================
 
-            @testset "zeros! default type (Float64)" begin
+            @testset "zeros! default type (default_eltype dispatch)" begin
                 expr = :(v = zeros!(pool, 10))
                 types = _extract_acquire_types(expr, :pool)
-                @test :Float64 in types
                 @test length(types) == 1
+                type_expr = first(types)
+                # Should be default_eltype(pool) expression for backend flexibility
+                @test type_expr isa Expr
+                @test type_expr.head == :call
+                @test type_expr.args[1] == :default_eltype
+                @test type_expr.args[2] == :pool
             end
 
             @testset "zeros! explicit type" begin
@@ -1072,11 +1077,16 @@ import AdaptiveArrayPools: _extract_local_assignments, _filter_static_types, _ex
                 @test length(types) == 1
             end
 
-            @testset "ones! default type (Float64)" begin
+            @testset "ones! default type (default_eltype dispatch)" begin
                 expr = :(v = ones!(pool, 10))
                 types = _extract_acquire_types(expr, :pool)
-                @test :Float64 in types
                 @test length(types) == 1
+                type_expr = first(types)
+                # Should be default_eltype(pool) expression for backend flexibility
+                @test type_expr isa Expr
+                @test type_expr.head == :call
+                @test type_expr.args[1] == :default_eltype
+                @test type_expr.args[2] == :pool
             end
 
             @testset "ones! explicit type" begin

From 4930df419977aacfd461236b76c9508fa33c9652 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 13:46:28 -0800
Subject: [PATCH 13/20] perf(state): deduplicate types in checkpoint!/rewind!
 at compile time

Add compile-time type deduplication in @generated checkpoint! and rewind!
functions. When duplicate types are passed (e.g., Float64, Float64), the
generated code now only calls _checkpoint_typed_pool!/_rewind_typed_pool!
once per unique type.

This optimization eliminates redundant push/pop operations that occurred
when zeros!(pool, 10) + zeros!(pool, Float64, 10) generated
checkpoint!(pool, default_eltype(pool), Float64) which resolved to
checkpoint!(pool, Float64, Float64) on CPU pools.

The deduplication happens entirely at compile time via the @generated
function machinery, with zero runtime overhead.
---
 src/state.jl | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/state.jl b/src/state.jl
index fd258d6..34455cf 100644
--- a/src/state.jl
+++ b/src/state.jl
@@ -55,7 +55,16 @@ Save state for multiple specific types. Uses @generated for zero-overhead
 compile-time unrolling. Increments _current_depth once for all types.
 """
 @generated function checkpoint!(pool::AdaptiveArrayPool, types::Type...)
-    checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in 1:length(types)]
+    # Deduplicate types at compile time (e.g., Float64, Float64 → Float64)
+    seen = Set{Any}()
+    unique_indices = Int[]
+    for i in eachindex(types)
+        if !(types[i] in seen)
+            push!(seen, types[i])
+            push!(unique_indices, i)
+        end
+    end
+    checkpoint_exprs = [:(_checkpoint_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in unique_indices]
     quote
         pool._current_depth += 1
         push!(pool._untracked_flags, false)
@@ -144,8 +153,17 @@ Restore state for multiple specific types in reverse order.
 Decrements _current_depth once after all types are rewound.
 """
 @generated function rewind!(pool::AdaptiveArrayPool, types::Type...)
-    rewind_exprs = [:(_rewind_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in length(types):-1:1]
-    reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in 1:length(types)]
+    # Deduplicate types at compile time (e.g., Float64, Float64 → Float64)
+    seen = Set{Any}()
+    unique_indices = Int[]
+    for i in eachindex(types)
+        if !(types[i] in seen)
+            push!(seen, types[i])
+            push!(unique_indices, i)
+        end
+    end
+    rewind_exprs = [:(_rewind_typed_pool!(get_typed_pool!(pool, types[$i]), pool._current_depth)) for i in reverse(unique_indices)]
+    reset_exprs = [:(reset!(get_typed_pool!(pool, types[$i]))) for i in unique_indices]
     quote
         # Safety guard: at global scope (depth=1), delegate to reset!
         if pool._current_depth == 1

From ace46197c855362917223b33afcc9196aaadf0be Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 14:30:18 -0800
Subject: [PATCH 14/20] feat(pool): add DisabledPool{Backend} to preserve
 backend context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When USE_POOLING=false or MAYBE_POOLING_ENABLED[]=false, macros now
return DisabledPool{backend}() instead of nothing. This preserves
backend semantics:

- DisabledPool{:cpu} → zeros/ones return Array (Julia default)
- DisabledPool{:cuda} → zeros/ones return CuArray (CUDA default)

Changes:
- Add DisabledPool{B} parametric type and DISABLED_CPU singleton
- Add pooling_enabled(pool) predicate for backward compatibility
- Add BackendNotLoadedError for explicit failure on unknown backends
- Update @with_pool and @maybe_with_pool to emit DisabledPool
- Add @maybe_with_pool :backend variants for backend-specific macros
- Add DisabledPool{:cpu} fallbacks for all convenience/acquire functions
- Add DisabledPool{:cuda} fallbacks in CUDA extension
- Add state management no-ops (checkpoint!, rewind!, reset!, empty!)
- Update tests to use pooling_enabled() instead of pool === nothing

This fixes the issue where @maybe_with_pool :cuda with USE_POOLING=false
would silently return CPU Array instead of CuArray.
---
 ext/AdaptiveArrayPoolsCUDAExt/convenience.jl |  83 ++++++++++
 src/AdaptiveArrayPools.jl                    |   1 +
 src/acquire.jl                               |  24 +++
 src/convenience.jl                           | 165 +++++++++++++++----
 src/macros.jl                                | 113 +++++++++++--
 src/state.jl                                 |  19 +++
 src/types.jl                                 |  59 +++++++
 test/test_disabled_pooling.jl                |  10 +-
 test/test_macro_expansion.jl                 |   4 +-
 test/test_macros.jl                          |   6 +-
 test/test_task_local_pool.jl                 |   4 +-
 11 files changed, 439 insertions(+), 49 deletions(-)

diff --git a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
index 041510a..3c54a11 100644
--- a/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
+++ b/ext/AdaptiveArrayPoolsCUDAExt/convenience.jl
@@ -12,3 +12,86 @@ Returns `Float32` as the default element type for CUDA pools.
 This matches `CUDA.zeros()` behavior.
 """
 AdaptiveArrayPools.default_eltype(::CuAdaptiveArrayPool) = Float32
+
+# ==============================================================================
+# DisabledPool{:cuda} Fallbacks
+# ==============================================================================
+# When pooling is disabled but :cuda backend is specified, these methods ensure
+# proper CuArray allocation instead of falling back to CPU arrays.
+
+using AdaptiveArrayPools: DisabledPool
+
+"""
+    DISABLED_CUDA
+
+Singleton instance for disabled CUDA pooling.
+Used by macros when `USE_POOLING=false` with `:cuda` backend.
+"""
+const DISABLED_CUDA = DisabledPool{:cuda}()  # defined in the CUDA extension; the core package exports only DISABLED_CPU
+
+"""
+    default_eltype(::DisabledPool{:cuda}) -> Float32
+
+Default element type for disabled CUDA pools (matches CUDA.zeros() default).
+"""
+AdaptiveArrayPools.default_eltype(::DisabledPool{:cuda}) = Float32  # same value as default_eltype(::CuAdaptiveArrayPool)
+
+# --- zeros! for DisabledPool{:cuda}: forwards to CUDA.zeros; untyped forms use default_eltype(p); dims as Vararg or NTuple ---
+@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.zeros!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+
+# --- ones! for DisabledPool{:cuda}: forwards to CUDA.ones; untyped forms use default_eltype(p); dims as Vararg or NTuple ---
+@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.ones!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+
+# --- similar! for DisabledPool{:cuda}: CuArray templates go through CUDA.similar (this group defines Vararg dims only, no NTuple forms) ---
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}) where {T} = CUDA.similar(x, T)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int,N}) where {N} = CUDA.similar(x, dims...)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.similar(x, T, dims...)
+# Fallback for non-CuArray templates: allocates an uninitialized CuArray from the template's eltype/size (no data is copied)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}) where {T} = CuArray{T}(undef, size(x))
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = CuArray{eltype(x)}(undef, dims)
+@inline AdaptiveArrayPools.similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T}(undef, dims)
+
+# --- unsafe_zeros! for DisabledPool{:cuda}: same CUDA.zeros calls as the zeros! methods for this pool type ---
+@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.unsafe_zeros!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.zeros(T, dims...)
+@inline AdaptiveArrayPools.unsafe_zeros!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.zeros(AdaptiveArrayPools.default_eltype(p), dims...)
+
+# --- unsafe_ones! for DisabledPool{:cuda}: same CUDA.ones calls as the ones! methods for this pool type ---
+@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::Vararg{Int,N}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+@inline AdaptiveArrayPools.unsafe_ones!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CUDA.ones(T, dims...)
+@inline AdaptiveArrayPools.unsafe_ones!(p::DisabledPool{:cuda}, dims::NTuple{N,Int}) where {N} = CUDA.ones(AdaptiveArrayPools.default_eltype(p), dims...)
+
+# --- unsafe_similar! for DisabledPool{:cuda}: same bodies as the similar! methods for this pool type (Vararg dims only) ---
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}) where {T} = CUDA.similar(x, T)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, dims::Vararg{Int,N}) where {N} = CUDA.similar(x, dims...)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::CuArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CUDA.similar(x, T, dims...)
+# Fallback for non-CuArray templates: allocates an uninitialized CuArray from the template's eltype/size (no data is copied)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}) where {T} = CuArray{T}(undef, size(x))
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = CuArray{eltype(x)}(undef, dims)
+@inline AdaptiveArrayPools.unsafe_similar!(::DisabledPool{:cuda}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T}(undef, dims)
+
+# --- acquire! for DisabledPool{:cuda}: typed forms allocate an uninitialized CuArray; template forms copy only eltype/size ---
+@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, n::Int) where {T} = CuVector{T}(undef, n)
+@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T,N}(undef, dims)
+@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CuArray{T,N}(undef, dims)
+@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
+@inline AdaptiveArrayPools.acquire!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
+
+# --- unsafe_acquire! for DisabledPool{:cuda}: same bodies as the acquire! methods for this pool type ---
+@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, n::Int) where {T} = CuVector{T}(undef, n)
+@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = CuArray{T,N}(undef, dims)
+@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = CuArray{T,N}(undef, dims)
+@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, x::CuArray) = CUDA.similar(x)
+@inline AdaptiveArrayPools.unsafe_acquire!(::DisabledPool{:cuda}, x::AbstractArray) = CuArray{eltype(x)}(undef, size(x))
diff --git a/src/AdaptiveArrayPools.jl b/src/AdaptiveArrayPools.jl
index cf3cdee..b28aa2e 100644
--- a/src/AdaptiveArrayPools.jl
+++ b/src/AdaptiveArrayPools.jl
@@ -15,6 +15,7 @@ export get_task_local_cuda_pool, get_task_local_cuda_pools  # CUDA (stubs, overr
 
 # Extension API (for GPU backends)
 export AbstractTypedPool, AbstractArrayPool  # For subtyping
+export DisabledPool, DISABLED_CPU, pooling_enabled  # Disabled pool support
 # Note: Extensions add methods to _get_pool_for_backend(::Val{:backend}) directly
 
 # Core data structures
diff --git a/src/acquire.jl b/src/acquire.jl
index 6510ac4..2a23768 100644
--- a/src/acquire.jl
+++ b/src/acquire.jl
@@ -441,3 +441,27 @@ const acquire_array! = unsafe_acquire!
 # Internal implementation aliases (for macro transformation)
 const _acquire_view_impl! = _acquire_impl!
 const _acquire_array_impl! = _unsafe_acquire_impl!
+
+# ==============================================================================
+# DisabledPool Acquire Fallbacks (pooling disabled with backend context)
+# ==============================================================================
+
+# --- acquire! for DisabledPool{:cpu}: typed forms allocate an uninitialized Array; the template form defers to similar(x) ---
+@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims)
+@inline acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
+
+# --- unsafe_acquire! for DisabledPool{:cpu}: same bodies as the acquire! methods for this pool type ---
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, n::Int) where {T} = Vector{T}(undef, n)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = Array{T,N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = Array{T,N}(undef, dims)
+@inline unsafe_acquire!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
+
+# --- Generic DisabledPool fallbacks: calls not matched by a more specific backend method end here and throw via _throw_backend_not_loaded(B) ---
+@inline acquire!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+@inline unsafe_acquire!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+
+# --- _impl! delegators for DisabledPool: forward unchanged arguments back to the public acquire!/unsafe_acquire! (macro transformation support) ---
+@inline _acquire_impl!(p::DisabledPool, args...) = acquire!(p, args...)
+@inline _unsafe_acquire_impl!(p::DisabledPool, args...) = unsafe_acquire!(p, args...)
diff --git a/src/convenience.jl b/src/convenience.jl
index 796de64..ee6794a 100644
--- a/src/convenience.jl
+++ b/src/convenience.jl
@@ -29,12 +29,13 @@ default_eltype(::AbstractArrayPool) = Float64
 Acquire a zero-initialized array from the pool.
 
 Equivalent to `acquire!(pool, T, dims...)` followed by `fill!(arr, zero(T))`.
-Default element type is `Float64` when not specified.
+Default element type depends on pool backend (CPU: `Float64`, CUDA: `Float32`).
+See [`default_eltype`](@ref).
 
 ## Example
 ```julia
 @with_pool pool begin
-    v = zeros!(pool, 100)              # Vector{Float64} view, all zeros
+    v = zeros!(pool, 100)              # Uses default_eltype(pool)
     m = zeros!(pool, Float32, 10, 10)  # Matrix{Float32} view, all zeros
 end
 ```
@@ -48,7 +49,7 @@ end
 
 @inline function zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
     _mark_untracked!(pool)
-    _zeros_impl!(pool, Float64, dims...)
+    _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 @inline function zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
@@ -58,7 +59,7 @@ end
 
 @inline function zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
     _mark_untracked!(pool)
-    _zeros_impl!(pool, Float64, dims...)
+    _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
@@ -73,11 +74,11 @@ end
     _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled)
+# Nothing fallback (pooling disabled; untyped calls use Julia's default Float64)
 @inline zeros!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
-@inline zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(Float64, dims...)
+@inline zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(dims...)
 @inline zeros!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
-@inline zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(Float64, dims...)
+@inline zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(dims...)
 
 # ==============================================================================
 # ones! - Acquire one-initialized arrays from pool
@@ -92,12 +93,13 @@ end
 Acquire a one-initialized array from the pool.
 
 Equivalent to `acquire!(pool, T, dims...)` followed by `fill!(arr, one(T))`.
-Default element type is `Float64` when not specified.
+Default element type depends on pool backend (CPU: `Float64`, CUDA: `Float32`).
+See [`default_eltype`](@ref).
 
 ## Example
 ```julia
 @with_pool pool begin
-    v = ones!(pool, 100)              # Vector{Float64} view, all ones
+    v = ones!(pool, 100)              # Uses default_eltype(pool)
     m = ones!(pool, Float32, 10, 10)  # Matrix{Float32} view, all ones
 end
 ```
@@ -111,7 +113,7 @@ end
 
 @inline function ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
     _mark_untracked!(pool)
-    _ones_impl!(pool, Float64, dims...)
+    _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 @inline function ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
@@ -121,7 +123,7 @@ end
 
 @inline function ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
     _mark_untracked!(pool)
-    _ones_impl!(pool, Float64, dims...)
+    _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
@@ -136,11 +138,11 @@ end
     _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled)
+# Nothing fallback (pooling disabled; untyped calls use Julia's default Float64)
 @inline ones!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
-@inline ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(Float64, dims...)
+@inline ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(dims...)
 @inline ones!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
-@inline ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(Float64, dims...)
+@inline ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(dims...)
 
 # ==============================================================================
 # similar! - Acquire arrays with same type/size as template
@@ -228,12 +230,13 @@ end
 Acquire a zero-initialized raw array (not a view) from the pool.
 
 Equivalent to `unsafe_acquire!(pool, T, dims...)` followed by `fill!(arr, zero(T))`.
-Default element type is `Float64` when not specified.
+Default element type depends on pool backend (CPU: `Float64`, CUDA: `Float32`).
+See [`default_eltype`](@ref).
 
 ## Example
 ```julia
 @with_pool pool begin
-    v = unsafe_zeros!(pool, 100)              # Array{Float64}, all zeros
+    v = unsafe_zeros!(pool, 100)              # Uses default_eltype(pool)
     m = unsafe_zeros!(pool, Float32, 10, 10)  # Array{Float32}, all zeros
 end
 ```
@@ -247,7 +250,7 @@ end
 
 @inline function unsafe_zeros!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
     _mark_untracked!(pool)
-    _unsafe_zeros_impl!(pool, Float64, dims...)
+    _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 @inline function unsafe_zeros!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
@@ -257,7 +260,7 @@ end
 
 @inline function unsafe_zeros!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
     _mark_untracked!(pool)
-    _unsafe_zeros_impl!(pool, Float64, dims...)
+    _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
@@ -272,11 +275,11 @@ end
     _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled)
+# Nothing fallback (pooling disabled; untyped calls use Julia's default Float64)
 @inline unsafe_zeros!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
-@inline unsafe_zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(Float64, dims...)
+@inline unsafe_zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(dims...)
 @inline unsafe_zeros!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
-@inline unsafe_zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(Float64, dims...)
+@inline unsafe_zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(dims...)
 
 # ==============================================================================
 # unsafe_ones! - Acquire one-initialized raw arrays from pool
@@ -291,12 +294,13 @@ end
 Acquire a one-initialized raw array (not a view) from the pool.
 
 Equivalent to `unsafe_acquire!(pool, T, dims...)` followed by `fill!(arr, one(T))`.
-Default element type is `Float64` when not specified.
+Default element type depends on pool backend (CPU: `Float64`, CUDA: `Float32`).
+See [`default_eltype`](@ref).
 
 ## Example
 ```julia
 @with_pool pool begin
-    v = unsafe_ones!(pool, 100)              # Array{Float64}, all ones
+    v = unsafe_ones!(pool, 100)              # Uses default_eltype(pool)
     m = unsafe_ones!(pool, Float32, 10, 10)  # Array{Float32}, all ones
 end
 ```
@@ -310,7 +314,7 @@ end
 
 @inline function unsafe_ones!(pool::AbstractArrayPool, dims::Vararg{Int,N}) where {N}
     _mark_untracked!(pool)
-    _unsafe_ones_impl!(pool, Float64, dims...)
+    _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 @inline function unsafe_ones!(pool::AbstractArrayPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N}
@@ -320,7 +324,7 @@ end
 
 @inline function unsafe_ones!(pool::AbstractArrayPool, dims::NTuple{N,Int}) where {N}
     _mark_untracked!(pool)
-    _unsafe_ones_impl!(pool, Float64, dims...)
+    _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
 # Internal implementation (for macro transformation)
@@ -335,11 +339,11 @@ end
     _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled)
+# Nothing fallback (pooling disabled; untyped calls use Julia's default Float64)
 @inline unsafe_ones!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
-@inline unsafe_ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(Float64, dims...)
+@inline unsafe_ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(dims...)
 @inline unsafe_ones!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
-@inline unsafe_ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(Float64, dims...)
+@inline unsafe_ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(dims...)
 
 # ==============================================================================
 # unsafe_similar! - Acquire raw arrays with same type/size as template
@@ -412,3 +416,108 @@ end
 @inline unsafe_similar!(::Nothing, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
 @inline unsafe_similar!(::Nothing, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
 @inline unsafe_similar!(::Nothing, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
+
+# ==============================================================================
+# BackendNotLoadedError - Error for unknown backends
+# ==============================================================================
+
+"""
+    BackendNotLoadedError <: Exception
+
+Error thrown when a backend-specific operation is attempted but the backend
+package is not loaded.
+
+## Example
+```julia
+@maybe_with_pool :cuda pool begin
+    zeros!(pool, 10)  # Throws if CUDA.jl not loaded
+end
+```
+"""
+struct BackendNotLoadedError <: Exception
+    backend::Symbol  # backend tag (e.g. :cuda); showerror interpolates it into the message
+end
+
+function Base.showerror(io::IO, e::BackendNotLoadedError)
+    print(io, "Backend :$(e.backend) is not available. ")  # common prefix; the trailing space separates it from the hint
+    if e.backend == :cuda  # only :cuda gets a package-specific hint
+        print(io, "Make sure CUDA.jl is loaded: `using CUDA`")
+    else  # every other backend tag gets the generic hint
+        print(io, "Make sure the appropriate backend package is loaded.")
+    end
+end
+
+# ==============================================================================
+# DisabledPool Fallbacks (pooling disabled with backend context)
+# ==============================================================================
+
+# --- Default Element Type (read by the untyped DisabledPool{:cpu} zeros!/ones! methods) ---
+"""
+    default_eltype(::DisabledPool{:cpu}) -> Float64
+
+Default element type for disabled CPU pools (matches Julia's `zeros()` default).
+"""
+default_eltype(::DisabledPool{:cpu}) = Float64
+
+# --- Generic Backend Fallback (throws error) ---
+# Throw helper behind the generic DisabledPool{B} fallbacks (e.g. DisabledPool{:unknown_backend})
+@noinline function _throw_backend_not_loaded(backend::Symbol)
+    throw(BackendNotLoadedError(backend))  # message text comes from Base.showerror(::IO, ::BackendNotLoadedError)
+end
+
+# --- zeros! for DisabledPool{:cpu}: forwards to Base zeros; untyped forms use default_eltype(p); dims as Vararg or NTuple ---
+@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
+@inline zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = zeros(default_eltype(p), dims...)
+@inline zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
+@inline zeros!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = zeros(default_eltype(p), dims...)
+
+# --- ones! for DisabledPool{:cpu}: forwards to Base ones; untyped forms use default_eltype(p); dims as Vararg or NTuple ---
+@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
+@inline ones!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = ones(default_eltype(p), dims...)
+@inline ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
+@inline ones!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = ones(default_eltype(p), dims...)
+
+# --- similar! for DisabledPool{:cpu}: forwards to Base similar with the same arguments (Vararg dims only, no NTuple forms) ---
+@inline similar!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
+@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
+@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
+@inline similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
+
+# --- unsafe_zeros! for DisabledPool{:cpu}: same Base zeros calls as the zeros! methods for this pool type ---
+@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
+@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = zeros(default_eltype(p), dims...)
+@inline unsafe_zeros!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
+@inline unsafe_zeros!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = zeros(default_eltype(p), dims...)
+
+# --- unsafe_ones! for DisabledPool{:cpu}: same Base ones calls as the ones! methods for this pool type ---
+@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
+@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::Vararg{Int,N}) where {N} = ones(default_eltype(p), dims...)
+@inline unsafe_ones!(::DisabledPool{:cpu}, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
+@inline unsafe_ones!(p::DisabledPool{:cpu}, dims::NTuple{N,Int}) where {N} = ones(default_eltype(p), dims...)
+
+# --- unsafe_similar! for DisabledPool{:cpu}: same Base similar calls as the similar! methods for this pool type ---
+@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray) = similar(x)
+@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
+@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
+@inline unsafe_similar!(::DisabledPool{:cpu}, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
+
+# --- Generic DisabledPool fallbacks: calls not matched by a more specific backend method end here and throw BackendNotLoadedError(B) ---
+@inline zeros!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+@inline ones!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+@inline similar!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+@inline unsafe_zeros!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+@inline unsafe_ones!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+@inline unsafe_similar!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
+
+# ==============================================================================
+# _impl! Delegators for DisabledPool
+# ==============================================================================
+# Macros rewrite zeros!(pool, ...) into _zeros_impl!(pool, ...) (likewise for the other functions).
+# For a DisabledPool, each _impl! method forwards the unchanged arguments back to the public function.
+
+@inline _zeros_impl!(p::DisabledPool, args...) = zeros!(p, args...)
+@inline _ones_impl!(p::DisabledPool, args...) = ones!(p, args...)
+@inline _similar_impl!(p::DisabledPool, args...) = similar!(p, args...)
+@inline _unsafe_zeros_impl!(p::DisabledPool, args...) = unsafe_zeros!(p, args...)
+@inline _unsafe_ones_impl!(p::DisabledPool, args...) = unsafe_ones!(p, args...)
+@inline _unsafe_similar_impl!(p::DisabledPool, args...) = unsafe_similar!(p, args...)
diff --git a/src/macros.jl b/src/macros.jl
index aa4d64b..ea09e08 100644
--- a/src/macros.jl
+++ b/src/macros.jl
@@ -161,19 +161,48 @@ macro maybe_with_pool(expr)
     _generate_pool_code(pool_name, expr, false)
 end
 
+# Backend-specific variant with an explicit pool name: @maybe_with_pool :cuda pool begin ... end
+macro maybe_with_pool(backend::QuoteNode, pool_name, expr)
+    _generate_pool_code_with_backend(backend.value, pool_name, expr, false)  # backend.value unwraps the quoted symbol; false = force_enable
+end
+
+macro maybe_with_pool(backend::QuoteNode, expr)
+    pool_name = gensym(:pool)  # no pool name supplied by the caller: use a freshly generated unique symbol
+    _generate_pool_code_with_backend(backend.value, pool_name, expr, false)  # false = force_enable, as in the named variant
+end
+
+# ==============================================================================
+# Internal: DisabledPool Expression Generator
+# ==============================================================================
+
+"""
+    _disabled_pool_expr(backend::Symbol) -> Expr
+
+Generate expression for DisabledPool singleton based on backend.
+Used when pooling is disabled to preserve backend context.
+"""
+function _disabled_pool_expr(backend::Symbol)
+    if backend == :cpu  # :cpu reuses the shared DISABLED_CPU constant
+        :($DISABLED_CPU)  # NOTE(review): :($x) evaluates to the value itself, so this returns the object, not an Expr
+    else
+        :($(DisabledPool{backend}()))  # instance is built right here, i.e. while the calling macro is being expanded
+    end
+end
+
 # ==============================================================================
 # Internal: Code Generation
 # ==============================================================================
 
 function _generate_pool_code(pool_name, expr, force_enable)
-    # Compile-time check: if pooling disabled, just run expr with pool=nothing
+    # Compile-time check: if pooling disabled, use DisabledPool to preserve backend context
     if !USE_POOLING
+        disabled_pool = _disabled_pool_expr(:cpu)
         if Meta.isexpr(expr, [:function, :(=)]) && _is_function_def(expr)
-            # Function definition: inject local pool = nothing at start of body
-            return _generate_function_pool_code(pool_name, expr, force_enable, true)
+            # Function definition: inject local pool = DisabledPool at start of body
+            return _generate_function_pool_code(pool_name, expr, force_enable, true, :cpu)
         else
             return quote
-                local $(esc(pool_name)) = $(nothing)
+                local $(esc(pool_name)) = $disabled_pool
                 $(esc(expr))
             end
         end
@@ -256,7 +285,7 @@ function _generate_pool_code(pool_name, expr, force_enable)
                     $rewind_call
                 end
             else
-                local $(esc(pool_name)) = $(nothing)
+                local $(esc(pool_name)) = $DISABLED_CPU
                 $(esc(expr))
             end
         end
@@ -276,13 +305,72 @@ Uses `_get_pool_for_backend(Val{backend}())` for zero-overhead dispatch.
 Includes type-specific checkpoint/rewind optimization (same as regular @with_pool).
 """
 function _generate_pool_code_with_backend(backend::Symbol, pool_name, expr, force_enable::Bool)
-    # Compile-time check: if pooling disabled, just run expr with pool=nothing
+    # Compile-time check: if pooling disabled, use DisabledPool to preserve backend context
     if !USE_POOLING
+        disabled_pool = _disabled_pool_expr(backend)
         if Meta.isexpr(expr, [:function, :(=)]) && _is_function_def(expr)
             return _generate_function_pool_code_with_backend(backend, pool_name, expr, true)
         else
             return quote
-                local $(esc(pool_name)) = $(nothing)
+                local $(esc(pool_name)) = $disabled_pool
+                $(esc(expr))
+            end
+        end
+    end
+
+    # Runtime check for @maybe_with_pool :backend (force_enable=false)
+    if !force_enable
+        disabled_pool = _disabled_pool_expr(backend)
+        # Check if function definition
+        if Meta.isexpr(expr, [:function, :(=)]) && _is_function_def(expr)
+            return _generate_function_pool_code_with_backend(backend, pool_name, expr, false)
+        end
+
+        # Block logic with runtime check
+        all_types = _extract_acquire_types(expr, pool_name)
+        local_vars = _extract_local_assignments(expr)
+        static_types, has_dynamic = _filter_static_types(all_types, local_vars)
+        use_typed = !has_dynamic && !isempty(static_types)
+        transformed_expr = _transform_acquire_calls(expr, pool_name)
+        pool_getter = :($_get_pool_for_backend($(Val{backend}())))
+
+        if use_typed
+            typed_checkpoint_call = _generate_typed_checkpoint_call(esc(pool_name), static_types)
+            checkpoint_call = quote
+                if @inbounds $(esc(pool_name))._untracked_flags[$(esc(pool_name))._current_depth]
+                    $checkpoint!($(esc(pool_name)))
+                else
+                    $typed_checkpoint_call
+                end
+            end
+            typed_rewind_call = _generate_typed_rewind_call(esc(pool_name), static_types)
+            rewind_call = quote
+                if @inbounds $(esc(pool_name))._untracked_flags[$(esc(pool_name))._current_depth]
+                    $rewind!($(esc(pool_name)))
+                else
+                    $typed_rewind_call
+                end
+            end
+        else
+            checkpoint_call = :($checkpoint!($(esc(pool_name))))
+            rewind_call = :($rewind!($(esc(pool_name))))
+        end
+
+        return quote
+            if $MAYBE_POOLING_ENABLED[]
+                local $(esc(pool_name)) = $pool_getter
+                $checkpoint_call
+                try
+                    local _result = $(esc(transformed_expr))
+                    if $POOL_DEBUG[]
+                        $_validate_pool_return(_result, $(esc(pool_name)))
+                    end
+                    _result
+                finally
+                    $rewind_call
+                end
+            else
+                local $(esc(pool_name)) = $disabled_pool
                 $(esc(expr))
             end
         end
@@ -362,8 +450,9 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f
     body = func_def.args[2]
 
     if disable_pooling
+        disabled_pool = _disabled_pool_expr(backend)
         new_body = quote
-            local $(esc(pool_name)) = $(nothing)
+            local $(esc(pool_name)) = $disabled_pool
             $(esc(body))
         end
         return Expr(def_head, esc(call_expr), new_body)
@@ -422,14 +511,15 @@ function _generate_function_pool_code_with_backend(backend::Symbol, pool_name, f
     return Expr(def_head, esc(call_expr), new_body)
 end
 
-function _generate_function_pool_code(pool_name, func_def, force_enable, disable_pooling)
+function _generate_function_pool_code(pool_name, func_def, force_enable, disable_pooling, backend::Symbol=:cpu)
     def_head = func_def.head
     call_expr = func_def.args[1]
     body = func_def.args[2]
 
     if disable_pooling
+        disabled_pool = _disabled_pool_expr(backend)
         new_body = quote
-            local $(esc(pool_name)) = $(nothing)
+            local $(esc(pool_name)) = $disabled_pool
             $(esc(body))
         end
         return Expr(def_head, esc(call_expr), new_body)
@@ -484,6 +574,7 @@ function _generate_function_pool_code(pool_name, func_def, force_enable, disable
             end
         end
     else
+        disabled_pool = _disabled_pool_expr(backend)
         new_body = quote
             if $MAYBE_POOLING_ENABLED[]
                 local $(esc(pool_name)) = get_task_local_pool()
@@ -494,7 +585,7 @@ function _generate_function_pool_code(pool_name, func_def, force_enable, disable
                     $rewind_call
                 end
             else
-                local $(esc(pool_name)) = $(nothing)
+                local $(esc(pool_name)) = $disabled_pool
                 $(esc(body))
             end
         end
diff --git a/src/state.jl b/src/state.jl
index 34455cf..df75934 100644
--- a/src/state.jl
+++ b/src/state.jl
@@ -389,3 +389,22 @@ end
 reset!(::Nothing) = nothing
 reset!(::Nothing, ::Type) = nothing
 reset!(::Nothing, types::Type...) = nothing
+
+# ==============================================================================
+# DisabledPool State Management (no-ops)
+# ==============================================================================
+# DisabledPool doesn't track state, so all operations are no-ops.
+
+checkpoint!(::DisabledPool) = nothing
+checkpoint!(::DisabledPool, ::Type) = nothing
+checkpoint!(::DisabledPool, types::Type...) = nothing
+
+rewind!(::DisabledPool) = nothing
+rewind!(::DisabledPool, ::Type) = nothing
+rewind!(::DisabledPool, types::Type...) = nothing
+
+reset!(::DisabledPool) = nothing
+reset!(::DisabledPool, ::Type) = nothing
+reset!(::DisabledPool, types::Type...) = nothing
+
+Base.empty!(::DisabledPool) = nothing
diff --git a/src/types.jl b/src/types.jl
index 06bee76..77f2052 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -79,6 +79,65 @@ Abstract base for multi-type array pools.
 """
 abstract type AbstractArrayPool end
 
+# ==============================================================================
+# Disabled Pool Sentinel Types
+# ==============================================================================
+
+"""
+    DisabledPool{Backend}
+
+Sentinel type for disabled pooling that preserves backend context.
+When `USE_POOLING=false` (compile-time) or `MAYBE_POOLING_ENABLED[]=false` (runtime),
+macros bind the pool variable to `DisabledPool{backend}()` instead of `nothing`.
+
+Backend symbols:
+- `:cpu` - Standard Julia arrays
+- `:cuda` - CUDA.jl CuArrays (defined in extension)
+
+This enables `@with_pool :cuda` to return correct array types even when pooling is off.
+
+## Example
+```julia
+# When USE_POOLING=false:
+@with_pool :cuda pool begin
+    v = zeros!(pool, 10)  # Returns CuArray{Float32}, not Array{Float64}!
+end
+```
+
+See also: [`pooling_enabled`](@ref), [`DISABLED_CPU`](@ref)
+"""
+struct DisabledPool{Backend} end
+
+"""
+    DISABLED_CPU
+
+Singleton instance for disabled CPU pooling.
+Used by macros without a backend specification when pooling is disabled (`USE_POOLING=false` or `MAYBE_POOLING_ENABLED[]=false`).
+"""
+const DISABLED_CPU = DisabledPool{:cpu}()
+
+"""
+    pooling_enabled(pool) -> Bool
+
+Returns `true` if `pool` is an active pool, `false` if pooling is disabled.
+
+## Examples
+```julia
+@maybe_with_pool pool begin
+    if pooling_enabled(pool)
+        # Using pooled memory
+    else
+        # Using standard allocation
+    end
+end
+```
+
+See also: [`DisabledPool`](@ref)
+"""
+pooling_enabled(::AbstractArrayPool) = true
+pooling_enabled(::DisabledPool) = false
+pooling_enabled(::Nothing) = false  # Backward compatibility
+
 # ==============================================================================
 # Core Data Structures
 # ==============================================================================
diff --git a/test/test_disabled_pooling.jl b/test/test_disabled_pooling.jl
index da4efe8..cc92621 100644
--- a/test/test_disabled_pooling.jl
+++ b/test/test_disabled_pooling.jl
@@ -21,9 +21,10 @@
     @test USE_POOLING == false
     println("USE_POOLING = ", USE_POOLING)
 
-    # Test @with_pool block mode - should set pool=nothing
+    # Test @with_pool block mode - should set pool=DisabledPool{:cpu}
     result1 = @with_pool pool begin
-        @test pool === nothing
+        @test pool isa DisabledPool{:cpu}
+        @test !pooling_enabled(pool)
         v = acquire!(pool, Float64, 10)  # fallback to normal allocation
         @test v isa Vector{Float64}
         @test length(v) == 10
@@ -33,9 +34,10 @@
     @test result1 == 10.0
     println("@with_pool block mode: PASS")
 
-    # Test @maybe_with_pool block mode - should also set pool=nothing
+    # Test @maybe_with_pool block mode - should also set pool=DisabledPool{:cpu}
     result2 = @maybe_with_pool pool begin
-        @test pool === nothing
+        @test pool isa DisabledPool{:cpu}
+        @test !pooling_enabled(pool)
         v = acquire!(pool, Float64, 5)
         @test v isa Vector{Float64}
         v .= 4.0
diff --git a/test/test_macro_expansion.jl b/test/test_macro_expansion.jl
index 520597c..e68c303 100644
--- a/test/test_macro_expansion.jl
+++ b/test/test_macro_expansion.jl
@@ -43,9 +43,9 @@
             # Should contain conditional check (MAYBE_POOLING_ENABLED is inlined as RefValue)
             @test occursin("RefValue", expr_str) || occursin("if", expr_str)
 
-            # Should have both branches (pool and nothing)
+            # Should have both branches (pool getter and DisabledPool fallback)
             @test occursin("get_task_local_pool", expr_str)
-            @test occursin("nothing", expr_str)
+            @test occursin("DisabledPool", expr_str)
         end
 
         # Test typed checkpoint optimization
diff --git a/test/test_macros.jl b/test/test_macros.jl
index 190a5ae..606c3f4 100644
--- a/test/test_macros.jl
+++ b/test/test_macros.jl
@@ -130,7 +130,8 @@ import AdaptiveArrayPools: checkpoint!, rewind!
         MAYBE_POOLING_ENABLED[] = false
 
         result = @maybe_with_pool pool begin
-            @test pool === nothing
+            @test pool isa DisabledPool{:cpu}
+            @test !pooling_enabled(pool)
             v = acquire!(pool, Float64, 10)  # Falls back to normal allocation
             @test v isa Vector{Float64}
             v .= 4.0
@@ -242,7 +243,8 @@ import AdaptiveArrayPools: checkpoint!, rewind!
         MAYBE_POOLING_ENABLED[] = false
 
         @maybe_with_pool p2 maybe_short_disabled(n) = begin
-            @test p2 === nothing
+            @test p2 isa DisabledPool{:cpu}
+            @test !pooling_enabled(p2)
             v = acquire!(p2, Float64, n)  # Falls back to allocation
             v .= 1.0
             sum(v)
diff --git a/test/test_task_local_pool.jl b/test/test_task_local_pool.jl
index 704492e..8a8645b 100644
--- a/test/test_task_local_pool.jl
+++ b/test/test_task_local_pool.jl
@@ -70,9 +70,9 @@
         end
         @test res == 20.0
 
-        # Verify pool was nothing (fallback allocation used)
+        # Verify pool is DisabledPool (fallback allocation used)
         result_type = @maybe_with_pool pool begin
-            pool === nothing
+            pool isa DisabledPool{:cpu} && !pooling_enabled(pool)
         end
         @test result_type == true
 

From 86a647af7a861f60284e812542adc993b70ffeea Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 15:05:05 -0800
Subject: [PATCH 15/20] refactor(api): remove Nothing fallbacks in favor of
 DisabledPool

Remove all ::Nothing method fallbacks since macros now exclusively
use DisabledPool{backend}() when pooling is disabled.

Changes:
- Remove zeros!/ones!/similar! Nothing fallbacks from convenience.jl
- Remove unsafe_zeros!/unsafe_ones!/unsafe_similar! Nothing fallbacks
- Remove acquire!/unsafe_acquire! Nothing fallbacks from acquire.jl
- Remove checkpoint!/rewind!/reset!/empty! Nothing fallbacks from state.jl
- Remove pooling_enabled(::Nothing) from types.jl
- Replace _validate_pool_return(::Nothing) with ::DisabledPool in utils.jl
- Update all tests to use DISABLED_CPU instead of nothing

This enforces a consistent API pattern: DisabledPool{B} is the only
way to represent disabled pooling, eliminating confusion for extension
developers implementing new backends.
---
 src/acquire.jl                | 30 ------------------------
 src/convenience.jl            | 36 ----------------------------
 src/state.jl                  | 14 -----------
 src/types.jl                  |  1 -
 src/utils.jl                  |  2 +-
 test/test_aliases.jl          | 12 +++++-----
 test/test_basic.jl            |  6 ++---
 test/test_convenience.jl      | 44 +++++++++++++++++------------------
 test/test_multidimensional.jl | 16 ++++++-------
 test/test_state.jl            | 42 ++++++++++++++++-----------------
 test/test_utils.jl            |  6 ++---
 11 files changed, 64 insertions(+), 145 deletions(-)

diff --git a/src/acquire.jl b/src/acquire.jl
index 2a23768..e45d0eb 100644
--- a/src/acquire.jl
+++ b/src/acquire.jl
@@ -279,19 +279,6 @@ end
     _acquire_impl!(pool, T, dims...)
 end
 
-# Fallback: When pool is `nothing` (e.g. pooling disabled), allocate normally
-@inline function acquire!(::Nothing, ::Type{T}, n::Int) where {T}
-    Vector{T}(undef, n)
-end
-
-@inline function acquire!(::Nothing, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
-    Array{T, N}(undef, dims)
-end
-
-@inline function acquire!(::Nothing, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
-    Array{T, N}(undef, dims)
-end
-
 # Similar-style convenience methods
 """
     acquire!(pool, x::AbstractArray) -> SubArray
@@ -312,8 +299,6 @@ end
     _acquire_impl!(pool, eltype(x), size(x))
 end
 
-@inline acquire!(::Nothing, x::AbstractArray) = similar(x)
-
 # ==============================================================================
 # Unsafe Acquisition API (Raw Arrays)
 # ==============================================================================
@@ -379,19 +364,6 @@ end
     _unsafe_acquire_impl!(pool, T, dims)
 end
 
-# Fallback: When pool is `nothing`, allocate normally
-@inline function unsafe_acquire!(::Nothing, ::Type{T}, n::Int) where {T}
-    Vector{T}(undef, n)
-end
-
-@inline function unsafe_acquire!(::Nothing, ::Type{T}, dims::Vararg{Int, N}) where {T, N}
-    Array{T, N}(undef, dims)
-end
-
-@inline function unsafe_acquire!(::Nothing, ::Type{T}, dims::NTuple{N, Int}) where {T, N}
-    Array{T, N}(undef, dims)
-end
-
 # Similar-style convenience methods
 """
     unsafe_acquire!(pool, x::AbstractArray) -> Array
@@ -412,8 +384,6 @@ end
     _unsafe_acquire_impl!(pool, eltype(x), size(x))
 end
 
-@inline unsafe_acquire!(::Nothing, x::AbstractArray) = similar(x)
-
 # ==============================================================================
 # API Aliases
 # ==============================================================================
diff --git a/src/convenience.jl b/src/convenience.jl
index ee6794a..bf7badc 100644
--- a/src/convenience.jl
+++ b/src/convenience.jl
@@ -74,12 +74,6 @@ end
     _zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled - uses Julia's default Float64)
-@inline zeros!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
-@inline zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(dims...)
-@inline zeros!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
-@inline zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(dims...)
-
 # ==============================================================================
 # ones! - Acquire one-initialized arrays from pool
 # ==============================================================================
@@ -138,12 +132,6 @@ end
     _ones_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled - uses Julia's default Float64)
-@inline ones!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
-@inline ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(dims...)
-@inline ones!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
-@inline ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(dims...)
-
 # ==============================================================================
 # similar! - Acquire arrays with same type/size as template
 # ==============================================================================
@@ -211,12 +199,6 @@ end
     _acquire_impl!(pool, T, dims...)
 end
 
-# Nothing fallback (pooling disabled)
-@inline similar!(::Nothing, x::AbstractArray) = similar(x)
-@inline similar!(::Nothing, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
-@inline similar!(::Nothing, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
-@inline similar!(::Nothing, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
-
 # ==============================================================================
 # unsafe_zeros! - Acquire zero-initialized raw arrays from pool
 # ==============================================================================
@@ -275,12 +257,6 @@ end
     _unsafe_zeros_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled - uses Julia's default Float64)
-@inline unsafe_zeros!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros(T, dims...)
-@inline unsafe_zeros!(::Nothing, dims::Vararg{Int,N}) where {N} = zeros(dims...)
-@inline unsafe_zeros!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros(T, dims...)
-@inline unsafe_zeros!(::Nothing, dims::NTuple{N,Int}) where {N} = zeros(dims...)
-
 # ==============================================================================
 # unsafe_ones! - Acquire one-initialized raw arrays from pool
 # ==============================================================================
@@ -339,12 +315,6 @@ end
     _unsafe_ones_impl!(pool, default_eltype(pool), dims...)
 end
 
-# Nothing fallback (pooling disabled - uses Julia's default Float64)
-@inline unsafe_ones!(::Nothing, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones(T, dims...)
-@inline unsafe_ones!(::Nothing, dims::Vararg{Int,N}) where {N} = ones(dims...)
-@inline unsafe_ones!(::Nothing, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones(T, dims...)
-@inline unsafe_ones!(::Nothing, dims::NTuple{N,Int}) where {N} = ones(dims...)
-
 # ==============================================================================
 # unsafe_similar! - Acquire raw arrays with same type/size as template
 # ==============================================================================
@@ -411,12 +381,6 @@ end
     _unsafe_acquire_impl!(pool, T, dims...)
 end
 
-# Nothing fallback (pooling disabled)
-@inline unsafe_similar!(::Nothing, x::AbstractArray) = similar(x)
-@inline unsafe_similar!(::Nothing, x::AbstractArray, ::Type{T}) where {T} = similar(x, T)
-@inline unsafe_similar!(::Nothing, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar(x, dims...)
-@inline unsafe_similar!(::Nothing, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar(x, T, dims...)
-
 # ==============================================================================
 # BackendNotLoadedError - Error for unknown backends
 # ==============================================================================
diff --git a/src/state.jl b/src/state.jl
index df75934..9cb09ab 100644
--- a/src/state.jl
+++ b/src/state.jl
@@ -73,10 +73,6 @@ compile-time unrolling. Increments _current_depth once for all types.
     end
 end
 
-checkpoint!(::Nothing) = nothing
-checkpoint!(::Nothing, ::Type) = nothing
-checkpoint!(::Nothing, types::Type...) = nothing
-
 # Internal helper for checkpoint (works for any AbstractTypedPool)
 @inline function _checkpoint_typed_pool!(tp::AbstractTypedPool, depth::Int)
     push!(tp._checkpoint_n_active, tp.n_active)
@@ -177,10 +173,6 @@ Decrements _current_depth once after all types are rewound.
     end
 end
 
-rewind!(::Nothing) = nothing
-rewind!(::Nothing, ::Type) = nothing
-rewind!(::Nothing, types::Type...) = nothing
-
 # Internal helper for rewind with orphan cleanup (works for any AbstractTypedPool)
 # Uses 1-based sentinel pattern: no isempty checks needed (sentinel [0] guarantees non-empty)
 @inline function _rewind_typed_pool!(tp::AbstractTypedPool, current_depth::Int)
@@ -276,8 +268,6 @@ function Base.empty!(pool::AdaptiveArrayPool)
     return pool
 end
 
-Base.empty!(::Nothing) = nothing
-
 # ==============================================================================
 # State Management - reset!
 # ==============================================================================
@@ -386,10 +376,6 @@ See also: [`reset!(::AdaptiveArrayPool)`](@ref), [`rewind!`](@ref)
     end
 end
 
-reset!(::Nothing) = nothing
-reset!(::Nothing, ::Type) = nothing
-reset!(::Nothing, types::Type...) = nothing
-
 # ==============================================================================
 # DisabledPool State Management (no-ops)
 # ==============================================================================
diff --git a/src/types.jl b/src/types.jl
index 77f2052..9a3ffc7 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -136,7 +136,6 @@ See also: [`DisabledPool`](@ref)
 """
 pooling_enabled(::AbstractArrayPool) = true
 pooling_enabled(::DisabledPool) = false
-pooling_enabled(::Nothing) = false  # Backward compatibility
 
 # ==============================================================================
 # Core Data Structures
diff --git a/src/utils.jl b/src/utils.jl
index 7b4d5cc..7d6f7ec 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -73,7 +73,7 @@ function _check_pointer_overlap(arr::Array, pool::AdaptiveArrayPool)
     end
 end
 
-_validate_pool_return(val, ::Nothing) = nothing
+_validate_pool_return(val, ::DisabledPool) = nothing
 
 # ==============================================================================
 # Statistics & Pretty Printing
diff --git a/test/test_aliases.jl b/test/test_aliases.jl
index 0050835..9f53563 100644
--- a/test/test_aliases.jl
+++ b/test/test_aliases.jl
@@ -33,11 +33,11 @@ using AdaptiveArrayPools
 
         rewind!(pool)
 
-        # nothing fallback
-        v_nothing = acquire_view!(nothing, Float64, 10)
+        # DisabledPool fallback
+        v_nothing = acquire_view!(DISABLED_CPU, Float64, 10)
         @test v_nothing isa Vector{Float64}
 
-        m_nothing = acquire_view!(nothing, Float64, 5, 5)
+        m_nothing = acquire_view!(DISABLED_CPU, Float64, 5, 5)
         @test m_nothing isa Matrix{Float64}
     end
 
@@ -76,11 +76,11 @@ using AdaptiveArrayPools
 
         rewind!(pool)
 
-        # nothing fallback
-        v_nothing = acquire_array!(nothing, Float64, 10)
+        # DisabledPool fallback
+        v_nothing = acquire_array!(DISABLED_CPU, Float64, 10)
         @test v_nothing isa Vector{Float64}
 
-        m_nothing = acquire_array!(nothing, Float64, 5, 5)
+        m_nothing = acquire_array!(DISABLED_CPU, Float64, 5, 5)
         @test m_nothing isa Matrix{Float64}
     end
 
diff --git a/test/test_basic.jl b/test/test_basic.jl
index 541b840..3a42077 100644
--- a/test/test_basic.jl
+++ b/test/test_basic.jl
@@ -117,14 +117,14 @@
         @test true
     end
 
-    @testset "acquire! fallback (nothing)" begin
+    @testset "acquire! fallback (DISABLED_CPU)" begin
         # Without pool - returns Vector (allocation)
-        v3 = acquire!(nothing, Float64, 10)
+        v3 = acquire!(DISABLED_CPU, Float64, 10)
         @test v3 isa Vector{Float64}
         @test length(v3) == 10
 
         # Different types
-        v4 = acquire!(nothing, Int64, 5)
+        v4 = acquire!(DISABLED_CPU, Int64, 5)
         @test v4 isa Vector{Int64}
         @test length(v4) == 5
     end
diff --git a/test/test_convenience.jl b/test/test_convenience.jl
index b0b50e4..2616697 100644
--- a/test/test_convenience.jl
+++ b/test/test_convenience.jl
@@ -50,23 +50,23 @@
             @test eltype(m32) == Float32
         end
 
-        @testset "Nothing fallback" begin
-            v = zeros!(nothing, Float64, 10)
+        @testset "DisabledPool fallback" begin
+            v = zeros!(DISABLED_CPU, Float64, 10)
             @test v isa Array{Float64}
             @test length(v) == 10
             @test all(v .== 0.0)
 
-            v2 = zeros!(nothing, 5, 5)
+            v2 = zeros!(DISABLED_CPU, 5, 5)
             @test v2 isa Matrix{Float64}
             @test size(v2) == (5, 5)
 
             # NTuple fallbacks
             dims = (3, 4)
-            v3 = zeros!(nothing, Float32, dims)
+            v3 = zeros!(DISABLED_CPU, Float32, dims)
             @test v3 isa Array{Float32}
             @test size(v3) == dims
 
-            v4 = zeros!(nothing, dims)
+            v4 = zeros!(DISABLED_CPU, dims)
             @test v4 isa Array{Float64}
             @test size(v4) == dims
         end
@@ -120,24 +120,24 @@
             @test all(m32 .== 1.0f0)
         end
 
-        @testset "Nothing fallback" begin
-            v = ones!(nothing, Float64, 10)
+        @testset "DisabledPool fallback" begin
+            v = ones!(DISABLED_CPU, Float64, 10)
             @test v isa Array{Float64}
             @test length(v) == 10
             @test all(v .== 1.0)
 
             # Vararg without type
-            v2 = ones!(nothing, 5, 5)
+            v2 = ones!(DISABLED_CPU, 5, 5)
             @test v2 isa Matrix{Float64}
             @test size(v2) == (5, 5)
 
             # NTuple fallbacks
             dims = (3, 4)
-            v3 = ones!(nothing, Float32, dims)
+            v3 = ones!(DISABLED_CPU, Float32, dims)
             @test v3 isa Array{Float32}
             @test size(v3) == dims
 
-            v4 = ones!(nothing, dims)
+            v4 = ones!(DISABLED_CPU, dims)
             @test v4 isa Array{Float64}
             @test size(v4) == dims
         end
@@ -178,22 +178,22 @@
             @test eltype(v) == Float64
         end
 
-        @testset "Nothing fallback" begin
-            v = similar!(nothing, template)
+        @testset "DisabledPool fallback" begin
+            v = similar!(DISABLED_CPU, template)
             @test v isa Array{Float64}
             @test size(v) == size(template)
 
-            v2 = similar!(nothing, template, Int64)
+            v2 = similar!(DISABLED_CPU, template, Int64)
             @test v2 isa Array{Int64}
             @test size(v2) == size(template)
 
             # Vararg with different size (same type)
-            v3 = similar!(nothing, template, 5, 5)
+            v3 = similar!(DISABLED_CPU, template, 5, 5)
             @test v3 isa Array{Float64}
             @test size(v3) == (5, 5)
 
             # Vararg with different type and size
-            v4 = similar!(nothing, template, Int32, 3, 4)
+            v4 = similar!(DISABLED_CPU, template, Int32, 3, 4)
             @test v4 isa Array{Int32}
             @test size(v4) == (3, 4)
         end
@@ -289,8 +289,8 @@
             @test eltype(m32) == Float32
         end
 
-        @testset "Nothing fallback" begin
-            v = unsafe_zeros!(nothing, Float64, 10)
+        @testset "DisabledPool fallback" begin
+            v = unsafe_zeros!(DISABLED_CPU, Float64, 10)
             @test v isa Array{Float64}
             @test all(v .== 0.0)
         end
@@ -335,8 +335,8 @@
             @test all(m32 .== 1.0f0)
         end
 
-        @testset "Nothing fallback" begin
-            v = unsafe_ones!(nothing, Float64, 10)
+        @testset "DisabledPool fallback" begin
+            v = unsafe_ones!(DISABLED_CPU, Float64, 10)
             @test v isa Array{Float64}
             @test all(v .== 1.0)
         end
@@ -374,12 +374,12 @@
             @test size(v) == (3, 4)
         end
 
-        @testset "Nothing fallback" begin
-            v = unsafe_similar!(nothing, template)
+        @testset "DisabledPool fallback" begin
+            v = unsafe_similar!(DISABLED_CPU, template)
             @test v isa Array{Float64}
             @test size(v) == size(template)
 
-            v2 = unsafe_similar!(nothing, template, Int64)
+            v2 = unsafe_similar!(DISABLED_CPU, template, Int64)
             @test v2 isa Array{Int64}
         end
     end
diff --git a/test/test_multidimensional.jl b/test/test_multidimensional.jl
index 29dd7ab..dca6f29 100644
--- a/test/test_multidimensional.jl
+++ b/test/test_multidimensional.jl
@@ -36,7 +36,7 @@ using AdaptiveArrayPools: checkpoint!, rewind!
         @test size(mat3) == (10, 10)
 
         # Without pool (fallback)
-        mat_alloc = acquire!(nothing, Float64, 10, 10)
+        mat_alloc = acquire!(DISABLED_CPU, Float64, 10, 10)
         @test mat_alloc isa Array{Float64,2}
         @test size(mat_alloc) == (10, 10)
     end
@@ -59,7 +59,7 @@ using AdaptiveArrayPools: checkpoint!, rewind!
         @test tensor isa Base.ReshapedArray{Float64, 3}
 
         # Fallback with nothing
-        mat_alloc = acquire!(nothing, Float64, dims)
+        mat_alloc = acquire!(DISABLED_CPU, Float64, dims)
         @test mat_alloc isa Array{Float64, 2}
         @test size(mat_alloc) == (3, 4)
     end
@@ -143,16 +143,16 @@ using AdaptiveArrayPools: checkpoint!, rewind!
     end
 
     @testset "unsafe_acquire! fallback (nothing)" begin
-        v = unsafe_acquire!(nothing, Float64, 10)
+        v = unsafe_acquire!(DISABLED_CPU, Float64, 10)
         @test v isa Vector{Float64}
         @test length(v) == 10
 
-        mat = unsafe_acquire!(nothing, Float64, 10, 10)
+        mat = unsafe_acquire!(DISABLED_CPU, Float64, 10, 10)
         @test mat isa Matrix{Float64}
         @test size(mat) == (10, 10)
 
         # Tuple support
-        arr = unsafe_acquire!(nothing, Float64, (3, 4, 5))
+        arr = unsafe_acquire!(DISABLED_CPU, Float64, (3, 4, 5))
         @test arr isa Array{Float64, 3}
         @test size(arr) == (3, 4, 5)
     end
@@ -220,12 +220,12 @@ using AdaptiveArrayPools: checkpoint!, rewind!
 
         rewind!(pool)
 
-        # Test nothing fallback
-        nothing_mat = acquire!(nothing, ref_mat)
+        # Test DisabledPool fallback
+        nothing_mat = acquire!(DISABLED_CPU, ref_mat)
         @test size(nothing_mat) == size(ref_mat)
         @test nothing_mat isa Matrix{Float64}
 
-        nothing_unsafe = unsafe_acquire!(nothing, ref_mat)
+        nothing_unsafe = unsafe_acquire!(DISABLED_CPU, ref_mat)
         @test size(nothing_unsafe) == size(ref_mat)
         @test nothing_unsafe isa Matrix{Float64}
     end
diff --git a/test/test_state.jl b/test/test_state.jl
index 0ba05de..9881f41 100644
--- a/test/test_state.jl
+++ b/test/test_state.jl
@@ -64,9 +64,9 @@
         @test all(v1 .== 1.0)
         @test all(v2 .== 2.0)
 
-        # nothing compatibility
-        @test checkpoint!(nothing) === nothing
-        @test rewind!(nothing) === nothing
+        # DisabledPool (DISABLED_CPU) compatibility
+        @test checkpoint!(DISABLED_CPU) === nothing
+        @test rewind!(DISABLED_CPU) === nothing
     end
 
     @testset "Nested checkpoint/rewind" begin
@@ -165,23 +165,23 @@
         @test pool.others[UInt8].n_active == 0
     end
 
-    @testset "Nothing fallback methods" begin
-        # acquire! with nothing pool
-        v1 = acquire!(nothing, Float64, 100)
+    @testset "DisabledPool fallback methods" begin
+        # acquire! with DisabledPool
+        v1 = acquire!(DISABLED_CPU, Float64, 100)
         @test v1 isa Vector{Float64}
         @test length(v1) == 100
 
-        # Multi-dimensional acquire! with nothing
-        mat = acquire!(nothing, Float64, 10, 20)
+        # Multi-dimensional acquire! with DisabledPool
+        mat = acquire!(DISABLED_CPU, Float64, 10, 20)
         @test mat isa Array{Float64, 2}
         @test size(mat) == (10, 20)
 
-        tensor = acquire!(nothing, Int32, 3, 4, 5)
+        tensor = acquire!(DISABLED_CPU, Int32, 3, 4, 5)
         @test tensor isa Array{Int32, 3}
         @test size(tensor) == (3, 4, 5)
 
-        # empty! with nothing
-        @test empty!(nothing) === nothing
+        # empty! with DisabledPool
+        @test empty!(DISABLED_CPU) === nothing
     end
 
     @testset "empty! pool clearing" begin
@@ -331,8 +331,8 @@
             @test length(pool.others[UInt16].vectors) == 1
         end
 
-        @testset "reset!(nothing) compatibility" begin
-            @test reset!(nothing) === nothing
+        @testset "reset!(DISABLED_CPU) compatibility" begin
+            @test reset!(DISABLED_CPU) === nothing
         end
 
         @testset "pool usable after reset!" begin
@@ -473,9 +473,9 @@
             @test pool.float32.n_active == 1  # Float32 unchanged
         end
 
-        @testset "reset!(nothing, Type) compatibility" begin
-            @test reset!(nothing, Float64) === nothing
-            @test reset!(nothing, Float64, Int64) === nothing
+        @testset "reset!(DISABLED_CPU, Type) compatibility" begin
+            @test reset!(DISABLED_CPU, Float64) === nothing
+            @test reset!(DISABLED_CPU, Float64, Int64) === nothing
         end
     end
 
@@ -646,11 +646,11 @@
         @test pool.complexf32.n_active == 0
         @test pool.bool.n_active == 0
 
-        # nothing fallback with types
-        @test checkpoint!(nothing, Float64) === nothing
-        @test checkpoint!(nothing, Float64, Int64) === nothing
-        @test rewind!(nothing, Float64) === nothing
-        @test rewind!(nothing, Float64, Int64) === nothing
+        # DisabledPool fallback with types
+        @test checkpoint!(DISABLED_CPU, Float64) === nothing
+        @test checkpoint!(DISABLED_CPU, Float64, Int64) === nothing
+        @test rewind!(DISABLED_CPU, Float64) === nothing
+        @test rewind!(DISABLED_CPU, Float64, Int64) === nothing
     end
 
     @testset "Internal TypedPool helpers" begin
diff --git a/test/test_utils.jl b/test/test_utils.jl
index fd384bf..ddbde0c 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -167,9 +167,9 @@ end
         @test_throws ErrorException _validate_pool_return(pool_view_uint8, pool)
         rewind!(pool)
 
-        # Nothing pool always passes
-        _validate_pool_return(pool_view, nothing)
-        _validate_pool_return(42, nothing)
+        # DisabledPool always passes
+        _validate_pool_return(pool_view, DISABLED_CPU)
+        _validate_pool_return(42, DISABLED_CPU)
     end
 
     @testset "_validate_pool_return with all fixed slots" begin

From 2b022d86f462ad8fc6f172bd3e4f399978b30e23 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 15:41:40 -0800
Subject: [PATCH 16/20] test: add coverage tests for DisabledPool and macro
 internals

- Add test/test_coverage.jl for CPU coverage (120 tests)
  - DisabledPool convenience functions (zeros!, ones!, similar!, etc.)
  - DisabledPool acquire functions
  - BackendNotLoadedError handling
  - _impl! delegators for DisabledPool
  - Macro internals (_filter_static_types, _generate_*, etc.)
  - Qualified name transformation in _transform_acquire_calls

- Add test/cuda/test_disabled_pool.jl for CUDA DisabledPool (253 lines)
  - DISABLED_CUDA singleton and default_eltype
  - zeros!/ones! with type and default Float32
  - similar!/unsafe_similar! with CuArray and CPU->GPU conversion
  - acquire!/unsafe_acquire! variants
---
 test/cuda/runtests.jl           |   1 +
 test/cuda/test_disabled_pool.jl | 253 +++++++++++++++++++
 test/runtests.jl                |   1 +
 test/test_coverage.jl           | 425 ++++++++++++++++++++++++++++++++
 4 files changed, 680 insertions(+)
 create mode 100644 test/cuda/test_disabled_pool.jl
 create mode 100644 test/test_coverage.jl

diff --git a/test/cuda/runtests.jl b/test/cuda/runtests.jl
index 1bfab0c..cc2fb15 100644
--- a/test/cuda/runtests.jl
+++ b/test/cuda/runtests.jl
@@ -40,4 +40,5 @@ else
     include("test_nway_cache.jl")
     include("test_display.jl")
     include("test_convenience.jl")
+    include("test_disabled_pool.jl")
 end
diff --git a/test/cuda/test_disabled_pool.jl b/test/cuda/test_disabled_pool.jl
new file mode 100644
index 0000000..6c7da69
--- /dev/null
+++ b/test/cuda/test_disabled_pool.jl
@@ -0,0 +1,253 @@
+# Tests for DisabledPool{:cuda} dispatch methods
+# These ensure correct CuArray allocation when pooling is disabled with :cuda backend
+
+using AdaptiveArrayPools: DisabledPool, DISABLED_CPU, pooling_enabled, default_eltype
+
+@testset "DisabledPool{:cuda}" begin
+    # Get DISABLED_CUDA from extension
+    DISABLED_CUDA = ext.DISABLED_CUDA
+
+    @testset "DISABLED_CUDA singleton" begin
+        @test DISABLED_CUDA isa DisabledPool{:cuda}
+        @test !pooling_enabled(DISABLED_CUDA)
+    end
+
+    @testset "default_eltype" begin
+        @test default_eltype(DISABLED_CUDA) === Float32
+    end
+
+    @testset "zeros!" begin
+        # With type
+        v1 = zeros!(DISABLED_CUDA, Float32, 10)
+        @test v1 isa CuVector{Float32}
+        @test length(v1) == 10
+        @test all(v1 .== 0.0f0)
+
+        v2 = zeros!(DISABLED_CUDA, Float64, 5, 5)
+        @test v2 isa CuArray{Float64,2}
+        @test size(v2) == (5, 5)
+        @test all(v2 .== 0.0)
+
+        # Without type (default Float32)
+        v3 = zeros!(DISABLED_CUDA, 8)
+        @test v3 isa CuVector{Float32}
+        @test length(v3) == 8
+
+        v4 = zeros!(DISABLED_CUDA, 3, 4)
+        @test v4 isa CuArray{Float32,2}
+        @test size(v4) == (3, 4)
+
+        # Tuple dims
+        v5 = zeros!(DISABLED_CUDA, Float32, (2, 3, 4))
+        @test v5 isa CuArray{Float32,3}
+        @test size(v5) == (2, 3, 4)
+
+        v6 = zeros!(DISABLED_CUDA, (5, 6))
+        @test v6 isa CuArray{Float32,2}
+        @test size(v6) == (5, 6)
+    end
+
+    @testset "ones!" begin
+        # With type
+        v1 = ones!(DISABLED_CUDA, Float32, 10)
+        @test v1 isa CuVector{Float32}
+        @test length(v1) == 10
+        @test all(v1 .== 1.0f0)
+
+        v2 = ones!(DISABLED_CUDA, Float64, 5, 5)
+        @test v2 isa CuArray{Float64,2}
+        @test size(v2) == (5, 5)
+        @test all(v2 .== 1.0)
+
+        # Without type (default Float32)
+        v3 = ones!(DISABLED_CUDA, 8)
+        @test v3 isa CuVector{Float32}
+        @test all(v3 .== 1.0f0)
+
+        v4 = ones!(DISABLED_CUDA, 3, 4)
+        @test v4 isa CuArray{Float32,2}
+        @test size(v4) == (3, 4)
+
+        # Tuple dims
+        v5 = ones!(DISABLED_CUDA, Float32, (2, 3))
+        @test v5 isa CuArray{Float32,2}
+        @test size(v5) == (2, 3)
+
+        v6 = ones!(DISABLED_CUDA, (4, 5))
+        @test v6 isa CuArray{Float32,2}
+        @test size(v6) == (4, 5)
+    end
+
+    @testset "similar! with CuArray input" begin
+        template = CUDA.zeros(Float32, 10)
+
+        v1 = similar!(DISABLED_CUDA, template)
+        @test v1 isa CuVector{Float32}
+        @test length(v1) == 10
+
+        v2 = similar!(DISABLED_CUDA, template, Float64)
+        @test v2 isa CuVector{Float64}
+        @test length(v2) == 10
+
+        v3 = similar!(DISABLED_CUDA, template, 5, 5)
+        @test v3 isa CuArray{Float32,2}
+        @test size(v3) == (5, 5)
+
+        v4 = similar!(DISABLED_CUDA, template, Float64, 3, 4)
+        @test v4 isa CuArray{Float64,2}
+        @test size(v4) == (3, 4)
+    end
+
+    @testset "similar! with AbstractArray input (CPU->GPU)" begin
+        cpu_template = zeros(Float64, 8)
+
+        v1 = similar!(DISABLED_CUDA, cpu_template)
+        @test v1 isa CuVector{Float64}
+        @test length(v1) == 8
+
+        v2 = similar!(DISABLED_CUDA, cpu_template, Float32)
+        @test v2 isa CuVector{Float32}
+        @test length(v2) == 8
+
+        v3 = similar!(DISABLED_CUDA, cpu_template, 4, 4)
+        @test v3 isa CuArray{Float64,2}
+        @test size(v3) == (4, 4)
+
+        v4 = similar!(DISABLED_CUDA, cpu_template, Int32, 2, 3)
+        @test v4 isa CuArray{Int32,2}
+        @test size(v4) == (2, 3)
+    end
+
+    @testset "unsafe_zeros!" begin
+        v1 = unsafe_zeros!(DISABLED_CUDA, Float32, 10)
+        @test v1 isa CuVector{Float32}
+        @test all(v1 .== 0.0f0)
+
+        v2 = unsafe_zeros!(DISABLED_CUDA, Float64, 5, 5)
+        @test v2 isa CuArray{Float64,2}
+        @test size(v2) == (5, 5)
+
+        # Without type
+        v3 = unsafe_zeros!(DISABLED_CUDA, 8)
+        @test v3 isa CuVector{Float32}
+
+        # Tuple dims
+        v4 = unsafe_zeros!(DISABLED_CUDA, Float32, (3, 4))
+        @test v4 isa CuArray{Float32,2}
+        @test size(v4) == (3, 4)
+
+        v5 = unsafe_zeros!(DISABLED_CUDA, (2, 3))
+        @test v5 isa CuArray{Float32,2}
+    end
+
+    @testset "unsafe_ones!" begin
+        v1 = unsafe_ones!(DISABLED_CUDA, Float32, 10)
+        @test v1 isa CuVector{Float32}
+        @test all(v1 .== 1.0f0)
+
+        v2 = unsafe_ones!(DISABLED_CUDA, Float64, 5, 5)
+        @test v2 isa CuArray{Float64,2}
+        @test size(v2) == (5, 5)
+
+        # Without type
+        v3 = unsafe_ones!(DISABLED_CUDA, 8)
+        @test v3 isa CuVector{Float32}
+
+        # Tuple dims
+        v4 = unsafe_ones!(DISABLED_CUDA, Float32, (3, 4))
+        @test v4 isa CuArray{Float32,2}
+
+        v5 = unsafe_ones!(DISABLED_CUDA, (2, 3))
+        @test v5 isa CuArray{Float32,2}
+    end
+
+    @testset "unsafe_similar! with CuArray input" begin
+        template = CUDA.zeros(Float32, 10)
+
+        v1 = unsafe_similar!(DISABLED_CUDA, template)
+        @test v1 isa CuVector{Float32}
+
+        v2 = unsafe_similar!(DISABLED_CUDA, template, Float64)
+        @test v2 isa CuVector{Float64}
+
+        v3 = unsafe_similar!(DISABLED_CUDA, template, 5, 5)
+        @test v3 isa CuArray{Float32,2}
+
+        v4 = unsafe_similar!(DISABLED_CUDA, template, Float64, 3, 4)
+        @test v4 isa CuArray{Float64,2}
+    end
+
+    @testset "unsafe_similar! with AbstractArray input (CPU->GPU)" begin
+        cpu_template = zeros(Float64, 8)
+
+        v1 = unsafe_similar!(DISABLED_CUDA, cpu_template)
+        @test v1 isa CuVector{Float64}
+
+        v2 = unsafe_similar!(DISABLED_CUDA, cpu_template, Float32)
+        @test v2 isa CuVector{Float32}
+
+        v3 = unsafe_similar!(DISABLED_CUDA, cpu_template, 4, 4)
+        @test v3 isa CuArray{Float64,2}
+
+        v4 = unsafe_similar!(DISABLED_CUDA, cpu_template, Int32, 2, 3)
+        @test v4 isa CuArray{Int32,2}
+    end
+
+    @testset "acquire!" begin
+        # Type + single dim
+        v1 = acquire!(DISABLED_CUDA, Float32, 10)
+        @test v1 isa CuVector{Float32}
+        @test length(v1) == 10
+
+        # Type + vararg dims
+        v2 = acquire!(DISABLED_CUDA, Float64, 5, 5)
+        @test v2 isa CuArray{Float64,2}
+        @test size(v2) == (5, 5)
+
+        # Type + tuple dims
+        v3 = acquire!(DISABLED_CUDA, Float32, (3, 4, 5))
+        @test v3 isa CuArray{Float32,3}
+        @test size(v3) == (3, 4, 5)
+
+        # CuArray template
+        template = CUDA.zeros(Float32, 8)
+        v4 = acquire!(DISABLED_CUDA, template)
+        @test v4 isa CuVector{Float32}
+        @test length(v4) == 8
+
+        # AbstractArray template (CPU->GPU)
+        cpu_template = zeros(Float64, 6)
+        v5 = acquire!(DISABLED_CUDA, cpu_template)
+        @test v5 isa CuVector{Float64}
+        @test length(v5) == 6
+    end
+
+    @testset "unsafe_acquire!" begin
+        # Type + single dim
+        v1 = unsafe_acquire!(DISABLED_CUDA, Float32, 10)
+        @test v1 isa CuVector{Float32}
+        @test length(v1) == 10
+
+        # Type + vararg dims
+        v2 = unsafe_acquire!(DISABLED_CUDA, Float64, 5, 5)
+        @test v2 isa CuArray{Float64,2}
+        @test size(v2) == (5, 5)
+
+        # Type + tuple dims
+        v3 = unsafe_acquire!(DISABLED_CUDA, Float32, (3, 4, 5))
+        @test v3 isa CuArray{Float32,3}
+        @test size(v3) == (3, 4, 5)
+
+        # CuArray template
+        template = CUDA.zeros(Float32, 8)
+        v4 = unsafe_acquire!(DISABLED_CUDA, template)
+        @test v4 isa CuVector{Float32}
+        @test length(v4) == 8
+
+        # AbstractArray template (CPU->GPU)
+        cpu_template = zeros(Float64, 6)
+        v5 = unsafe_acquire!(DISABLED_CUDA, cpu_template)
+        @test v5 isa CuVector{Float64}
+        @test length(v5) == 6
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 4782a8a..8ac91c8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -25,6 +25,7 @@ else
     include("test_fixed_slots.jl")
     include("test_backend_macro_expansion.jl")
     include("test_convenience.jl")
+    include("test_coverage.jl")
 
     # CUDA extension tests (auto-detect, skip with TEST_CUDA=false)
     if get(ENV, "TEST_CUDA", "true") != "false"
diff --git a/test/test_coverage.jl b/test/test_coverage.jl
new file mode 100644
index 0000000..3d7a132
--- /dev/null
+++ b/test/test_coverage.jl
@@ -0,0 +1,425 @@
+# ==============================================================================
+# Coverage Tests - Simple tests to cover uncovered code paths
+# ==============================================================================
+
+@testset "Coverage Tests" begin
+
+    @testset "DisabledPool convenience functions" begin
+        pool = DISABLED_CPU
+
+        # unsafe_zeros! without explicit type (uses default_eltype)
+        v = unsafe_zeros!(pool, 5)
+        @test v isa Vector{Float64}
+        @test all(v .== 0.0)
+
+        v = unsafe_zeros!(pool, (3, 3))
+        @test v isa Matrix{Float64}
+
+        # unsafe_ones! without explicit type
+        v = unsafe_ones!(pool, 5)
+        @test v isa Vector{Float64}
+        @test all(v .== 1.0)
+
+        v = unsafe_ones!(pool, (3, 3))
+        @test v isa Matrix{Float64}
+
+        # unsafe_similar! with dims
+        template = rand(5, 5)
+        v = unsafe_similar!(pool, template, 3, 3)
+        @test v isa Matrix{Float64}
+        @test size(v) == (3, 3)
+
+        v = unsafe_similar!(pool, template, Int, 4, 4)
+        @test v isa Matrix{Int}
+        @test size(v) == (4, 4)
+    end
+
+    @testset "DisabledPool acquire functions" begin
+        pool = DISABLED_CPU
+
+        # acquire! with vararg dims
+        v = acquire!(pool, Float32, 3, 3)
+        @test v isa Array{Float32,2}
+
+        # acquire! with tuple dims
+        v = acquire!(pool, Float32, (2, 2))
+        @test v isa Array{Float32,2}
+
+        # acquire! with similar
+        template = rand(Int32, 4, 4)
+        v = acquire!(pool, template)
+        @test v isa Array{Int32,2}
+
+        # unsafe_acquire! variants
+        v = unsafe_acquire!(pool, Float32, 3, 3)
+        @test v isa Array{Float32,2}
+
+        v = unsafe_acquire!(pool, Float32, (2, 2))
+        @test v isa Array{Float32,2}
+
+        v = unsafe_acquire!(pool, template)
+        @test v isa Array{Int32,2}
+    end
+
+    @testset "BackendNotLoadedError" begin
+        # Test error message for :cuda backend
+        err = AdaptiveArrayPools.BackendNotLoadedError(:cuda)
+        io = IOBuffer()
+        showerror(io, err)
+        msg = String(take!(io))
+        @test occursin("cuda", msg)
+        @test occursin("CUDA.jl", msg)
+
+        # Test error message for other backends
+        err2 = AdaptiveArrayPools.BackendNotLoadedError(:metal)
+        io2 = IOBuffer()
+        showerror(io2, err2)
+        msg2 = String(take!(io2))
+        @test occursin("metal", msg2)
+        @test occursin("backend package", msg2)
+
+        # Test that errors are thrown for unknown backend
+        fake_pool = DisabledPool{:fake_backend}()
+        @test try zeros!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try ones!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try similar!(fake_pool, rand(3)); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try unsafe_zeros!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try unsafe_ones!(fake_pool, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try unsafe_similar!(fake_pool, rand(3)); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try acquire!(fake_pool, Float64, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+        @test try unsafe_acquire!(fake_pool, Float64, 10); false catch e; e isa AdaptiveArrayPools.BackendNotLoadedError end
+    end
+
+    @testset "_impl! delegators for DisabledPool" begin
+        pool = DISABLED_CPU
+
+        # These are called by macro-transformed code
+        v = AdaptiveArrayPools._zeros_impl!(pool, Float64, 5)
+        @test v isa Vector{Float64}
+        @test all(v .== 0.0)
+
+        v = AdaptiveArrayPools._ones_impl!(pool, Float64, 5)
+        @test v isa Vector{Float64}
+        @test all(v .== 1.0)
+
+        template = rand(3, 3)
+        v = AdaptiveArrayPools._similar_impl!(pool, template)
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._unsafe_zeros_impl!(pool, Float64, 5)
+        @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._unsafe_ones_impl!(pool, Float64, 5)
+        @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._unsafe_similar_impl!(pool, template)
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._acquire_impl!(pool, Float64, 5)
+        @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._unsafe_acquire_impl!(pool, Float64, 5)
+        @test v isa Vector{Float64}
+    end
+
+    @testset "Macro internals" begin
+        # Test _disabled_pool_expr for cpu backend
+        # Note: Returns the actual DisabledPool instance (const interpolation)
+        cpu_result = AdaptiveArrayPools._disabled_pool_expr(:cpu)
+        @test cpu_result isa DisabledPool{:cpu}
+
+        # Test _disabled_pool_expr for non-cpu backend (triggers the else branch)
+        cuda_result = AdaptiveArrayPools._disabled_pool_expr(:cuda)
+        @test cuda_result isa DisabledPool{:cuda}
+
+        # Test _is_function_def
+        @test AdaptiveArrayPools._is_function_def(:(function foo() end)) == true
+        @test AdaptiveArrayPools._is_function_def(:(foo(x) = x + 1)) == true
+        @test AdaptiveArrayPools._is_function_def(:(x = 1)) == false
+        @test AdaptiveArrayPools._is_function_def(:(begin; end)) == false
+
+        # Test _filter_static_types
+        types = Set{Any}([Float64, Int64])
+        static, has_dyn = AdaptiveArrayPools._filter_static_types(types)
+        @test Float64 in static
+        @test Int64 in static
+        @test !has_dyn
+
+        # With local vars
+        types2 = Set{Any}([:T, Float64])
+        local_vars = Set([:T])
+        static2, has_dyn2 = AdaptiveArrayPools._filter_static_types(types2, local_vars)
+        @test Float64 in static2
+        @test !(:T in static2)
+        @test has_dyn2
+
+        # With curly expression (parametric type)
+        types3 = Set{Any}([Expr(:curly, :Vector, :Float64)])
+        static3, has_dyn3 = AdaptiveArrayPools._filter_static_types(types3)
+        @test isempty(static3)
+        @test has_dyn3
+
+        # With eltype expression
+        types4 = Set{Any}([Expr(:call, :eltype, :x)])
+        static4, has_dyn4 = AdaptiveArrayPools._filter_static_types(types4)
+        @test length(static4) == 1  # eltype(x) is safe if x is not local
+        @test !has_dyn4
+
+        # With eltype of local var
+        types5 = Set{Any}([Expr(:call, :eltype, :local_arr)])
+        local_vars5 = Set([:local_arr])
+        static5, has_dyn5 = AdaptiveArrayPools._filter_static_types(types5, local_vars5)
+        @test isempty(static5)
+        @test has_dyn5
+
+        # With default_eltype expression
+        types6 = Set{Any}([Expr(:call, :default_eltype, :pool)])
+        static6, has_dyn6 = AdaptiveArrayPools._filter_static_types(types6)
+        @test length(static6) == 1
+        @test !has_dyn6
+
+        # With GlobalRef (concrete type reference)
+        types7 = Set{Any}([GlobalRef(Base, :Float64)])
+        static7, has_dyn7 = AdaptiveArrayPools._filter_static_types(types7)
+        @test length(static7) == 1
+        @test !has_dyn7
+
+        # Test _generate_typed_checkpoint_call
+        pool_expr = :pool
+        checkpoint_call = AdaptiveArrayPools._generate_typed_checkpoint_call(pool_expr, [Float64])
+        @test checkpoint_call isa Expr
+
+        empty_checkpoint = AdaptiveArrayPools._generate_typed_checkpoint_call(pool_expr, [])
+        @test empty_checkpoint isa Expr
+
+        # Test _generate_typed_rewind_call
+        rewind_call = AdaptiveArrayPools._generate_typed_rewind_call(pool_expr, [Float64])
+        @test rewind_call isa Expr
+
+        empty_rewind = AdaptiveArrayPools._generate_typed_rewind_call(pool_expr, [])
+        @test empty_rewind isa Expr
+    end
+
+    @testset "pool_stats error handling" begin
+        # Test pool_stats(:cuda) without CUDA loaded
+        @test_throws MethodError pool_stats(:cuda)
+    end
+
+    @testset "set_cache_ways! validation" begin
+        # Test invalid range
+        @test_throws ArgumentError AdaptiveArrayPools.set_cache_ways!(0)
+        @test_throws ArgumentError AdaptiveArrayPools.set_cache_ways!(17)
+    end
+
+    @testset "_transform_acquire_calls with qualified names" begin
+        # Test qualified name transformation (AdaptiveArrayPools.function!)
+        # These test the elseif branches for qualified names in _transform_acquire_calls
+
+        # Qualified unsafe_zeros!
+        expr1 = :(AdaptiveArrayPools.unsafe_zeros!(pool, Float64, 10))
+        result1 = AdaptiveArrayPools._transform_acquire_calls(expr1, :pool)
+        @test result1.args[1] === AdaptiveArrayPools._UNSAFE_ZEROS_IMPL_REF
+
+        # Qualified unsafe_ones!
+        expr2 = :(AdaptiveArrayPools.unsafe_ones!(pool, Float64, 10))
+        result2 = AdaptiveArrayPools._transform_acquire_calls(expr2, :pool)
+        @test result2.args[1] === AdaptiveArrayPools._UNSAFE_ONES_IMPL_REF
+
+        # Qualified unsafe_similar!
+        expr3 = :(AdaptiveArrayPools.unsafe_similar!(pool, x))
+        result3 = AdaptiveArrayPools._transform_acquire_calls(expr3, :pool)
+        @test result3.args[1] === AdaptiveArrayPools._UNSAFE_SIMILAR_IMPL_REF
+
+        # Qualified zeros!
+        expr4 = :(AdaptiveArrayPools.zeros!(pool, Float64, 10))
+        result4 = AdaptiveArrayPools._transform_acquire_calls(expr4, :pool)
+        @test result4.args[1] === AdaptiveArrayPools._ZEROS_IMPL_REF
+
+        # Qualified ones!
+        expr5 = :(AdaptiveArrayPools.ones!(pool, Float64, 10))
+        result5 = AdaptiveArrayPools._transform_acquire_calls(expr5, :pool)
+        @test result5.args[1] === AdaptiveArrayPools._ONES_IMPL_REF
+
+        # Qualified similar!
+        expr6 = :(AdaptiveArrayPools.similar!(pool, x))
+        result6 = AdaptiveArrayPools._transform_acquire_calls(expr6, :pool)
+        @test result6.args[1] === AdaptiveArrayPools._SIMILAR_IMPL_REF
+
+        # Qualified acquire!
+        expr7 = :(AdaptiveArrayPools.acquire!(pool, Float64, 10))
+        result7 = AdaptiveArrayPools._transform_acquire_calls(expr7, :pool)
+        @test result7.args[1] === AdaptiveArrayPools._ACQUIRE_IMPL_REF
+
+        # Qualified unsafe_acquire!
+        expr8 = :(AdaptiveArrayPools.unsafe_acquire!(pool, Float64, 10))
+        result8 = AdaptiveArrayPools._transform_acquire_calls(expr8, :pool)
+        @test result8.args[1] === AdaptiveArrayPools._UNSAFE_ACQUIRE_IMPL_REF
+
+        # Qualified acquire_view! (alias)
+        expr9 = :(AdaptiveArrayPools.acquire_view!(pool, Float64, 10))
+        result9 = AdaptiveArrayPools._transform_acquire_calls(expr9, :pool)
+        @test result9.args[1] === AdaptiveArrayPools._ACQUIRE_IMPL_REF
+
+        # Qualified acquire_array! (alias)
+        expr10 = :(AdaptiveArrayPools.acquire_array!(pool, Float64, 10))
+        result10 = AdaptiveArrayPools._transform_acquire_calls(expr10, :pool)
+        @test result10.args[1] === AdaptiveArrayPools._UNSAFE_ACQUIRE_IMPL_REF
+    end
+
+    @testset "_generate_pool_code_with_backend" begin
+        # Test that backend-specific code generation works
+        # With USE_POOLING=false, it should return DisabledPool expression
+
+        # Test block expression with :cpu backend
+        result_cpu = AdaptiveArrayPools._generate_pool_code_with_backend(:cpu, :pool, :(x = 1), true)
+        @test result_cpu isa Expr
+
+        # Test block expression with :cuda backend
+        result_cuda = AdaptiveArrayPools._generate_pool_code_with_backend(:cuda, :pool, :(x = 1), true)
+        @test result_cuda isa Expr
+
+        # Test force_enable=false (maybe_with_pool path)
+        result_maybe = AdaptiveArrayPools._generate_pool_code_with_backend(:cpu, :pool, :(x = 1), false)
+        @test result_maybe isa Expr
+
+        # Test function definition with backend
+        func_expr = :(function foo() end)
+        result_func = AdaptiveArrayPools._generate_pool_code_with_backend(:cpu, :pool, func_expr, true)
+        @test result_func isa Expr
+        @test result_func.head == :function
+    end
+
+    @testset "_generate_function_pool_code" begin
+        # Test function code generation with disable_pooling=true
+        func_expr = :(function bar(x) x + 1 end)
+        result = AdaptiveArrayPools._generate_function_pool_code(:pool, func_expr, true, true, :cpu)
+        @test result isa Expr
+        @test result.head == :function
+
+        # Test with force_enable=false, disable_pooling=false
+        result2 = AdaptiveArrayPools._generate_function_pool_code(:pool, func_expr, false, false, :cpu)
+        @test result2 isa Expr
+
+        # Test with short form function
+        short_func = :(baz(x) = x * 2)
+        result3 = AdaptiveArrayPools._generate_function_pool_code(:pool, short_func, true, true, :cpu)
+        @test result3 isa Expr
+        @test result3.head == :(=)
+    end
+
+    @testset "_generate_function_pool_code_with_backend" begin
+        # Test function code generation with backend
+        func_expr = :(function compute(x) x + 1 end)
+
+        # With disable_pooling=true
+        result1 = AdaptiveArrayPools._generate_function_pool_code_with_backend(:cpu, :pool, func_expr, true)
+        @test result1 isa Expr
+        @test result1.head == :function
+
+        # With disable_pooling=false (generates full checkpoint/rewind)
+        result2 = AdaptiveArrayPools._generate_function_pool_code_with_backend(:cuda, :pool, func_expr, false)
+        @test result2 isa Expr
+        @test result2.head == :function
+
+        # Test with short form function
+        short_func = :(fast(x) = x * 2)
+        result3 = AdaptiveArrayPools._generate_function_pool_code_with_backend(:cpu, :pool, short_func, true)
+        @test result3 isa Expr
+        @test result3.head == :(=)
+    end
+
+    @testset "_extract_acquire_types with qualified names" begin
+        # Test type extraction from qualified function calls
+        # Note: _extract_acquire_types returns Symbols, not Types
+        expr1 = :(AdaptiveArrayPools.zeros!(pool, Float32, 10))
+        types1 = AdaptiveArrayPools._extract_acquire_types(expr1, :pool)
+        @test :Float32 in types1
+
+        expr2 = :(AdaptiveArrayPools.ones!(pool, Int64, 5))
+        types2 = AdaptiveArrayPools._extract_acquire_types(expr2, :pool)
+        @test :Int64 in types2
+
+        expr3 = :(AdaptiveArrayPools.similar!(pool, x, Float64))
+        types3 = AdaptiveArrayPools._extract_acquire_types(expr3, :pool)
+        @test :Float64 in types3
+
+        # Test acquire! qualified names
+        expr4 = :(AdaptiveArrayPools.acquire!(pool, Float64, 10))
+        types4 = AdaptiveArrayPools._extract_acquire_types(expr4, :pool)
+        @test :Float64 in types4
+
+        # Test acquire_view! alias
+        expr5 = :(AdaptiveArrayPools.acquire_view!(pool, Int32, 5))
+        types5 = AdaptiveArrayPools._extract_acquire_types(expr5, :pool)
+        @test :Int32 in types5
+
+        # Test acquire_array! alias
+        expr6 = :(AdaptiveArrayPools.acquire_array!(pool, Float32, 3, 3))
+        types6 = AdaptiveArrayPools._extract_acquire_types(expr6, :pool)
+        @test :Float32 in types6
+    end
+
+    @testset "_looks_like_type" begin
+        # Test type-like expressions
+        @test AdaptiveArrayPools._looks_like_type(:Float64) == true
+        @test AdaptiveArrayPools._looks_like_type(:Int) == true
+        @test AdaptiveArrayPools._looks_like_type(:x) == false  # lowercase
+        @test AdaptiveArrayPools._looks_like_type(Expr(:curly, :Vector, :Float64)) == true
+        @test AdaptiveArrayPools._looks_like_type(GlobalRef(Base, :Float64)) == true
+        @test AdaptiveArrayPools._looks_like_type(10) == false
+    end
+
+    @testset "_uses_local_var" begin
+        local_vars = Set([:x, :y])
+
+        # Direct local var
+        @test AdaptiveArrayPools._uses_local_var(:x, local_vars) == true
+        @test AdaptiveArrayPools._uses_local_var(:z, local_vars) == false
+
+        # Field access: x.field
+        @test AdaptiveArrayPools._uses_local_var(:(x.field), local_vars) == true
+        @test AdaptiveArrayPools._uses_local_var(:(z.field), local_vars) == false
+
+        # Indexing: x[i]
+        @test AdaptiveArrayPools._uses_local_var(:(x[1]), local_vars) == true
+        @test AdaptiveArrayPools._uses_local_var(:(z[1]), local_vars) == false
+
+        # Nested: x.a.b
+        @test AdaptiveArrayPools._uses_local_var(:(x.a.b), local_vars) == true
+
+        # Call expression: foo(x)
+        @test AdaptiveArrayPools._uses_local_var(:(foo(x)), local_vars) == true
+        @test AdaptiveArrayPools._uses_local_var(:(foo(z)), local_vars) == false
+    end
+
+    @testset "_extract_local_assignments" begin
+        # Simple assignment
+        expr1 = :(T = eltype(x))
+        locals1 = AdaptiveArrayPools._extract_local_assignments(expr1)
+        @test :T in locals1
+
+        # Typed assignment
+        expr2 = :(T::Type = Float64)
+        locals2 = AdaptiveArrayPools._extract_local_assignments(expr2)
+        @test :T in locals2
+
+        # local declaration
+        expr3 = :(local T)
+        locals3 = AdaptiveArrayPools._extract_local_assignments(expr3)
+        @test :T in locals3
+
+        # local with assignment
+        expr4 = :(local T = Int)
+        locals4 = AdaptiveArrayPools._extract_local_assignments(expr4)
+        @test :T in locals4
+
+        # Nested in block
+        expr5 = quote
+            x = 1
+            y = 2
+        end
+        locals5 = AdaptiveArrayPools._extract_local_assignments(expr5)
+        @test :x in locals5
+        @test :y in locals5
+    end
+end

From 159235bb330de0fe2147005f84689360a137a9e8 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 16:07:46 -0800
Subject: [PATCH 17/20] refactor(impl): use explicit type signatures for
 DisabledPool delegators

Replace variadic args... with explicit overloads for _impl! delegators
to ensure proper CUDA inlining and type specialization:
- _zeros_impl!, _ones_impl!, _similar_impl!
- _unsafe_zeros_impl!, _unsafe_ones_impl!, _unsafe_similar_impl!
- _acquire_impl!, _unsafe_acquire_impl!

Add comprehensive tests covering all explicit overloads including
tuple dimension variants.
---
 src/acquire.jl        | 13 +++++-
 src/convenience.jl    | 46 +++++++++++++++++----
 test/test_coverage.jl | 93 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 142 insertions(+), 10 deletions(-)

diff --git a/src/acquire.jl b/src/acquire.jl
index e45d0eb..98c2298 100644
--- a/src/acquire.jl
+++ b/src/acquire.jl
@@ -433,5 +433,14 @@ const _acquire_array_impl! = _unsafe_acquire_impl!
 @inline unsafe_acquire!(p::DisabledPool{B}, args...) where {B} = _throw_backend_not_loaded(B)
 
 # --- _impl! delegators for DisabledPool (macro transformation support) ---
-@inline _acquire_impl!(p::DisabledPool, args...) = acquire!(p, args...)
-@inline _unsafe_acquire_impl!(p::DisabledPool, args...) = unsafe_acquire!(p, args...)
+# Reached when USE_POOLING=true and @maybe_with_pool runs while MAYBE_POOLING_ENABLED[]=false.
+# Explicit overloads for proper inlining (especially important for CUDA backend).
+@inline _acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = acquire!(p, T, n)
+@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = acquire!(p, T, dims...)
+@inline _acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = acquire!(p, T, dims)
+@inline _acquire_impl!(p::DisabledPool, x::AbstractArray) = acquire!(p, x)
+
+@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, n::Int) where {T} = unsafe_acquire!(p, T, n)
+@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_acquire!(p, T, dims...)
+@inline _unsafe_acquire_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_acquire!(p, T, dims)
+@inline _unsafe_acquire_impl!(p::DisabledPool, x::AbstractArray) = unsafe_acquire!(p, x)
diff --git a/src/convenience.jl b/src/convenience.jl
index bf7badc..161b8bb 100644
--- a/src/convenience.jl
+++ b/src/convenience.jl
@@ -478,10 +478,42 @@ end
 # ==============================================================================
 # When macros transform zeros!(pool, ...) → _zeros_impl!(pool, ...),
 # DisabledPool needs to delegate back to the public API.
-
-@inline _zeros_impl!(p::DisabledPool, args...) = zeros!(p, args...)
-@inline _ones_impl!(p::DisabledPool, args...) = ones!(p, args...)
-@inline _similar_impl!(p::DisabledPool, args...) = similar!(p, args...)
-@inline _unsafe_zeros_impl!(p::DisabledPool, args...) = unsafe_zeros!(p, args...)
-@inline _unsafe_ones_impl!(p::DisabledPool, args...) = unsafe_ones!(p, args...)
-@inline _unsafe_similar_impl!(p::DisabledPool, args...) = unsafe_similar!(p, args...)
+#
+# Reached when USE_POOLING=true and @maybe_with_pool runs while MAYBE_POOLING_ENABLED[]=false.
+# Explicit overloads for proper inlining (especially important for CUDA backend).
+
+# --- _zeros_impl! ---
+@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = zeros!(p, T, dims...)
+@inline _zeros_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = zeros!(p, dims...)
+@inline _zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = zeros!(p, T, dims)
+@inline _zeros_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = zeros!(p, dims)
+
+# --- _ones_impl! ---
+@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = ones!(p, T, dims...)
+@inline _ones_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = ones!(p, dims...)
+@inline _ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = ones!(p, T, dims)
+@inline _ones_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = ones!(p, dims)
+
+# --- _similar_impl! ---
+@inline _similar_impl!(p::DisabledPool, x::AbstractArray) = similar!(p, x)
+@inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}) where {T} = similar!(p, x, T)
+@inline _similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} = similar!(p, x, dims...)
+@inline _similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = similar!(p, x, T, dims...)
+
+# --- _unsafe_zeros_impl! ---
+@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_zeros!(p, T, dims...)
+@inline _unsafe_zeros_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = unsafe_zeros!(p, dims...)
+@inline _unsafe_zeros_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_zeros!(p, T, dims)
+@inline _unsafe_zeros_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = unsafe_zeros!(p, dims)
+
+# --- _unsafe_ones_impl! ---
+@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_ones!(p, T, dims...)
+@inline _unsafe_ones_impl!(p::DisabledPool, dims::Vararg{Int,N}) where {N} = unsafe_ones!(p, dims...)
+@inline _unsafe_ones_impl!(p::DisabledPool, ::Type{T}, dims::NTuple{N,Int}) where {T,N} = unsafe_ones!(p, T, dims)
+@inline _unsafe_ones_impl!(p::DisabledPool, dims::NTuple{N,Int}) where {N} = unsafe_ones!(p, dims)
+
+# --- _unsafe_similar_impl! ---
+@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray) = unsafe_similar!(p, x)
+@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}) where {T} = unsafe_similar!(p, x, T)
+@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, dims::Vararg{Int,N}) where {N} = unsafe_similar!(p, x, dims...)
+@inline _unsafe_similar_impl!(p::DisabledPool, x::AbstractArray, ::Type{T}, dims::Vararg{Int,N}) where {T,N} = unsafe_similar!(p, x, T, dims...)
diff --git a/test/test_coverage.jl b/test/test_coverage.jl
index 3d7a132..e01c0fb 100644
--- a/test/test_coverage.jl
+++ b/test/test_coverage.jl
@@ -93,33 +93,124 @@
     @testset "_impl! delegators for DisabledPool" begin
         pool = DISABLED_CPU
 
-        # These are called by macro-transformed code
+        # --- _zeros_impl! ---
+        # Type + varargs
         v = AdaptiveArrayPools._zeros_impl!(pool, Float64, 5)
         @test v isa Vector{Float64}
         @test all(v .== 0.0)
 
+        v = AdaptiveArrayPools._zeros_impl!(pool, Float32, 3, 4)
+        @test v isa Matrix{Float32}
+        @test size(v) == (3, 4)
+
+        # No type (default eltype)
+        v = AdaptiveArrayPools._zeros_impl!(pool, 5)
+        @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._zeros_impl!(pool, 3, 4)
+        @test v isa Matrix{Float64}
+
+        # Tuple dims
+        v = AdaptiveArrayPools._zeros_impl!(pool, Float64, (2, 3))
+        @test v isa Matrix{Float64}
+        @test size(v) == (2, 3)
+
+        v = AdaptiveArrayPools._zeros_impl!(pool, (2, 3))
+        @test v isa Matrix{Float64}
+
+        # --- _ones_impl! ---
         v = AdaptiveArrayPools._ones_impl!(pool, Float64, 5)
         @test v isa Vector{Float64}
         @test all(v .== 1.0)
 
+        v = AdaptiveArrayPools._ones_impl!(pool, 5)
+        @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._ones_impl!(pool, Float64, (2, 3))
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._ones_impl!(pool, (2, 3))
+        @test v isa Matrix{Float64}
+
+        # --- _similar_impl! ---
         template = rand(3, 3)
         v = AdaptiveArrayPools._similar_impl!(pool, template)
         @test v isa Matrix{Float64}
 
+        v = AdaptiveArrayPools._similar_impl!(pool, template, Float32)
+        @test v isa Matrix{Float32}
+
+        v = AdaptiveArrayPools._similar_impl!(pool, template, 4, 5)
+        @test v isa Matrix{Float64}
+        @test size(v) == (4, 5)
+
+        v = AdaptiveArrayPools._similar_impl!(pool, template, Int32, 2, 2)
+        @test v isa Matrix{Int32}
+
+        # --- _unsafe_zeros_impl! ---
         v = AdaptiveArrayPools._unsafe_zeros_impl!(pool, Float64, 5)
         @test v isa Vector{Float64}
 
+        v = AdaptiveArrayPools._unsafe_zeros_impl!(pool, 5)
+        @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._unsafe_zeros_impl!(pool, Float64, (2, 3))
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._unsafe_zeros_impl!(pool, (2, 3))
+        @test v isa Matrix{Float64}
+
+        # --- _unsafe_ones_impl! ---
         v = AdaptiveArrayPools._unsafe_ones_impl!(pool, Float64, 5)
         @test v isa Vector{Float64}
 
+        v = AdaptiveArrayPools._unsafe_ones_impl!(pool, 5)
+        @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._unsafe_ones_impl!(pool, Float64, (2, 3))
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._unsafe_ones_impl!(pool, (2, 3))
+        @test v isa Matrix{Float64}
+
+        # --- _unsafe_similar_impl! ---
         v = AdaptiveArrayPools._unsafe_similar_impl!(pool, template)
         @test v isa Matrix{Float64}
 
+        v = AdaptiveArrayPools._unsafe_similar_impl!(pool, template, Float32)
+        @test v isa Matrix{Float32}
+
+        v = AdaptiveArrayPools._unsafe_similar_impl!(pool, template, 4, 5)
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._unsafe_similar_impl!(pool, template, Int32, 2, 2)
+        @test v isa Matrix{Int32}
+
+        # --- _acquire_impl! ---
         v = AdaptiveArrayPools._acquire_impl!(pool, Float64, 5)
         @test v isa Vector{Float64}
 
+        v = AdaptiveArrayPools._acquire_impl!(pool, Float64, 3, 4)
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._acquire_impl!(pool, Float64, (2, 3))
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._acquire_impl!(pool, template)
+        @test v isa Matrix{Float64}
+
+        # --- _unsafe_acquire_impl! ---
         v = AdaptiveArrayPools._unsafe_acquire_impl!(pool, Float64, 5)
         @test v isa Vector{Float64}
+
+        v = AdaptiveArrayPools._unsafe_acquire_impl!(pool, Float64, 3, 4)
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._unsafe_acquire_impl!(pool, Float64, (2, 3))
+        @test v isa Matrix{Float64}
+
+        v = AdaptiveArrayPools._unsafe_acquire_impl!(pool, template)
+        @test v isa Matrix{Float64}
     end
 
     @testset "Macro internals" begin

From c6c5d0933e1d89f2c003e437f189cc5cf878d2a8 Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 16:07:53 -0800
Subject: [PATCH 18/20] docs: update documentation for DisabledPool behavior

- Update maybe_with_pool.md: pool becomes DisabledPool{backend}()
  instead of nothing when pooling is disabled
- Update api.md: add DisabledPool{Backend} type and document the
  pooling_enabled() and default_eltype() utility functions
- Update configuration.md: document DisabledPool behavior when
  USE_POOLING=false
---
 docs/api.md             |  8 ++++++++
 docs/configuration.md   |  8 ++++++++
 docs/maybe_with_pool.md | 16 ++++++++++++++--
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/docs/api.md b/docs/api.md
index 734c8f1..4d3eb8c 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -59,6 +59,14 @@ All convenience functions support tuple dimensions: `zeros!(pool, (3, 4))`.
 | Type | Description |
 |------|-------------|
 | `AdaptiveArrayPool` | The main pool type. Create with `AdaptiveArrayPool()`. |
+| `DisabledPool{Backend}` | Sentinel type when pooling is disabled. Preserves backend context (`:cpu` or `:cuda`). |
+
+## Utility Functions
+
+| Function | Description |
+|----------|-------------|
+| `pooling_enabled(pool)` | Returns `true` if `pool` is an active pool, `false` if it is a `DisabledPool`. Use this instead of checking `pool === nothing`. |
+| `default_eltype(pool)` | Returns default element type: `Float64` (CPU) or `Float32` (CUDA). |
 
 ## Constants
 
diff --git a/docs/configuration.md b/docs/configuration.md
index ad09432..fdc1d16 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -27,13 +27,21 @@ Preferences.set_preferences!(AdaptiveArrayPools, "use_pooling" => false)
 ```
 
 When `USE_POOLING = false`:
+- `pool` becomes `DisabledPool{backend}()` instead of an active pool
+- All pool functions fall back to standard allocation
+- Backend context is preserved: allocations under the `:cuda` backend still return `CuArray`
 
 ```julia
 # These become equivalent:
 @with_pool pool acquire!(pool, Float64, n, n)  →  Matrix{Float64}(undef, n, n)
 @with_pool pool acquire!(pool, Float64, n)     →  Vector{Float64}(undef, n)
+
+# With CUDA backend:
+@with_pool :cuda pool zeros!(pool, 100)        →  CUDA.zeros(Float32, 100)
 ```
 
+Use `pooling_enabled(pool)` to check if pooling is active.
+
 **Use cases:**
 - **Debugging**: Compare behavior with/without pooling
 - **Benchmarking**: Measure pooling overhead vs direct allocation
diff --git a/docs/maybe_with_pool.md b/docs/maybe_with_pool.md
index 0a4227c..39c31b7 100644
--- a/docs/maybe_with_pool.md
+++ b/docs/maybe_with_pool.md
@@ -25,8 +25,20 @@ MAYBE_POOLING_ENABLED[] = true   # Uses pool
 ## How It Works
 
 When `MAYBE_POOLING_ENABLED[] == false`:
-- `pool` becomes `nothing`
-- `acquire!(nothing, T, dims...)` allocates normally
+- `pool` becomes `DisabledPool{backend}()` (e.g., `DisabledPool{:cpu}()` or `DisabledPool{:cuda}()`)
+- All pool functions (`acquire!`, `zeros!`, etc.) fall back to standard allocation
+- Backend context is preserved: `:cuda` → `CuArray`, `:cpu` → `Array`
+
+Use `pooling_enabled(pool)` to check if pooling is active:
+```julia
+@maybe_with_pool pool begin
+    if pooling_enabled(pool)
+        # Using pooled memory
+    else
+        # Using standard allocation (DisabledPool)
+    end
+end
+```
 
 ## vs @with_pool
 

From 987c27d4e1db60561d73e070b5642e84b9a2554e Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 16:21:52 -0800
Subject: [PATCH 19/20] test: add coverage for AbstractArrayPool _impl! default
 type overloads

---
 test/test_coverage.jl | 44 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/test/test_coverage.jl b/test/test_coverage.jl
index e01c0fb..581abc7 100644
--- a/test/test_coverage.jl
+++ b/test/test_coverage.jl
@@ -513,4 +513,48 @@
         @test :x in locals5
         @test :y in locals5
     end
+
+    @testset "AbstractArrayPool _impl! default type overloads" begin
+        # These are called when convenience functions are used without a type parameter
+        # inside the @with_pool macro: unsafe_ones!(pool, 10) → _unsafe_ones_impl!(pool, 10)
+        pool = AdaptiveArrayPool()
+
+        # --- _zeros_impl! without type (uses default_eltype) ---
+        v = AdaptiveArrayPools._zeros_impl!(pool, 5)
+        @test eltype(v) == Float64
+        @test length(v) == 5
+
+        v = AdaptiveArrayPools._zeros_impl!(pool, 3, 4)
+        @test eltype(v) == Float64
+        @test size(v) == (3, 4)
+
+        # --- _ones_impl! without type ---
+        v = AdaptiveArrayPools._ones_impl!(pool, 5)
+        @test eltype(v) == Float64
+        @test length(v) == 5
+
+        v = AdaptiveArrayPools._ones_impl!(pool, 3, 4)
+        @test eltype(v) == Float64
+        @test size(v) == (3, 4)
+
+        # --- _unsafe_zeros_impl! without type ---
+        v = AdaptiveArrayPools._unsafe_zeros_impl!(pool, 5)
+        @test v isa Vector{Float64}
+        @test all(v .== 0.0)
+
+        v = AdaptiveArrayPools._unsafe_zeros_impl!(pool, 3, 4)
+        @test v isa Matrix{Float64}
+        @test size(v) == (3, 4)
+
+        # --- _unsafe_ones_impl! without type ---
+        v = AdaptiveArrayPools._unsafe_ones_impl!(pool, 5)
+        @test v isa Vector{Float64}
+        @test all(v .== 1.0)
+
+        v = AdaptiveArrayPools._unsafe_ones_impl!(pool, 3, 4)
+        @test v isa Matrix{Float64}
+        @test size(v) == (3, 4)
+
+        empty!(pool)
+    end
 end

From 4c240ba31d2dd9f37d48db6570f6b2da9d5f2f6b Mon Sep 17 00:00:00 2001
From: Min-Gu Yoo <mgyoo86@gmail.com>
Date: Tue, 16 Dec 2025 16:24:11 -0800
Subject: [PATCH 20/20] test: verify pooling is enabled for fresh
 AdaptiveArrayPool

---
 test/test_task_local_pool.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/test_task_local_pool.jl b/test/test_task_local_pool.jl
index 8a8645b..c408578 100644
--- a/test/test_task_local_pool.jl
+++ b/test/test_task_local_pool.jl
@@ -212,6 +212,8 @@
         # Use a fresh pool to ensure we start from 0
         pool = AdaptiveArrayPool()
 
+        @test pooling_enabled(pool) == true
+
         # Acquire 511 arrays without rewind - no warning yet
         for i in 1:511
             acquire!(pool, Float64, 10)