From 32b1da7fde06e2249713694e7c99075a5cc8d94a Mon Sep 17 00:00:00 2001
From: Sophie L <slequeu@hotmail.com>
Date: Tue, 30 Dec 2025 16:33:12 +0100
Subject: [PATCH 1/2] Add matrix-matrix multiplication and small test

---
 src/reverse_mode.jl | 107 +++++++++++++++++++++++++--------
 src/sizes.jl        |  19 +++---
 test/ArrayDiff.jl   | 142 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 234 insertions(+), 34 deletions(-)

diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl
index 2c7cfc7..505421a 100644
--- a/src/reverse_mode.jl
+++ b/src/reverse_mode.jl
@@ -154,36 +154,62 @@ function _forward_eval(
                     @j f.forward_storage[k] = tmp_sub
                 end
             elseif node.index == 3 # :*
-                tmp_prod = one(T)
-                for c_idx in children_indices
-                    @inbounds tmp_prod *= f.forward_storage[children_arr[c_idx]]
-                end
-                if tmp_prod == zero(T) || N <= 2
-                    # This is inefficient if there are a lot of children.
-                    # 2 is chosen as a limit because (x*y)/y does not always
-                    # equal x for floating-point numbers. This can produce
-                    # unexpected error in partials. There's still an error when
-                    # multiplying three or more terms, but users are less likely
-                    # to complain about it.
-                    for c_idx in children_indices
-                        prod_others = one(T)
-                        for c_idx2 in children_indices
-                            (c_idx == c_idx2) && continue
-                            ix = children_arr[c_idx2]
-                            prod_others *= f.forward_storage[ix]
-                        end
-                        f.partials_storage[children_arr[c_idx]] = prod_others
+                # Node `k` is not scalar, so we do matrix multiplication
+                if f.sizes.ndims[k] != 0
+                    @assert N == 2
+                    idx1 = first(children_indices)
+                    idx2 = last(children_indices)
+                    @inbounds ix1 = children_arr[idx1]
+                    @inbounds ix2 = children_arr[idx2]
+                    v1 = zeros(_size(f.sizes, ix1)...)
+                    v2 = zeros(_size(f.sizes, ix2)...)
+                    for j in _eachindex(f.sizes, ix1)
+                        v1[j] = @j f.forward_storage[ix1]
+                        @j f.partials_storage[ix2] = v1[j]
+                    end
+                    for j in _eachindex(f.sizes, ix2)
+                        v2[j] = @j f.forward_storage[ix2]
+                        @j f.partials_storage[ix1] = v2[j]
+                    end
+                    v_prod = v1 * v2
+                    for j in _eachindex(f.sizes, k)
+                        @j f.forward_storage[k] = v_prod[j]
                     end
+                    # Node `k` is scalar
                 else
-                    # Compute all-minus-one partial derivatives by dividing from
-                    # the total product.
+                    tmp_prod = one(T)
                     for c_idx in children_indices
-                        ix = children_arr[c_idx]
-                        f.partials_storage[ix] =
-                            tmp_prod / f.forward_storage[ix]
+                        @inbounds tmp_prod *=
+                            f.forward_storage[children_arr[c_idx]]
                     end
+                    if tmp_prod == zero(T) || N <= 2
+                        # This is inefficient if there are a lot of children.
+                        # 2 is chosen as a limit because (x*y)/y does not always
+                        # equal x for floating-point numbers. This can produce
+                        # unexpected error in partials. There's still an error when
+                        # multiplying three or more terms, but users are less likely
+                        # to complain about it.
+                        for c_idx in children_indices
+                            prod_others = one(T)
+                            for c_idx2 in children_indices
+                                (c_idx == c_idx2) && continue
+                                ix = children_arr[c_idx2]
+                                prod_others *= f.forward_storage[ix]
+                            end
+                            f.partials_storage[children_arr[c_idx]] =
+                                prod_others
+                        end
+                    else
+                        # Compute all-minus-one partial derivatives by dividing from
+                        # the total product.
+                        for c_idx in children_indices
+                            ix = children_arr[c_idx]
+                            f.partials_storage[ix] =
+                                tmp_prod / f.forward_storage[ix]
+                        end
+                    end
+                    @inbounds f.forward_storage[k] = tmp_prod
                 end
-                @inbounds f.forward_storage[k] = tmp_prod
             elseif node.index == 4 # :^
                 @assert N == 2
                 idx1 = first(children_indices)
@@ -429,7 +455,36 @@ function _reverse_eval(f::_SubexpressionStorage)
             if node.index in
                eachindex(MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS)
                 op = MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS[node.index]
-                if op == :vect
+                if op == :*
+                    if f.sizes.ndims[k] != 0
+                        # Node `k` is not scalar, so we do matrix multiplication
+                        idx1 = first(children_indices)
+                        idx2 = last(children_indices)
+                        ix1 = children_arr[idx1]
+                        ix2 = children_arr[idx2]
+                        v1 = zeros(_size(f.sizes, ix1)...)
+                        v2 = zeros(_size(f.sizes, ix2)...)
+                        for j in _eachindex(f.sizes, ix1)
+                            v1[j] = @j f.forward_storage[ix1]
+                        end
+                        for j in _eachindex(f.sizes, ix2)
+                            v2[j] = @j f.forward_storage[ix2]
+                        end
+                        rev_parent = zeros(_size(f.sizes, k)...)
+                        for j in _eachindex(f.sizes, k)
+                            rev_parent[j] = @j f.reverse_storage[k]
+                        end
+                        rev_v1 = rev_parent * v2'
+                        rev_v2 = v1' * rev_parent
+                        for j in _eachindex(f.sizes, ix1)
+                            @j f.reverse_storage[ix1] = rev_v1[j]
+                        end
+                        for j in _eachindex(f.sizes, ix2)
+                            @j f.reverse_storage[ix2] = rev_v2[j]
+                        end
+                        continue
+                    end
+                elseif op == :vect
                     @assert _eachindex(f.sizes, k) ==
                             eachindex(children_indices)
                     for j in eachindex(children_indices)
diff --git a/src/sizes.jl b/src/sizes.jl
index a747656..067736b 100644
--- a/src/sizes.jl
+++ b/src/sizes.jl
@@ -238,11 +238,7 @@ function _infer_sizes(
                     return !iszero(sizes.ndims[children_arr[i]])
                 end
                 if !isnothing(first_matrix)
-                    last_matrix = findfirst(children_indices) do i
-                        return !iszero(sizes.ndims[children_arr[i]])
-                    end
-                    if sizes.ndims[last_matrix] == 0 ||
-                       sizes.ndims[first_matrix] == 0
+                    if sizes.ndims[children_arr[first(children_indices)]] == 0
                         _add_size!(sizes, k, (1, 1))
                         continue
                     else
@@ -250,14 +246,21 @@ function _infer_sizes(
                             sizes,
                             k,
                             (
-                                _size(sizes, first_matrix, 1),
                                 _size(
                                     sizes,
-                                    last_matrix,
-                                    sizes.ndims[last_matrix],
+                                    children_arr[first(children_indices)],
+                                    1,
+                                ),
+                                _size(
+                                    sizes,
+                                    children_arr[last(children_indices)],
+                                    sizes.ndims[children_arr[last(
+                                        children_indices,
+                                    )],],
                                 ),
                             ),
                         )
+                        continue
                     end
                 end
             elseif op == :^ || op == :/
diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl
index 5eab3cc..ba9ffb9 100644
--- a/test/ArrayDiff.jl
+++ b/test/ArrayDiff.jl
@@ -356,6 +356,148 @@ function test_objective_norm_of_matrix_with_sum()
     return
 end
 
+function test_objective_norm_of_product_of_matrices()
+    model = Nonlinear.Model()
+    x1 = MOI.VariableIndex(1)
+    x2 = MOI.VariableIndex(2)
+    x3 = MOI.VariableIndex(3)
+    x4 = MOI.VariableIndex(4)
+    Nonlinear.set_objective(model, :(norm([$x1 $x2; $x3 $x4] * [1 0; 0 1])))
+    evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4])
+    MOI.initialize(evaluator, [:Grad])
+    sizes = evaluator.backend.objective.expr.sizes
+    @test sizes.ndims == [0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0]
+    @test sizes.size_offset ==
+          [0, 12, 10, 8, 0, 0, 6, 0, 0, 4, 2, 0, 0, 0, 0, 0]
+    @test sizes.size == [1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2]
+    @test sizes.storage_offset ==
+          [0, 1, 5, 9, 11, 12, 13, 15, 16, 17, 21, 23, 24, 25, 27, 28, 29]
+    x1 = 1.0
+    x2 = 2.0
+    x3 = 3.0
+    x4 = 4.0
+    @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(30.0)
+    g = ones(4)
+    MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4])
+    @test g == [
+        1.0 / sqrt(30.0),
+        2.0 / sqrt(30.0),
+        3.0 / sqrt(30.0),
+        4.0 / sqrt(30.0),
+    ]
+    return
+end
+
+function test_objective_norm_of_product_of_matrices_with_sum()
+    model = Nonlinear.Model()
+    x1 = MOI.VariableIndex(1)
+    x2 = MOI.VariableIndex(2)
+    x3 = MOI.VariableIndex(3)
+    x4 = MOI.VariableIndex(4)
+    Nonlinear.set_objective(
+        model,
+        :(norm(([$x1 $x2; $x3 $x4] + [1 1; 1 1]) * [1 0; 0 1])),
+    )
+    evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4])
+    MOI.initialize(evaluator, [:Grad])
+    sizes = evaluator.backend.objective.expr.sizes
+    @test sizes.ndims == [
+        0,
+        2,
+        2,
+        2,
+        2,
+        0,
+        0,
+        2,
+        0,
+        0,
+        2,
+        2,
+        0,
+        0,
+        2,
+        0,
+        0,
+        2,
+        2,
+        0,
+        0,
+        2,
+        0,
+        0,
+    ]
+    @test sizes.size_offset == [
+        0,
+        20,
+        18,
+        16,
+        14,
+        0,
+        0,
+        12,
+        0,
+        0,
+        10,
+        8,
+        0,
+        0,
+        6,
+        0,
+        0,
+        4,
+        2,
+        0,
+        0,
+        0,
+        0,
+        0,
+    ]
+    @test sizes.size ==
+          [1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2]
+    @test sizes.storage_offset == [
+        0,
+        1,
+        5,
+        9,
+        13,
+        15,
+        16,
+        17,
+        19,
+        20,
+        21,
+        25,
+        27,
+        28,
+        29,
+        31,
+        32,
+        33,
+        37,
+        39,
+        40,
+        41,
+        43,
+        44,
+        45,
+    ]
+    x1 = 1.0
+    x2 = 2.0
+    x3 = 3.0
+    x4 = 4.0
+    @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(54.0)
+    g = ones(4)
+    MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4])
+    @test g == [
+        2.0 / sqrt(54.0),
+        3.0 / sqrt(54.0),
+        4.0 / sqrt(54.0),
+        5.0 / sqrt(54.0),
+    ]
+    return
+end
+
 end  # module
 
 TestArrayDiff.runtests()

From ffb5dffd78bce1fdbbbc2df6e01ad6eb0635074a Mon Sep 17 00:00:00 2001
From: Sophie L <slequeu@hotmail.com>
Date: Tue, 30 Dec 2025 16:46:16 +0100
Subject: [PATCH 2/2] Add matrix-vector test

---
 test/ArrayDiff.jl | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl
index ba9ffb9..237de20 100644
--- a/test/ArrayDiff.jl
+++ b/test/ArrayDiff.jl
@@ -498,6 +498,37 @@ function test_objective_norm_of_product_of_matrices_with_sum()
     return
 end
 
+function test_objective_norm_of_mtx_vector_product()
+    model = Nonlinear.Model()
+    x1 = MOI.VariableIndex(1)
+    x2 = MOI.VariableIndex(2)
+    x3 = MOI.VariableIndex(3)
+    x4 = MOI.VariableIndex(4)
+    Nonlinear.set_objective(model, :(norm(([$x1 $x2; $x3 $x4] * [1; 1]))))
+    evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4])
+    MOI.initialize(evaluator, [:Grad])
+    sizes = evaluator.backend.objective.expr.sizes
+    @test sizes.ndims == [0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0]
+    @test sizes.size_offset == [0, 8, 6, 4, 0, 0, 2, 0, 0, 0, 0, 0]
+    @test sizes.size == [2, 1, 1, 2, 1, 2, 2, 2, 2, 1]
+    @test sizes.storage_offset ==
+          [0, 1, 3, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19]
+    x1 = 1.0
+    x2 = 2.0
+    x3 = 3.0
+    x4 = 4.0
+    @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(58.0)
+    g = ones(4)
+    MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4])
+    @test g == [
+        3.0 / sqrt(58.0),
+        3.0 / sqrt(58.0),
+        7.0 / sqrt(58.0),
+        7.0 / sqrt(58.0),
+    ]
+    return
+end
+
 end  # module
 
 TestArrayDiff.runtests()