From 32b1da7fde06e2249713694e7c99075a5cc8d94a Mon Sep 17 00:00:00 2001 From: Sophie L Date: Tue, 30 Dec 2025 16:33:12 +0100 Subject: [PATCH 1/2] Add matrix-matrix multiplication and small test --- src/reverse_mode.jl | 107 +++++++++++++++++++++++++-------- src/sizes.jl | 19 +++--- test/ArrayDiff.jl | 142 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 234 insertions(+), 34 deletions(-) diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl index 2c7cfc7..505421a 100644 --- a/src/reverse_mode.jl +++ b/src/reverse_mode.jl @@ -154,36 +154,62 @@ function _forward_eval( @j f.forward_storage[k] = tmp_sub end elseif node.index == 3 # :* - tmp_prod = one(T) - for c_idx in children_indices - @inbounds tmp_prod *= f.forward_storage[children_arr[c_idx]] - end - if tmp_prod == zero(T) || N <= 2 - # This is inefficient if there are a lot of children. - # 2 is chosen as a limit because (x*y)/y does not always - # equal x for floating-point numbers. This can produce - # unexpected error in partials. There's still an error when - # multiplying three or more terms, but users are less likely - # to complain about it. - for c_idx in children_indices - prod_others = one(T) - for c_idx2 in children_indices - (c_idx == c_idx2) && continue - ix = children_arr[c_idx2] - prod_others *= f.forward_storage[ix] - end - f.partials_storage[children_arr[c_idx]] = prod_others + # Node `k` is not scalar, so we do matrix multiplication + if f.sizes.ndims[k] != 0 + @assert N == 2 + idx1 = first(children_indices) + idx2 = last(children_indices) + @inbounds ix1 = children_arr[idx1] + @inbounds ix2 = children_arr[idx2] + v1 = zeros(_size(f.sizes, ix1)...) + v2 = zeros(_size(f.sizes, ix2)...) + for j in _eachindex(f.sizes, ix1) + v1[j] = @j f.forward_storage[ix1] + @j f.partials_storage[ix2] = v1[j] + end + for j in _eachindex(f.sizes, ix2) + v2[j] = @j f.forward_storage[ix2] + @j f.partials_storage[ix1] = v2[j] + end + v_prod = v1 * v2 + for j in _eachindex(f.sizes, k) + @j f.forward_storage[k] = v_prod[j] end + # Node `k` is scalar else - # Compute all-minus-one partial derivatives by dividing from - # the total product. + tmp_prod = one(T) for c_idx in children_indices - ix = children_arr[c_idx] - f.partials_storage[ix] = - tmp_prod / f.forward_storage[ix] + @inbounds tmp_prod *= + f.forward_storage[children_arr[c_idx]] end + if tmp_prod == zero(T) || N <= 2 + # This is inefficient if there are a lot of children. + # 2 is chosen as a limit because (x*y)/y does not always + # equal x for floating-point numbers. This can produce + # unexpected error in partials. There's still an error when + # multiplying three or more terms, but users are less likely + # to complain about it. + for c_idx in children_indices + prod_others = one(T) + for c_idx2 in children_indices + (c_idx == c_idx2) && continue + ix = children_arr[c_idx2] + prod_others *= f.forward_storage[ix] + end + f.partials_storage[children_arr[c_idx]] = + prod_others + end + else + # Compute all-minus-one partial derivatives by dividing from + # the total product. + for c_idx in children_indices + ix = children_arr[c_idx] + f.partials_storage[ix] = + tmp_prod / f.forward_storage[ix] + end + end + @inbounds f.forward_storage[k] = tmp_prod end - @inbounds f.forward_storage[k] = tmp_prod elseif node.index == 4 # :^ @assert N == 2 idx1 = first(children_indices) @@ -429,7 +455,36 @@ function _reverse_eval(f::_SubexpressionStorage) if node.index in eachindex(MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS) op = MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS[node.index] - if op == :vect + if op == :* + if f.sizes.ndims[k] != 0 + # Node `k` is not scalar, so we do matrix multiplication + idx1 = first(children_indices) + idx2 = last(children_indices) + ix1 = children_arr[idx1] + ix2 = children_arr[idx2] + v1 = zeros(_size(f.sizes, ix1)...) + v2 = zeros(_size(f.sizes, ix2)...) + for j in _eachindex(f.sizes, ix1) + v1[j] = @j f.forward_storage[ix1] + end + for j in _eachindex(f.sizes, ix2) + v2[j] = @j f.forward_storage[ix2] + end + rev_parent = zeros(_size(f.sizes, k)...) + for j in _eachindex(f.sizes, k) + rev_parent[j] = @j f.reverse_storage[k] + end + rev_v1 = rev_parent * v2' + rev_v2 = v1' * rev_parent + for j in _eachindex(f.sizes, ix1) + @j f.reverse_storage[ix1] = rev_v1[j] + end + for j in _eachindex(f.sizes, ix2) + @j f.reverse_storage[ix2] = rev_v2[j] + end + continue + end + elseif op == :vect @assert _eachindex(f.sizes, k) == eachindex(children_indices) for j in eachindex(children_indices) diff --git a/src/sizes.jl b/src/sizes.jl index a747656..067736b 100644 --- a/src/sizes.jl +++ b/src/sizes.jl @@ -238,11 +238,7 @@ function _infer_sizes( return !iszero(sizes.ndims[children_arr[i]]) end if !isnothing(first_matrix) - last_matrix = findfirst(children_indices) do i - return !iszero(sizes.ndims[children_arr[i]]) - end - if sizes.ndims[last_matrix] == 0 || - sizes.ndims[first_matrix] == 0 + if sizes.ndims[children_arr[first(children_indices)]] == 0 _add_size!(sizes, k, (1, 1)) continue else @@ -250,14 +246,21 @@ function _infer_sizes( sizes, k, ( - _size(sizes, first_matrix, 1), _size( sizes, - last_matrix, - sizes.ndims[last_matrix], + children_arr[first(children_indices)], + 1, + ), + _size( + sizes, + children_arr[last(children_indices)], + sizes.ndims[children_arr[last( + children_indices, + )],], ), ), ) + continue end end elseif op == :^ || op == :/ diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl index 5eab3cc..ba9ffb9 100644 --- a/test/ArrayDiff.jl +++ b/test/ArrayDiff.jl @@ -356,6 +356,148 @@ function test_objective_norm_of_matrix_with_sum() return end +function test_objective_norm_of_product_of_matrices() + model = Nonlinear.Model() + x1 = MOI.VariableIndex(1) + x2 = MOI.VariableIndex(2) + x3 = MOI.VariableIndex(3) + x4 = MOI.VariableIndex(4) + Nonlinear.set_objective(model, :(norm([$x1 $x2; $x3 $x4] * [1 0; 0 1]))) + evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4]) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + @test sizes.ndims == [0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0] + @test sizes.size_offset == + [0, 12, 10, 8, 0, 0, 6, 0, 0, 4, 2, 0, 0, 0, 0, 0] + @test sizes.size == [1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2] + @test sizes.storage_offset == + [0, 1, 5, 9, 11, 12, 13, 15, 16, 17, 21, 23, 24, 25, 27, 28, 29] + x1 = 1.0 + x2 = 2.0 + x3 = 3.0 + x4 = 4.0 + @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(30.0) + g = ones(4) + MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + @test g == [ + 1.0 / sqrt(30.0), + 2.0 / sqrt(30.0), + 3.0 / sqrt(30.0), + 4.0 / sqrt(30.0), + ] + return +end + +function test_objective_norm_of_product_of_matrices_with_sum() + model = Nonlinear.Model() + x1 = MOI.VariableIndex(1) + x2 = MOI.VariableIndex(2) + x3 = MOI.VariableIndex(3) + x4 = MOI.VariableIndex(4) + Nonlinear.set_objective( + model, + :(norm(([$x1 $x2; $x3 $x4] + [1 1; 1 1]) * [1 0; 0 1])), + ) + evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4]) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + @test sizes.ndims == [ + 0, + 2, + 2, + 2, + 2, + 0, + 0, + 2, + 0, + 0, + 2, + 2, + 0, + 0, + 2, + 0, + 0, + 2, + 2, + 0, + 0, + 2, + 0, + 0, + ] + @test sizes.size_offset == [ + 0, + 20, + 18, + 16, + 14, + 0, + 0, + 12, + 0, + 0, + 10, + 8, + 0, + 0, + 6, + 0, + 0, + 4, + 2, + 0, + 0, + 0, + 0, + 0, + ] + @test sizes.size == + [1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2] + @test sizes.storage_offset == [ + 0, + 1, + 5, + 9, + 13, + 15, + 16, + 17, + 19, + 20, + 21, + 25, + 27, + 28, + 29, + 31, + 32, + 33, + 37, + 39, + 40, + 41, + 43, + 44, + 45, + ] + x1 = 1.0 + x2 = 2.0 + x3 = 3.0 + x4 = 4.0 + @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(54.0) + g = ones(4) + MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + @test g == [ + 2.0 / sqrt(54.0), + 3.0 / sqrt(54.0), + 4.0 / sqrt(54.0), + 5.0 / sqrt(54.0), + ] + return +end + end # module TestArrayDiff.runtests() From ffb5dffd78bce1fdbbbc2df6e01ad6eb0635074a Mon Sep 17 00:00:00 2001 From: Sophie L Date: Tue, 30 Dec 2025 16:46:16 +0100 Subject: [PATCH 2/2] Add matrix-vector test --- test/ArrayDiff.jl | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl index ba9ffb9..237de20 100644 --- a/test/ArrayDiff.jl +++ b/test/ArrayDiff.jl @@ -498,6 +498,37 @@ function test_objective_norm_of_product_of_matrices_with_sum() return end +function test_objective_norm_of_mtx_vector_product() + model = Nonlinear.Model() + x1 = MOI.VariableIndex(1) + x2 = MOI.VariableIndex(2) + x3 = MOI.VariableIndex(3) + x4 = MOI.VariableIndex(4) + Nonlinear.set_objective(model, :(norm(([$x1 $x2; $x3 $x4] * [1; 1])))) + evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4]) + MOI.initialize(evaluator, [:Grad]) + sizes = evaluator.backend.objective.expr.sizes + @test sizes.ndims == [0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0] + @test sizes.size_offset == [0, 8, 6, 4, 0, 0, 2, 0, 0, 0, 0, 0] + @test sizes.size == [2, 1, 1, 2, 1, 2, 2, 2, 2, 1] + @test sizes.storage_offset == + [0, 1, 3, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19] + x1 = 1.0 + x2 = 2.0 + x3 = 3.0 + x4 = 4.0 + @test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(58.0) + g = ones(4) + MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4]) + @test g == [ + 3.0 / sqrt(58.0), + 3.0 / sqrt(58.0), + 7.0 / sqrt(58.0), + 7.0 / sqrt(58.0), + ] + return +end + end # module TestArrayDiff.runtests()