Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 81 additions & 26 deletions src/reverse_mode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -154,36 +154,62 @@ function _forward_eval(
@j f.forward_storage[k] = tmp_sub
end
elseif node.index == 3 # :*
tmp_prod = one(T)
for c_idx in children_indices
@inbounds tmp_prod *= f.forward_storage[children_arr[c_idx]]
end
if tmp_prod == zero(T) || N <= 2
# This is inefficient if there are a lot of children.
# 2 is chosen as a limit because (x*y)/y does not always
# equal x for floating-point numbers. This can produce
# unexpected error in partials. There's still an error when
# multiplying three or more terms, but users are less likely
# to complain about it.
for c_idx in children_indices
prod_others = one(T)
for c_idx2 in children_indices
(c_idx == c_idx2) && continue
ix = children_arr[c_idx2]
prod_others *= f.forward_storage[ix]
end
f.partials_storage[children_arr[c_idx]] = prod_others
# Node `k` is not scalar, so we do matrix multiplication
if f.sizes.ndims[k] != 0
@assert N == 2
idx1 = first(children_indices)
idx2 = last(children_indices)
@inbounds ix1 = children_arr[idx1]
@inbounds ix2 = children_arr[idx2]
v1 = zeros(_size(f.sizes, ix1)...)
v2 = zeros(_size(f.sizes, ix2)...)
for j in _eachindex(f.sizes, ix1)
v1[j] = @j f.forward_storage[ix1]
@j f.partials_storage[ix2] = v1[j]
end
for j in _eachindex(f.sizes, ix2)
v2[j] = @j f.forward_storage[ix2]
@j f.partials_storage[ix1] = v2[j]
end
v_prod = v1 * v2
for j in _eachindex(f.sizes, k)
@j f.forward_storage[k] = v_prod[j]
end
# Node `k` is scalar
else
# Compute all-minus-one partial derivatives by dividing from
# the total product.
tmp_prod = one(T)
for c_idx in children_indices
ix = children_arr[c_idx]
f.partials_storage[ix] =
tmp_prod / f.forward_storage[ix]
@inbounds tmp_prod *=
f.forward_storage[children_arr[c_idx]]
end
if tmp_prod == zero(T) || N <= 2
# This is inefficient if there are a lot of children.
# 2 is chosen as a limit because (x*y)/y does not always
# equal x for floating-point numbers. This can produce
# unexpected error in partials. There's still an error when
# multiplying three or more terms, but users are less likely
# to complain about it.
for c_idx in children_indices
prod_others = one(T)
for c_idx2 in children_indices
(c_idx == c_idx2) && continue
ix = children_arr[c_idx2]
prod_others *= f.forward_storage[ix]
end
f.partials_storage[children_arr[c_idx]] =
prod_others
end
else
# Compute all-minus-one partial derivatives by dividing from
# the total product.
for c_idx in children_indices
ix = children_arr[c_idx]
f.partials_storage[ix] =
tmp_prod / f.forward_storage[ix]
end
end
@inbounds f.forward_storage[k] = tmp_prod
end
@inbounds f.forward_storage[k] = tmp_prod
elseif node.index == 4 # :^
@assert N == 2
idx1 = first(children_indices)
Expand Down Expand Up @@ -429,7 +455,36 @@ function _reverse_eval(f::_SubexpressionStorage)
if node.index in
eachindex(MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS)
op = MOI.Nonlinear.DEFAULT_MULTIVARIATE_OPERATORS[node.index]
if op == :vect
if op == :*
if f.sizes.ndims[k] != 0
# Node `k` is not scalar, so we do matrix multiplication
idx1 = first(children_indices)
idx2 = last(children_indices)
ix1 = children_arr[idx1]
ix2 = children_arr[idx2]
v1 = zeros(_size(f.sizes, ix1)...)
v2 = zeros(_size(f.sizes, ix2)...)
for j in _eachindex(f.sizes, ix1)
v1[j] = @j f.forward_storage[ix1]
end
for j in _eachindex(f.sizes, ix2)
v2[j] = @j f.forward_storage[ix2]
end
rev_parent = zeros(_size(f.sizes, k)...)
for j in _eachindex(f.sizes, k)
rev_parent[j] = @j f.reverse_storage[k]
end
rev_v1 = rev_parent * v2'
rev_v2 = v1' * rev_parent
for j in _eachindex(f.sizes, ix1)
@j f.reverse_storage[ix1] = rev_v1[j]
end
for j in _eachindex(f.sizes, ix2)
@j f.reverse_storage[ix2] = rev_v2[j]
end
continue
end
elseif op == :vect
@assert _eachindex(f.sizes, k) ==
eachindex(children_indices)
for j in eachindex(children_indices)
Expand Down
19 changes: 11 additions & 8 deletions src/sizes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -238,26 +238,29 @@ function _infer_sizes(
return !iszero(sizes.ndims[children_arr[i]])
end
if !isnothing(first_matrix)
last_matrix = findfirst(children_indices) do i
return !iszero(sizes.ndims[children_arr[i]])
end
if sizes.ndims[last_matrix] == 0 ||
sizes.ndims[first_matrix] == 0
if sizes.ndims[children_arr[first(children_indices)]] == 0
_add_size!(sizes, k, (1, 1))
continue
else
_add_size!(
sizes,
k,
(
_size(sizes, first_matrix, 1),
_size(
sizes,
last_matrix,
sizes.ndims[last_matrix],
children_arr[first(children_indices)],
1,
),
_size(
sizes,
children_arr[last(children_indices)],
sizes.ndims[children_arr[last(
children_indices,
)],],
),
),
)
continue
end
end
elseif op == :^ || op == :/
Expand Down
173 changes: 173 additions & 0 deletions test/ArrayDiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,179 @@ function test_objective_norm_of_matrix_with_sum()
return
end

function test_objective_norm_of_product_of_matrices()
model = Nonlinear.Model()
x1 = MOI.VariableIndex(1)
x2 = MOI.VariableIndex(2)
x3 = MOI.VariableIndex(3)
x4 = MOI.VariableIndex(4)
Nonlinear.set_objective(model, :(norm([$x1 $x2; $x3 $x4] * [1 0; 0 1])))
evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4])
MOI.initialize(evaluator, [:Grad])
sizes = evaluator.backend.objective.expr.sizes
@test sizes.ndims == [0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 0, 2, 0, 0]
@test sizes.size_offset ==
[0, 12, 10, 8, 0, 0, 6, 0, 0, 4, 2, 0, 0, 0, 0, 0]
@test sizes.size == [1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2]
@test sizes.storage_offset ==
[0, 1, 5, 9, 11, 12, 13, 15, 16, 17, 21, 23, 24, 25, 27, 28, 29]
x1 = 1.0
x2 = 2.0
x3 = 3.0
x4 = 4.0
@test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(30.0)
g = ones(4)
MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4])
@test g == [
1.0 / sqrt(30.0),
2.0 / sqrt(30.0),
3.0 / sqrt(30.0),
4.0 / sqrt(30.0),
]
return
end

function test_objective_norm_of_product_of_matrices_with_sum()
model = Nonlinear.Model()
x1 = MOI.VariableIndex(1)
x2 = MOI.VariableIndex(2)
x3 = MOI.VariableIndex(3)
x4 = MOI.VariableIndex(4)
Nonlinear.set_objective(
model,
:(norm(([$x1 $x2; $x3 $x4] + [1 1; 1 1]) * [1 0; 0 1])),
)
evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4])
MOI.initialize(evaluator, [:Grad])
sizes = evaluator.backend.objective.expr.sizes
@test sizes.ndims == [
0,
2,
2,
2,
2,
0,
0,
2,
0,
0,
2,
2,
0,
0,
2,
0,
0,
2,
2,
0,
0,
2,
0,
0,
]
@test sizes.size_offset == [
0,
20,
18,
16,
14,
0,
0,
12,
0,
0,
10,
8,
0,
0,
6,
0,
0,
4,
2,
0,
0,
0,
0,
0,
]
@test sizes.size ==
[1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2]
@test sizes.storage_offset == [
0,
1,
5,
9,
13,
15,
16,
17,
19,
20,
21,
25,
27,
28,
29,
31,
32,
33,
37,
39,
40,
41,
43,
44,
45,
]
x1 = 1.0
x2 = 2.0
x3 = 3.0
x4 = 4.0
@test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(54.0)
g = ones(4)
MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4])
@test g == [
2.0 / sqrt(54.0),
3.0 / sqrt(54.0),
4.0 / sqrt(54.0),
5.0 / sqrt(54.0),
]
return
end

function test_objective_norm_of_mtx_vector_product()
model = Nonlinear.Model()
x1 = MOI.VariableIndex(1)
x2 = MOI.VariableIndex(2)
x3 = MOI.VariableIndex(3)
x4 = MOI.VariableIndex(4)
Nonlinear.set_objective(model, :(norm(([$x1 $x2; $x3 $x4] * [1; 1]))))
evaluator = Nonlinear.Evaluator(model, ArrayDiff.Mode(), [x1, x2, x3, x4])
MOI.initialize(evaluator, [:Grad])
sizes = evaluator.backend.objective.expr.sizes
@test sizes.ndims == [0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0]
@test sizes.size_offset == [0, 8, 6, 4, 0, 0, 2, 0, 0, 0, 0, 0]
@test sizes.size == [2, 1, 1, 2, 1, 2, 2, 2, 2, 1]
@test sizes.storage_offset ==
[0, 1, 3, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19]
x1 = 1.0
x2 = 2.0
x3 = 3.0
x4 = 4.0
@test MOI.eval_objective(evaluator, [x1, x2, x3, x4]) == sqrt(58.0)
g = ones(4)
MOI.eval_objective_gradient(evaluator, g, [x1, x2, x3, x4])
@test g == [
3.0 / sqrt(58.0),
3.0 / sqrt(58.0),
7.0 / sqrt(58.0),
7.0 / sqrt(58.0),
]
return
end

end # module

TestArrayDiff.runtests()
Loading