From 28ff9a5ef8f6dbccf9311caad55b6921b30e6da3 Mon Sep 17 00:00:00 2001
From: Mohamed Tarek
Date: Tue, 9 Feb 2021 02:40:15 +1100
Subject: [PATCH 01/25] proof of concept

---
 Project.toml                   |   6 +-
 src/AbstractDifferentiation.jl | 329 ++++++++++++++++++++++++++++++++-
 test/runtests.jl               |  77 +++++++-
 3 files changed, 408 insertions(+), 4 deletions(-)

diff --git a/Project.toml b/Project.toml
index 184f921..95fb5a4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -3,11 +3,15 @@
+[deps]
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+
 [compat]
 julia = "1"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 
 [targets]
-test = ["Test"]
+test = ["Test", "FiniteDifferences"]
diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 93d7351..ad539ca 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -1,5 +1,332 @@
 module AbstractDifferentiation
 
-# Write your package code here.
+using LinearAlgebra
+
+export AD
+
+const AD = AbstractDifferentiation
+
+abstract type AbstractBackend end
+abstract type AbstractFiniteDifference <: AbstractBackend end
+abstract type AbstractForwardMode <: AbstractBackend end
+abstract type AbstractReverseMode <: AbstractBackend end
+
+struct HigherOrderBackend{B} <: AbstractBackend
+    backends::B
+end
+reduceorder(b::AbstractBackend) = b
+function reduceorder(b::HigherOrderBackend)
+    return HigherOrderBackend(reverse(Base.tail(reverse(b.backends))))
+end
+lowest(b::AbstractBackend) = b
+lowest(b::HigherOrderBackend) = b.backends[end]
+secondlowest(b::HigherOrderBackend) = lowest(reduceorder(b))
+
+# If the primal value is in y, extract it.
+# Otherwise, re-compute it, e.g. in finite diff.
+primalvalue(::AbstractFiniteDifference, ::Any, f, xs) = f(xs...)
+primalvalue(::AbstractBackend, ys, ::Any, ::Any) = primalvalue(ys)
+primalvalue(x::Tuple) = map(primalvalue, x)
+primalvalue(x) = x
+
+function derivative(ab::AbstractBackend, f, xs::Number...)
+    return getindex.(jacobian(lowest(ab), f, xs...), 1)
+end
+function gradient(ab::AbstractBackend, f, xs...)
+    return adjoint.(jacobian(lowest(ab), f, xs...))
+end
+function jacobian(ab::AbstractBackend, f, xs...) end
+function hessian(ab::AbstractBackend, f, xs...)
+    return jacobian(secondlowest(ab), (xs...,) -> begin
+        gradient(lowest(ab), f, xs...)
+    end, xs...)
+end
+
+function value_and_derivative(ab::AbstractBackend, f, xs::Number...)
+    value, jacs = value_and_jacobian(lowest(ab), f, xs...)
+    return value[1], getindex.(jacs, 1)
+end
+function value_and_gradient(ab::AbstractBackend, f, xs...)
+    value, jacs = value_and_jacobian(lowest(ab), f, xs...)
+    return value, adjoint.(jacs)
+end
+function value_and_jacobian(ab::AbstractBackend, f, xs...)
+    local value
+    primalcalled = false
+    jacs = jacobian(lowest(ab), (_xs...,) -> begin
+        v = f(_xs...)
+        if !primalcalled
+            value = primalvalue(ab, v, f, xs)
+            primalcalled = true
+        end
+        return v
+    end, xs...)
+    return value, jacs
+end
+function value_and_hessian(ab::AbstractBackend, f, xs...)
+    local value
+    primalcalled = false
+    hess = jacobian(secondlowest(ab), (_xs...,) -> begin
+        v, g = value_and_gradient(lowest(ab), f, _xs...)
+        if !primalcalled
+            value = primalvalue(ab, v, f, xs)
+            primalcalled = true
+        end
+        return g
+    end, xs...)
+    return value, hess
+end
+function value_and_hessian(ab::HigherOrderBackend, f, xs...)
+    local value
+    primalcalled = false
+    hess = jacobian(secondlowest(ab), (_xs...,) -> begin
+        v, g = value_and_gradient(lowest(ab), f, _xs...)
+        if !primalcalled
+            value = primalvalue(ab, v, f, xs)
+            primalcalled = true
+        end
+        return g
+    end, xs...)
+    return value, hess
+end
+function value_gradient_and_hessian(ab::AbstractBackend, f, xs...)
+    local value
+    primalcalled = false
+    grads, hess = value_and_jacobian(secondlowest(ab), (_xs...,) -> begin
+        v, g = value_and_gradient(lowest(ab), f, _xs...)
+        if !primalcalled
+            value = primalvalue(secondlowest(ab), v, f, xs)
+            primalcalled = false
+        end
+        return g
+    end, xs...)
+    return value, grads, hess
+end
+function value_gradient_and_hessian(ab::HigherOrderBackend, f, xs...)
+    local value
+    primalcalled = false
+    grads, hess = value_and_jacobian(secondlowest(ab), (_xs...,) -> begin
+        v, g = value_and_gradient(lowest(ab), f, _xs...)
+        if !primalcalled
+            value = primalvalue(secondlowest(ab), v, f, xs)
+            primalcalled = true
+        end
+        return g
+    end, xs...)
+    return value, grads, hess
+end
+
+function pushforward_function(
+    ab::AbstractBackend,
+    f,
+    xs::Union{Number, AbstractArray{<:Number}}...,
+)
+    return (ds) -> begin
+        return jacobian(lowest(ab), (xds...,) -> begin
+            if ds isa Tuple
+                @assert length(xs) == length(ds)
+                newxs = xs .+ ds .* xds
+                return f(newxs...)
+            else
+                @assert length(xs) == length(xds) == 1
+                newx = xs[1] + ds * xds[1]
+                return f(newx)
+            end
+        end, _zero.(xs, ds)...)
+    end
+end
+function value_and_pushforward_function(
+    ab::AbstractBackend,
+    f,
+    xs::Union{Number, AbstractArray{<:Number}}...,
+)
+    return (ds) -> begin
+        @assert ds isa Tuple && length(ds) == length(xs)
+        return value_and_jacobian(lowest(ab), (xds...,) -> begin
+            if ds isa Tuple
+                @assert length(xs) == length(ds)
+                newxs = xs .+ ds .* xds
+                return f(newxs...)
+            else
+                @assert length(xs) == length(xds) == 1
+                newx = xs[1] + ds * xds[1]
+                return f(newx)
+            end
+        end, _zero.(xs, ds)...)
+    end
+end
+
+_zero(::Number, d::Number) = zero(d)
+_zero(::Number, d::AbstractVector) = zero(d)
+_zero(::AbstractVector, d::AbstractVector) = zero(eltype(d))
+_zero(::AbstractVector, d::AbstractMatrix) = zero(similar(d, size(d, 2)))
+_zero(::AbstractMatrix, d::AbstractMatrix) = zero(d)
+_zero(::Any, d::Any) = zero(d)
+
+function pullback_function(ab::AbstractBackend, f, xs...)
+    return (ws) -> begin
+        jacs = jacobian(lowest(ab), (xs...,) -> begin
+            vs = f(xs...)
+            if ws isa Tuple
+                @assert length(vs) == length(ws)
+                return sum(zip(vs, ws)) do v, w
+                    if w isa Union{AbstractMatrix, UniformScaling} && v isa AbstractVector
+                        return w' * v
+                    else
+                        # for arbitrary arrays
+                        return dot(w, v)
+                    end
+                end
+            else
+                w, v = ws, vs
+                if w isa Union{AbstractMatrix, UniformScaling} && v isa AbstractVector
+                    return w' * v
+                else
+                    # for arbitrary arrays
+                    return dot(w, v)
+                end
+            end
+        end, xs...)
+        return adjoint.(jacs)
+    end
+end
+function value_and_pullback_function(
+    ab::AbstractBackend,
+    f,
+    xs...,
+)
+    return (ws) -> begin
+        local value
+        primalcalled = false
+        jacs = jacobian(lowest(ab), (_xs...,) -> begin
+            vs = f(_xs...)
+            if !primalcalled
+                value = primalvalue(lowest(ab), vs, f, xs)
+                primalcalled = true
+            end
+            if ws isa Tuple
+                @assert length(vs) == length(ws)
+                return sum(zip(vs, ws)) do v, w
+                    if w isa Union{AbstractMatrix, UniformScaling} && v isa AbstractVector
+                        return w' * v
+                    else
+                        # for arbitrary arrays
+                        return dot(w, v)
+                    end
+                end
+            else
+                w, v = ws, vs
+                if w isa Union{AbstractMatrix, UniformScaling} && v isa AbstractVector
+                    return w' * v
+                else
+                    # for arbitrary arrays
+                    return dot(w, v)
+                end
+            end
+        end, xs...)
+        return value, adjoint.(jacs)
+    end
+end
+
+struct LazyDerivative{B, F, X}
+    backend::B
+    f::F
+    xs::X
+end
+function Base.:*(d::LazyDerivative, y)
+    return derivative(d.ab, d.f, d.xs...) * y
+end
+function Base.:*(y, d::LazyDerivative)
+    return y * derivative(d.ab, d.f, d.xs...)
+end
+
+struct LazyGradient{B, F, X}
+    backend::B
+    f::F
+    xs::X
+end
+Base.:*(d::LazyGradient, y) = gradient(d.ab, d.f, d.xs...) * y
+Base.:*(y, d::LazyGradient) = y * gradient(d.ab, d.f, d.xs...)
+
+struct LazyJacobian{B, F, X}
+    backend::B
+    f::F
+    xs::X
+end
+function Base.:*(d::LazyJacobian, ys)
+    return pushforward_function(d.ab, d.f, d.xs...)(ys)
+end
+function Base.:*(ys, d::LazyJacobian)
+    if ys isa Tuple
+        ya = adjoint.(ys)
+    else
+        ya = adjoint(ys)
+    end
+    return pullback_function(d.ab, d.f, d.xs...)(ya)
+end
+
+struct LazyHessian{B, F, X}
+    backend::B
+    f::F
+    xs::X
+end
+function Base.:*(d::LazyHessian, ys)
+    return pushforward_function(
+        secondlowest(d.ab),
+        (xs...,) -> gradient(lowest(d.ab), d.f, xs...),
+        d.xs...,
+    )(ys)
+end
+function Base.:*(ys, d::LazyHessian)
+    if ys isa Tuple
+        ya = adjoint.(ys)
+    else
+        ya = adjoint(ys)
+    end
+    return pullback_function(
+        secondlowest(d.ab),
+        (xs...,) -> gradient(lowest(d.ab), d.f, xs...),
+        d.xs...,
+    )(ya)
+end
+
+function lazyderivative(ab::AbstractBackend, f, xs::Number...)
+    return LazyDerivative(ab, f, xs)
+end
+function lazygradient(ab::AbstractBackend, f, xs...)
+    return LazyGradient(ab, f, xs)
+end
+function lazyhessian(ab::AbstractBackend, f, xs...)
+    return LazyHessian(ab, f, xs)
+end
+function lazyjacobian(ab::AbstractBackend, f, xs...)
+    return LazyJacobian(ab, f, xs)
+end
+
+struct D{B, F}
+    backend::B
+    f::F
+end
+D(b::AbstractBackend, d::D) = H(HigherOrderBackend((b, d.b)), d.f)
+D(d::D) = H(HigherOrderBackend((d.backend, d.backend)), d.f)
+function (d::D)(xs...; lazy = true)
+    if lazy
+        return lazyjacobian(d.ab, d.f, xs...)
+    else
+        return jacobian(d.ab, d.f, xs...)
+    end
+end
+
+struct H{B, F}
+    backend::B
+    f::F
+end
+function (h::H)(xs...; lazy = true)
+    if lazy
+        return lazyhessian(h.ab, h.f, xs...)
+    else
+        return hessian(h.ab, h.f, xs...)
+    end
+end
+
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index a8a79fc..d7015ea 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,79 @@
 using AbstractDifferentiation
-using Test
+using Test, FiniteDifferences, LinearAlgebra
+
+const FDM = FiniteDifferences
+
+struct FDMBackend{A} <: AD.AbstractFiniteDifference
+    alg::A
+end
+FDMBackend() = FDMBackend(central_fdm(5, 1))
+const fdm_backend = FDMBackend()
+
+# Minimal interface
+function AD.jacobian(ab::FDMBackend, f, xs...)
+    return jacobian(ab.alg, f, xs...)
+end
+
+fder(x, y) = exp(y) * x + y * log(x)
+fgrad(x, y) = prod(x) + sum(y ./ (1:length(y)))
+function fjac(x, y)
+    x + Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) * y
+end
+
+const xscalar = rand()
+const yscalar = rand()
+
+const xvec = rand(5)
+const yvec = rand(5)
 
 @testset "AbstractDifferentiation.jl" begin
-    # Write your tests here.
+    @testset "FiniteDifferences" begin
+        @testset "Derivative" begin
+            der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar)
+            der2 = (
+                fdm_backend.alg(x -> fder(x, yscalar), xscalar),
+                fdm_backend.alg(y -> fder(xscalar, y), yscalar),
+            )
+            @test norm.(der1 .- der2) == (0, 0)
+            valscalar, der3 = AD.value_and_derivative(fdm_backend, fder, xscalar, yscalar)
+            @test valscalar == fder(xscalar, yscalar)
+            @test der3 .- der1 == (0, 0)
+        end
+        @testset "Gradient" begin
+            grad1 = AD.gradient(fdm_backend, fgrad, xvec, yvec)
+            grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec)
+            @test norm.(grad1 .- grad2) == (0, 0)
+            valscalar, grad3 = AD.value_and_gradient(fdm_backend, fgrad, xvec, yvec)
+            @test valscalar == fgrad(xvec, yvec)
+            @test norm.(grad3 .- grad1) == (0, 0)
+        end
+        @testset "Jacobian" begin
+            jac1 = AD.jacobian(fdm_backend, fjac, xvec, yvec)
+            jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec)
+            @test norm.(jac1 .- jac2) == (0, 0)
+            valvec, jac3 = AD.value_and_jacobian(fdm_backend, fjac, xvec, yvec)
+            @test valvec == fjac(xvec, yvec)
+            @test norm.(jac3 .- jac1) == (0, 0)
+        end
+        @testset "jvp" begin
+            v = (rand(length(xvec)), rand(length(yvec)))
+            pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+            pf2 = (
+                FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
+                FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
+            )
+            @test norm.(pf1 .- pf2) == (0, 0)
+            valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+            @test valvec == fjac(xvec, yvec)
+            @test norm.(pf3 .- pf1) == (0, 0)
+        end
+        @testset "j′vp" begin
+            w = rand(length(fjac(xvec, yvec)))
+            pb1 = AD.pullback_function(fdm_backend, fjac, xvec, yvec)(w)
+            pb2 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec)
+            @test all(norm.(pb1 .- pb2) .<= (1e-10, 1e-10))
+            valvec, pb3 = AD.value_and_pullback_function(fdm_backend, fjac, xvec, yvec)(w)
+            @test valvec == fjac(xvec, yvec)
+            @test norm.(pb3 .- pb1) == (0, 0)
+        end
+    end
 end

From dfc5911d60fcb017f9050fb77b7b41f2059359cf Mon Sep 17 00:00:00 2001
From: Mohamed Tarek
Date: Tue, 9 Feb 2021 03:42:15 +1100
Subject: [PATCH 02/25] fix typo

---
 src/AbstractDifferentiation.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index ad539ca..e119261 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -96,7 +96,7 @@ function value_gradient_and_hessian(ab::AbstractBackend, f, xs...)
         v, g = value_and_gradient(lowest(ab), f, _xs...)
         if !primalcalled
             value = primalvalue(secondlowest(ab), v, f, xs)
-            primalcalled = false
+            primalcalled = true
         end
         return g
     end, xs...)
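Usage note: with just the two patches above applied, the package is already usable end to end. A backend only overloads `AD.jacobian`; derivatives, gradients, Hessians and the `value_and_*` variants all fall back to it, and for finite differences `primalvalue` simply recomputes `f(xs...)`. A minimal sketch mirroring the `FDMBackend` defined in the tests (the backend name `MyFDBackend` and the function `f` below are illustrative, not part of the patches):

    using AbstractDifferentiation, FiniteDifferences, LinearAlgebra

    struct MyFDBackend{A} <: AD.AbstractFiniteDifference
        alg::A
    end
    MyFDBackend() = MyFDBackend(central_fdm(5, 1))

    # The single primitive this backend provides:
    AD.jacobian(ab::MyFDBackend, f, xs...) = FiniteDifferences.jacobian(ab.alg, f, xs...)

    be = MyFDBackend()
    f(x, y) = x .+ 2 .* y
    x, y = rand(3), rand(3)

    AD.jacobian(be, f, x, y)                        # tuple of Jacobians, one per input
    val, jacs = AD.value_and_jacobian(be, f, x, y)  # primal recomputed via primalvalue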
From da5aecefaedaeefa65ecf4c742aa738c80ef0970 Mon Sep 17 00:00:00 2001
From: Mohamed Tarek
Date: Thu, 11 Feb 2021 04:01:12 +1100
Subject: [PATCH 03/25] primitive macro

---
 Project.toml                   |   3 +-
 src/AbstractDifferentiation.jl | 168 ++++++++++++++++++++++++-------
 test/runtests.jl               | 174 +++++++++++++++++++++++--------
 3 files changed, 266 insertions(+), 79 deletions(-)

diff --git a/Project.toml b/Project.toml
index 95fb5a4..8efed07 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,14 +4,15 @@ authors = ["Mohamed Tarek and contributors"]
 version = "0.1.0"
 
 [deps]
+ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
 [compat]
 julia = "1"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
 test = ["Test", "FiniteDifferences"]
diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index e119261..6992955 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -1,6 +1,6 @@
 module AbstractDifferentiation
 
-using LinearAlgebra
+using LinearAlgebra, ExprTools
 
 export AD
 
@@ -20,6 +20,7 @@ function reduceorder(b::HigherOrderBackend)
 end
 lowest(b::AbstractBackend) = b
 lowest(b::HigherOrderBackend) = b.backends[end]
+secondlowest(b::AbstractBackend) = b
 secondlowest(b::HigherOrderBackend) = lowest(reduceorder(b))
 
 # If the primal value is in y, extract it.
@@ -119,7 +120,7 @@ end
 function pushforward_function(
     ab::AbstractBackend,
     f,
-    xs::Union{Number, AbstractArray{<:Number}}...,
+    xs...,
 )
     return (ds) -> begin
         return jacobian(lowest(ab), (xds...,) -> begin
@@ -138,21 +139,21 @@ end
 function value_and_pushforward_function(
     ab::AbstractBackend,
     f,
-    xs::Union{Number, AbstractArray{<:Number}}...,
+    xs...,
 )
     return (ds) -> begin
         @assert ds isa Tuple && length(ds) == length(xs)
-        return value_and_jacobian(lowest(ab), (xds...,) -> begin
-            if ds isa Tuple
-                @assert length(xs) == length(ds)
-                newxs = xs .+ ds .* xds
-                return f(newxs...)
-            else
-                @assert length(xs) == length(xds) == 1
-                newx = xs[1] + ds * xds[1]
-                return f(newx)
+        local value
+        primalcalled = false
+        pf = pushforward_function(lowest(ab), (_xs...,) -> begin
+            vs = f(_xs...)
+            if !primalcalled
+                value = primalvalue(lowest(ab), vs, f, xs)
+                primalcalled = true
             end
-        end, _zero.(xs, ds)...)
+            return vs
+        end, xs...)(ds)
+        return value, pf
     end
 end
 
@@ -198,33 +199,23 @@ function value_and_pullback_function(
     return (ws) -> begin
         local value
         primalcalled = false
-        jacs = jacobian(lowest(ab), (_xs...,) -> begin
-            vs = f(_xs...)
+        if ws === nothing
+            vs = f(xs...)
             if !primalcalled
                 value = primalvalue(lowest(ab), vs, f, xs)
                 primalcalled = true
             end
-            if ws isa Tuple
-                @assert length(vs) == length(ws)
-                return sum(zip(vs, ws)) do v, w
-                    if w isa Union{AbstractMatrix, UniformScaling} && v isa AbstractVector
-                        return w' * v
-                    else
-                        # for arbitrary arrays
-                        return dot(w, v)
-                    end
-                end
-            else
-                w, v = ws, vs
-                if w isa Union{AbstractMatrix, UniformScaling} && v isa AbstractVector
-                    return w' * v
-                else
-                    # for arbitrary arrays
-                    return dot(w, v)
-                end
+            return value, nothing
+        end
+        pb = pullback_function(lowest(ab), (_xs...,) -> begin
+            vs = f(_xs...)
+            if !primalcalled
+                value = primalvalue(lowest(ab), vs, f, xs)
+                primalcalled = true
             end
-        end, xs...)
-        return value, adjoint.(jacs)
+            return vs
+        end, xs...)(ws)
+        return value, pb
     end
 end
 
@@ -329,4 +320,111 @@ function (h::H)(xs...; lazy = true)
     end
 end
 
+macro primitive(expr)
+    fdef = ExprTools.splitdef(expr)
+    name = fdef[:name]
+    if name == :pushforward_function
+        return define_pushforward_function_and_friends(fdef) |> esc
+    elseif name == :pullback_function
+        return define_pullback_function_and_friends(fdef) |> esc
+    elseif name == :jacobian
+        return define_jacobian_and_friends(fdef) |> esc
+    elseif name == :primalvalue
+        return define_primalvalue(fdef) |> esc
+    else
+        throw("Unsupported AD primitive.")
+    end
+end
+
+function define_pushforward_function_and_friends(fdef)
+    fdef[:name] = :(AbstractDifferentiation.pushforward_function)
+    args = fdef[:args]
+    funcs = quote
+        $(ExprTools.combinedef(fdef))
+        function AbstractDifferentiation.jacobian($(args...),)
+            identity_like = AbstractDifferentiation.identity_matrix_like($(args[3:end]...),)
+            pff = AbstractDifferentiation.pushforward_function($(args...),)
+            if eltype(identity_like) <: Tuple{Vararg{Union{AbstractMatrix, Number}}}
+                return map(identity_like) do identity_like_i
+                    return mapreduce(hcat, AbstractDifferentiation._eachcol.(identity_like_i)...) do (cols...)
+                        pff(cols)
+                    end
+                end
+            else
+                return pff(identity_like)
+            end
+        end
+    end
+    return funcs
+end
+
+function define_pullback_function_and_friends(fdef)
+    fdef[:name] = :(AbstractDifferentiation.pullback_function)
+    args = fdef[:args]
+    funcs = quote
+        $(ExprTools.combinedef(fdef))
+        function AbstractDifferentiation.jacobian($(args...),)
+            value_and_pbf = AbstractDifferentiation.value_and_pullback_function($(args...),)
+            value, _ = value_and_pbf(nothing)
+            identity_like = AbstractDifferentiation.identity_matrix_like(value)
+            if eltype(identity_like) <: Tuple{Vararg{AbstractMatrix}}
+                return map(identity_like) do identity_like_i
+                    return mapreduce(vcat, AbstractDifferentiation._eachcol.(identity_like_i)...) do (cols...)
+                        value_and_pbf(cols)[2]'
+                    end
+                end
+            else
+                return adjoint.(value_and_pbf(identity_like)[2])
+            end
+        end
+    end
+    return funcs
+end
+
+_eachcol(a::Number) = (a,)
+_eachcol(a) = eachcol(a)
+
+function define_jacobian_and_friends(fdef)
+    fdef[:name] = :(AbstractDifferentiation.jacobian)
+    return ExprTools.combinedef(fdef)
+end
+
+function define_primalvalue(fdef)
+    fdef[:name] = :(AbstractDifferentiation.primalvalue)
+    return ExprTools.combinedef(fdef)
+end
+
+function identity_matrix_like(x)
+    throw("The function `identity_matrix_like` is not defined for the type $(typeof(x)).")
+end
+function identity_matrix_like(x::AbstractVector)
+    return (Matrix{eltype(x)}(I, length(x), length(x)),)
+end
+function identity_matrix_like(x::Number)
+    return (one(x),)
+end
+identity_matrix_like(x::Tuple) = identity_matrix_like(x...)
+@generated function identity_matrix_like(x...)
+    expr = :(())
+    for i in 1:length(x)
+        push!(expr.args, :(()))
+        for j in 1:i-1
+            push!(expr.args[i].args, :((zero_matrix_like(x[$j])[1])))
+        end
+        push!(expr.args[i].args, :((identity_matrix_like(x[$i]))[1]))
+        for j in i+1:length(x)
+            push!(expr.args[i].args, :(zero_matrix_like(x[$j])[1]))
+        end
+    end
+    return expr
+end
+
+zero_matrix_like(x::Tuple) = zero_matrix_like(x...)
+zero_matrix_like(x...) = map(zero_matrix_like, x)
+zero_matrix_like(x::AbstractVector) = (zero(similar(x, length(x), length(x))),)
+zero_matrix_like(x::Number) = (zero(x),)
+function zero_matrix_like(x)
+    throw("The function `zero_matrix_like` is not defined for the type $(typeof(x)).")
+end
+
 end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index d7015ea..324a265 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,17 +2,39 @@ using AbstractDifferentiation
 using Test, FiniteDifferences, LinearAlgebra
 
 const FDM = FiniteDifferences
-struct FDMBackend{A} <: AD.AbstractFiniteDifference
+
+struct FDMBackend1{A} <: AD.AbstractFiniteDifference
     alg::A
 end
-FDMBackend() = FDMBackend(central_fdm(5, 1))
-const fdm_backend = FDMBackend()
-
+FDMBackend1() = FDMBackend1(central_fdm(5, 1))
+const fdm_backend1 = FDMBackend1()
 # Minimal interface
-function AD.jacobian(ab::FDMBackend, f, xs...)
+AD.@primitive function jacobian(ab::FDMBackend1, f, xs...)
     return jacobian(ab.alg, f, xs...)
 end
 
+struct FDMBackend2{A} <: AD.AbstractFiniteDifference
+    alg::A
+end
+FDMBackend2() = FDMBackend2(central_fdm(5, 1))
+const fdm_backend2 = FDMBackend2()
+AD.@primitive function pushforward_function(ab::FDMBackend2, f, xs...)
+    return (vs) -> jvp(ab.alg, f, tuple.(xs, vs)...)
+end
+
+struct FDMBackend3{A} <: AD.AbstractFiniteDifference
+    alg::A
+end
+FDMBackend3() = FDMBackend3(central_fdm(5, 1))
+const fdm_backend3 = FDMBackend3()
+AD.@primitive function pullback_function(ab::FDMBackend3, f, xs...)
+    return function (vs)
+        # Supports only single output
+        @assert length(vs) == 1
+        return j′vp(ab.alg, f, vs[1], xs...)
+    end
+end
+
 fder(x, y) = exp(y) * x + y * log(x)
 fgrad(x, y) = prod(x) + sum(y ./ (1:length(y)))
 function fjac(x, y)
@@ -25,55 +47,121 @@ const yscalar = rand()
 const xvec = rand(5)
 const yvec = rand(5)
 
+function test_fdm_derivatives(fdm_backend)
+    der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar)
+    der2 = (
+        fdm_backend.alg(x -> fder(x, yscalar), xscalar),
+        fdm_backend.alg(y -> fder(xscalar, y), yscalar),
+    )
+    @test norm.(der1 .- der2) == (0, 0)
+    valscalar, der3 = AD.value_and_derivative(fdm_backend, fder, xscalar, yscalar)
+    @test valscalar == fder(xscalar, yscalar)
+    @test der3 .- der1 == (0, 0)
+end
+
+function test_fdm_gradients(fdm_backend)
+    grad1 = AD.gradient(fdm_backend, fgrad, xvec, yvec)
+    grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec)
+    @test norm.(grad1 .- grad2) == (0, 0)
+    valscalar, grad3 = AD.value_and_gradient(fdm_backend, fgrad, xvec, yvec)
+    @test valscalar == fgrad(xvec, yvec)
+    @test norm.(grad3 .- grad1) == (0, 0)
+end
+
+function test_fdm_jacobians(fdm_backend)
+    jac1 = AD.jacobian(fdm_backend, fjac, xvec, yvec)
+    jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec)
+    @test norm.(jac1 .- jac2) == (0, 0)
+    valvec, jac3 = AD.value_and_jacobian(fdm_backend, fjac, xvec, yvec)
+    @test valvec == fjac(xvec, yvec)
+    @test norm.(jac3 .- jac1) == (0, 0)
+end
+
+function test_fdm_hessians(fdm_backend)
+    fhess = x -> fgrad(x, yvec)
+    hess1 = AD.hessian(fdm_backend, fhess, xvec)
+    hess2 = FDM.jacobian(
+        fdm_backend.alg,
+        (x) -> begin
+            FDM.grad(
+                fdm_backend.alg,
+                fhess,
+                x,
+            )
+        end,
+        xvec,
+    )
+    @test norm.(hess1 .- hess2) == (0,)
+    valscalar, hess3 = AD.value_and_hessian(fdm_backend, fhess, xvec)
+    @test valscalar == fgrad(xvec, yvec)
+    @test norm.(hess3 .- hess1) == (0,)
+    valscalar, grad, hess4 = AD.value_gradient_and_hessian(fdm_backend, fhess, xvec)
+    @test valscalar == fgrad(xvec, yvec)
+    @test norm.(grad .- AD.gradient(fdm_backend, fhess, xvec)) == (0,)
+    @test norm.(hess4 .- hess1) == (0,)
+end
+
+function test_fdm_jvp(fdm_backend)
+    v = (rand(length(xvec)), rand(length(yvec)))
+    pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+    pf2 = (
+        FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
+        FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
+    )
+    @test norm.(pf1 .- pf2) == (0, 0)
+    valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+    @test valvec == fjac(xvec, yvec)
+    @test norm.(pf3 .- pf1) == (0, 0)
+end
+
+function test_fdm_j′vp(fdm_backend)
+    w = rand(length(fjac(xvec, yvec)))
+    pb1 = AD.pullback_function(fdm_backend, fjac, xvec, yvec)(w)
+    pb2 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec)
+    @test all(norm.(pb1 .- pb2) .<= (1e-10, 1e-10))
+    valvec, pb3 = AD.value_and_pullback_function(fdm_backend, fjac, xvec, yvec)(w)
+    @test valvec == fjac(xvec, yvec)
+    @test norm.(pb3 .- pb1) == (0, 0)
+end
+
 @testset "AbstractDifferentiation.jl" begin
     @testset "FiniteDifferences" begin
         @testset "Derivative" begin
-            der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar)
-            der2 = (
-                fdm_backend.alg(x -> fder(x, yscalar), xscalar),
-                fdm_backend.alg(y -> fder(xscalar, y), yscalar),
-            )
-            @test norm.(der1 .- der2) == (0, 0)
-            valscalar, der3 = AD.value_and_derivative(fdm_backend, fder, xscalar, yscalar)
-            @test valscalar == fder(xscalar, yscalar)
-            @test der3 .- der1 == (0, 0)
+            test_fdm_derivatives(fdm_backend1)
+            test_fdm_derivatives(fdm_backend2)
+            test_fdm_derivatives(fdm_backend3)
         end
         @testset "Gradient" begin
-            grad1 = AD.gradient(fdm_backend, fgrad, xvec, yvec)
-            grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec)
-            @test norm.(grad1 .- grad2) == (0, 0)
-            valscalar, grad3 = AD.value_and_gradient(fdm_backend, fgrad, xvec, yvec)
-            @test valscalar == fgrad(xvec, yvec)
-            @test norm.(grad3 .- grad1) == (0, 0)
+            test_fdm_gradients(fdm_backend1)
+            test_fdm_gradients(fdm_backend2)
+            test_fdm_gradients(fdm_backend3)
         end
         @testset "Jacobian" begin
-            jac1 = AD.jacobian(fdm_backend, fjac, xvec, yvec)
-            jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec)
-            @test norm.(jac1 .- jac2) == (0, 0)
-            valvec, jac3 = AD.value_and_jacobian(fdm_backend, fjac, xvec, yvec)
-            @test valvec == fjac(xvec, yvec)
-            @test norm.(jac3 .- jac1) == (0, 0)
+            test_fdm_jacobians(fdm_backend1)
+            test_fdm_jacobians(fdm_backend2)
+            # Errors
+            test_fdm_jacobians(fdm_backend3)
+        end
+        @testset "Hessian" begin
+            # Works but super slow
+            test_fdm_hessians(fdm_backend1)
+            # Errors
+            test_fdm_hessians(fdm_backend2)
+            # Errors
+            test_fdm_hessians(fdm_backend3)
         end
         @testset "jvp" begin
-            v = (rand(length(xvec)), rand(length(yvec)))
-            pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
-            pf2 = (
-                FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
-                FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
-            )
-            @test norm.(pf1 .- pf2) == (0, 0)
-            valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
-            @test valvec == fjac(xvec, yvec)
-            @test norm.(pf3 .- pf1) == (0, 0)
+            test_fdm_jvp(fdm_backend1)
+            # Errors
+            test_fdm_jvp(fdm_backend2)
+            # Errors
+            test_fdm_jvp(fdm_backend3)
         end
         @testset "j′vp" begin
-            w = rand(length(fjac(xvec, yvec)))
-            pb1 = AD.pullback_function(fdm_backend, fjac, xvec, yvec)(w)
-            pb2 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec)
-            @test all(norm.(pb1 .- pb2) .<= (1e-10, 1e-10))
-            valvec, pb3 = AD.value_and_pullback_function(fdm_backend, fjac, xvec, yvec)(w)
-            @test valvec == fjac(xvec, yvec)
-            @test norm.(pb3 .- pb1) == (0, 0)
+            test_fdm_j′vp(fdm_backend1)
+            test_fdm_j′vp(fdm_backend2)
+            # Errors
+            test_fdm_j′vp(fdm_backend3)
         end
     end
 end
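Usage note: the `@primitive` macro is the heart of this patch. A backend now declares exactly one of `jacobian`, `pushforward_function`, or `pullback_function`, and the corresponding `AD.jacobian` method is generated from it, column by column from basis-vector seeds. A hedged sketch of a pushforward-only backend, modeled on `FDMBackend2` from the tests above (the names `JVPBackend`, `g`, `x`, `y` are illustrative, not part of the diffs):

    using AbstractDifferentiation, FiniteDifferences

    struct JVPBackend{A} <: AD.AbstractFiniteDifference
        alg::A
    end

    # The backend's only primitive; AD.@primitive also generates an
    # AD.jacobian method that assembles the Jacobian from pushforwards.
    AD.@primitive function pushforward_function(ab::JVPBackend, f, xs...)
        return (vs) -> FiniteDifferences.jvp(ab.alg, f, tuple.(xs, vs)...)
    end

    backend = JVPBackend(central_fdm(5, 1))
    g(x, y) = x .+ 2 .* y
    x, y = rand(3), rand(3)

    AD.pushforward_function(backend, g, x, y)((ones(3), zeros(3)))  # one JVP, here ∂g/∂x * ones(3)
    AD.jacobian(backend, g, x, y)  # both Jacobians, assembled column by column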
From 512fd2d5691ce10a530c4df64c0ed258dd27b1f1 Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Mon, 3 May 2021 11:29:20 +0200
Subject: [PATCH 04/25] fix gradients

---
 src/AbstractDifferentiation.jl | 11 ++++++++++-
 test/runtests.jl               | 22 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 6992955..847b4b7 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -54,6 +54,10 @@ end
 function value_and_jacobian(ab::AbstractBackend, f, xs...)
     local value
     primalcalled = false
+    if ab isa AbstractFiniteDifference
+        value = primalvalue(ab, nothing, f, xs)
+        primalcalled = true
+    end
     jacs = jacobian(lowest(ab), (_xs...,) -> begin
         v = f(_xs...)
         if !primalcalled
@@ -62,11 +66,16 @@ function value_and_jacobian(ab::AbstractBackend, f, xs...)
         end
         return v
     end, xs...)
+
     return value, jacs
 end
 function value_and_hessian(ab::AbstractBackend, f, xs...)
     local value
     primalcalled = false
+    if ab isa AbstractFiniteDifference
+        value = primalvalue(ab, nothing, f, xs)
+        primalcalled = true
+    end
     hess = jacobian(secondlowest(ab), (_xs...,) -> begin
         v, g = value_and_gradient(lowest(ab), f, _xs...)
         if !primalcalled
@@ -427,4 +436,4 @@ function zero_matrix_like(x)
     throw("The function `zero_matrix_like` is not defined for the type $(typeof(x)).")
 end
 
-end
\ No newline at end of file
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 324a265..331ff9a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -36,10 +36,18 @@ AD.@primitive function pullback_function(ab::FDMBackend3, f, xs...)
 end
 
 fder(x, y) = exp(y) * x + y * log(x)
+dfderdx(x, y) = exp(y) + y * 1/x
+dfderdy(x, y) = exp(y) * x + log(x)
+
 fgrad(x, y) = prod(x) + sum(y ./ (1:length(y)))
+dfgraddx(x, y) = prod(x)./x
+dfgraddy(x, y) = one(eltype(y)) ./ (1:length(y))
+
 function fjac(x, y)
     x + Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) * y
 end
+dfjacdx(x, y) = I(length(x))
+dfjacdy(x, y) = Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U)
 
 const xscalar = rand()
 const yscalar = rand()
@@ -47,6 +55,10 @@ const yscalar = rand()
 const xvec = rand(5)
 const yvec = rand(5)
 
+# to check if vectors get mutated
+xvec2 = deepcopy(xvec)
+yvec2 = deepcopy(yvec)
+
 function test_fdm_derivatives(fdm_backend)
     der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar)
     der2 = (
@@ -57,6 +69,8 @@ function test_fdm_derivatives(fdm_backend)
     valscalar, der3 = AD.value_and_derivative(fdm_backend, fder, xscalar, yscalar)
     @test valscalar == fder(xscalar, yscalar)
     @test der3 .- der1 == (0, 0)
+    der_exact = (dfderdx(xscalar,yscalar), dfderdy(xscalar,yscalar))
+    @test minimum(isapprox.(der_exact, der1, rtol=1e-10))
 end
 
 function test_fdm_gradients(fdm_backend)
@@ -66,6 +80,10 @@ function test_fdm_gradients(fdm_backend)
     valscalar, grad3 = AD.value_and_gradient(fdm_backend, fgrad, xvec, yvec)
     @test valscalar == fgrad(xvec, yvec)
     @test norm.(grad3 .- grad1) == (0, 0)
+    grad_exact = (dfgraddx(xvec,yvec), dfgraddy(xvec,yvec))
+    @test minimum(isapprox.(grad_exact, grad1, rtol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
 end
 
 function test_fdm_jacobians(fdm_backend)
@@ -75,6 +93,10 @@ function test_fdm_jacobians(fdm_backend)
     valvec, jac3 = AD.value_and_jacobian(fdm_backend, fjac, xvec, yvec)
     @test valvec == fjac(xvec, yvec)
     @test norm.(jac3 .- jac1) == (0, 0)
+    grad_exact = (dfjacdx(xvec, yvec), dfjacdy(xvec, yvec))
+    @test minimum(isapprox.(grad_exact, jac1, rtol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
 end

From caa72e1e6a7a33c8271bf4067b7b41ba51add397 Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Mon, 3 May 2021 21:55:51 +0200
Subject: [PATCH 05/25] fix Jacobian tests

---
 src/AbstractDifferentiation.jl | 4 ++++
 test/runtests.jl               | 1 -
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 847b4b7..3fd0f36 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -382,6 +382,10 @@ function define_pullback_function_and_friends(fdef)
                         value_and_pbf(cols)[2]'
                     end
                 end
+            elseif eltype(identity_like) <: AbstractMatrix
+                return vcat.(mapslices(identity_like[1], dims=1) do cols
+                    adjoint.(value_and_pbf((cols,))[2])
+                end ...)
             else
                 return adjoint.(value_and_pbf(identity_like)[2])
             end
diff --git a/test/runtests.jl b/test/runtests.jl
index 331ff9a..2f96c07 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -161,7 +161,6 @@ end
         @testset "Jacobian" begin
             test_fdm_jacobians(fdm_backend1)
             test_fdm_jacobians(fdm_backend2)
-            # Errors
             test_fdm_jacobians(fdm_backend3)
         end
         @testset "Hessian" begin

From 4a763e3693fdf9e14c25dde454121730e45a2158 Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Wed, 5 May 2021 15:01:18 +0200
Subject: [PATCH 06/25] fix hessian tests

---
 src/AbstractDifferentiation.jl | 13 +++++++++++++
 test/runtests.jl               | 10 +++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 3fd0f36..0610049 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -359,6 +359,19 @@ function define_pushforward_function_and_friends(fdef)
                         pff(cols)
                     end
                 end
+            elseif eltype(identity_like) <: AbstractMatrix
+                ret = hcat.(mapslices(identity_like[1], dims=1) do cols
+                    pf = pff((cols,))
+                    if typeof(pf) <: AbstractVector
+                        return (pf, )
+                    elseif typeof(pf) <: AbstractMatrix
+                        return (transpose(pf), )
+                    else
+                        return pf
+                    end
+                end ...)
+                return ret isa Tuple ? ret : (ret,)
+
             else
                 return pff(identity_like)
             end
diff --git a/test/runtests.jl b/test/runtests.jl
index 2f96c07..88b8590 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -42,6 +42,7 @@ dfderdy(x, y) = exp(y) * x + log(x)
 fgrad(x, y) = prod(x) + sum(y ./ (1:length(y)))
 dfgraddx(x, y) = prod(x)./x
 dfgraddy(x, y) = one(eltype(y)) ./ (1:length(y))
+dfgraddxdx(x, y) = prod(x)./(x*x') - Diagonal(diag(prod(x)./(x*x')))
 
 function fjac(x, y)
     x + Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) * y
@@ -121,6 +122,12 @@ function test_fdm_hessians(fdm_backend)
     @test valscalar == fgrad(xvec, yvec)
     @test norm.(grad .- AD.gradient(fdm_backend, fhess, xvec)) == (0,)
     @test norm.(hess4 .- hess1) == (0,)
+    @test dfgraddxdx(xvec,yvec) ≈ hess1[1] atol=1e-10
+    @test xvec == xvec2
+    @test yvec == yvec2
+    fhess2 = x-> dfgraddx(x, yvec)
+    hess5 = AD.jacobian(fdm_backend, fhess2, xvec)
+    @test minimum(isapprox.(hess5, hess1, atol=1e-10))
 end
 
 function test_fdm_jvp(fdm_backend)
@@ -166,16 +173,13 @@ end
         @testset "Hessian" begin
             # Works but super slow
             test_fdm_hessians(fdm_backend1)
-            # Errors
             test_fdm_hessians(fdm_backend2)
-            # Errors
             test_fdm_hessians(fdm_backend3)
         end
         @testset "jvp" begin
             test_fdm_jvp(fdm_backend1)
             # Errors
             test_fdm_jvp(fdm_backend2)
-            # Errors
             test_fdm_jvp(fdm_backend3)
         end
         @testset "j′vp" begin
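Usage note: with the elementwise matrix branches in place, the second-order API now works for all three primitive styles (the jacobian-based backend still works but is slow, per the comment above). The entry points exercised by `test_fdm_hessians`, in a small sketch reusing the fixtures from the tests (`fdm_backend1`, `fgrad`, `xvec`, `yvec`):

    fhess = x -> fgrad(x, yvec)   # scalar function of one vector input
    hess, = AD.hessian(fdm_backend1, fhess, xvec)          # 5×5 matrix, returned as a 1-tuple
    val, hess2 = AD.value_and_hessian(fdm_backend1, fhess, xvec)
    val, grad, hess3 = AD.value_gradient_and_hessian(fdm_backend1, fhess, xvec)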
From b6f48045fe9c4babf4efdbf9b2f977bf9cd51d98 Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Wed, 5 May 2021 16:24:50 +0200
Subject: [PATCH 07/25] =?UTF-8?q?fix=20=20j=E2=80=B2vp?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Project.toml                   |  3 ++-
 src/AbstractDifferentiation.jl |  8 ++++++++
 test/runtests.jl               | 23 ++++++++++++++++++++---
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8efed07..38eefd4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,7 +12,8 @@ julia = "1"
 
 [extras]
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "FiniteDifferences"]
+test = ["Test", "FiniteDifferences", "Random"]
diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 0610049..bc2ffd6 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -154,6 +154,10 @@ function value_and_pushforward_function(
         @assert ds isa Tuple && length(ds) == length(xs)
         local value
         primalcalled = false
+        if ab isa AbstractFiniteDifference
+            value = primalvalue(ab, nothing, f, xs)
+            primalcalled = true
+        end
         pf = pushforward_function(lowest(ab), (_xs...,) -> begin
             vs = f(_xs...)
             if !primalcalled
@@ -208,6 +212,10 @@ function value_and_pullback_function(
     return (ws) -> begin
         local value
         primalcalled = false
+        if ab isa AbstractFiniteDifference
+            value = primalvalue(ab, nothing, f, xs)
+            primalcalled = true
+        end
         if ws === nothing
             vs = f(xs...)
             if !primalcalled
diff --git a/test/runtests.jl b/test/runtests.jl
index 88b8590..d863686 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,7 @@
 using AbstractDifferentiation
 using Test, FiniteDifferences, LinearAlgebra
+using Random
+Random.seed!(1234)
 
 const FDM = FiniteDifferences
 
@@ -30,8 +32,13 @@ const fdm_backend3 = FDMBackend3()
 AD.@primitive function pullback_function(ab::FDMBackend3, f, xs...)
     return function (vs)
         # Supports only single output
-        @assert length(vs) == 1
-        return j′vp(ab.alg, f, vs[1], xs...)
+        if vs isa AbstractVector
+            return j′vp(ab.alg, f, vs, xs...)
+        else
+            @assert length(vs) == 1
+            return j′vp(ab.alg, f, vs[1], xs...)
+
+        end
     end
 end
 
@@ -50,6 +57,14 @@ end
 dfjacdx(x, y) = I(length(x))
 dfjacdy(x, y) = Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U)
 
+# Jvp
+jxvp(x,y,v) = dfjacdx(x,y)*v
+jyvp(x,y,v) = dfjacdy(x,y)*v
+
+# vJp
+vJxp(x,y,v) = dfjacdx(x,y)'*v
+vJyp(x,y,v) = dfjacdy(x,y)'*v
+
 const xscalar = rand()
 const yscalar = rand()
 
@@ -151,6 +166,9 @@ function test_fdm_j′vp(fdm_backend)
     valvec, pb3 = AD.value_and_pullback_function(fdm_backend, fjac, xvec, yvec)(w)
     @test valvec == fjac(xvec, yvec)
     @test norm.(pb3 .- pb1) == (0, 0)
+    @test minimum(isapprox.(pb1, (vJxp(xvec,yvec,w), vJyp(xvec,yvec,w)), atol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
 end
 
 @testset "AbstractDifferentiation.jl" begin
@@ -185,7 +203,6 @@ end
         @testset "j′vp" begin
             test_fdm_j′vp(fdm_backend1)
             test_fdm_j′vp(fdm_backend2)
-            # Errors
             test_fdm_j′vp(fdm_backend3)
         end
     end
 end

From 5ed5c4ef256eee0ba7e50400ab708f1f37f833db Mon Sep 17 00:00:00 2001
From: frankschae <42201748+frankschae@users.noreply.github.com>
Date: Sun, 9 May 2021 17:37:51 +0200
Subject: [PATCH 08/25] Update src/AbstractDifferentiation.jl

Co-authored-by: Mohamed Tarek
---
 src/AbstractDifferentiation.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index bc2ffd6..e148f67 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -54,7 +54,7 @@ end
 function value_and_jacobian(ab::AbstractBackend, f, xs...)
     local value
     primalcalled = false
-    if ab isa AbstractFiniteDifference
+    if lowest(ab) isa AbstractFiniteDifference
         value = primalvalue(ab, nothing, f, xs)
         primalcalled = true
     end

From 2991b8a5f7854664212d90c590e7798816b767aa Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Tue, 11 May 2021 22:24:30 +0200
Subject: [PATCH 09/25] fix vjp tests

---
 test/runtests.jl | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index d863686..c9dba17 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -147,15 +147,26 @@ end
 function test_fdm_jvp(fdm_backend)
     v = (rand(length(xvec)), rand(length(yvec)))
-    pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+
+    # augmented version of v
+    identity_like = AD.identity_matrix_like(v)
+    vaug = map(identity_like) do identity_like_i
+        identity_like_i .* v
+    end
+
+    pf1 = map(v->AD.pushforward_function(fdm_backend2, fjac, xvec, yvec)(v), vaug)
     pf2 = (
         FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
         FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
     )
     @test norm.(pf1 .- pf2) == (0, 0)
-    valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
-    @test valvec == fjac(xvec, yvec)
-    @test norm.(pf3 .- pf1) == (0, 0)
+    ((valvec1, pf3x), (valvec2, pf3y)) = map(v->AD.value_and_pushforward_function(fdm_backend2, fjac, xvec, yvec)(v), vaug)
+    @test valvec1 == fjac(xvec, yvec)
+    @test valvec2 == fjac(xvec, yvec)
+    @test norm.((pf3x,pf3y) .- pf1) == (0, 0)
+    @test minimum(isapprox.(pf1, (jxvp(xvec,yvec,v[1]), jyvp(xvec,yvec,v[2])), atol=1e-10))
+    @test xvec == xvec2
+    @test yvec == yvec2
 end
 
@@ -196,7 +207,6 @@ end
         @testset "jvp" begin
             test_fdm_jvp(fdm_backend1)
-            # Errors
             test_fdm_jvp(fdm_backend2)
             test_fdm_jvp(fdm_backend3)
         end
From 959fa013026b59e9c3fa1c622d366e376f7bdc8f Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Fri, 21 May 2021 16:15:27 +0200
Subject: [PATCH 11/25] lazy derivative fixes

---
 src/AbstractDifferentiation.jl | 31 ++++++++++++--
 test/runtests.jl               | 75 ++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index e148f67..0cd44fe 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -31,8 +31,14 @@ primalvalue(x::Tuple) = map(primalvalue, x)
 primalvalue(x) = x
 
 function derivative(ab::AbstractBackend, f, xs::Number...)
-    return getindex.(jacobian(lowest(ab), f, xs...), 1)
+    der = getindex.(jacobian(lowest(ab), f, xs...), 1)
+    if der isa Tuple
+        return der
+    else
+        return (der,)
+    end
 end
+
 function gradient(ab::AbstractBackend, f, xs...)
     return adjoint.(jacobian(lowest(ab), f, xs...))
 end
@@ -241,13 +247,32 @@ struct LazyDerivative{B, F, X}
     f::F
     xs::X
 end
+
 function Base.:*(d::LazyDerivative, y)
-    return derivative(d.ab, d.f, d.xs...) * y
+    return derivative(d.backend, d.f, d.xs...) * y
 end
+
 function Base.:*(y, d::LazyDerivative)
-    return y * derivative(d.ab, d.f, d.xs...)
+    return y * derivative(d.backend, d.f, d.xs...)
+end
+
+function Base.:*(d::LazyDerivative, y::Union{Number,Tuple})
+    return derivative(d.backend, d.f, d.xs...) .* y
+end
+
+function Base.:*(y::Union{Number,Tuple}, d::LazyDerivative)
+    return y .* derivative(d.backend, d.f, d.xs...)
+end
+
+function Base.:*(d::LazyDerivative, y::AbstractArray)
+    return map((d)-> d*y, derivative(d.backend, d.f, d.xs...))
+end
+
+function Base.:*(y::AbstractArray, d::LazyDerivative)
+    return map((d)-> y*d, derivative(d.backend, d.f, d.xs...))
+end
+
+
 struct LazyGradient{B, F, X}
     backend::B
     f::F
     xs::X
diff --git a/test/runtests.jl b/test/runtests.jl
index c9dba17..843dde4 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -182,6 +182,76 @@ function test_fdm_j′vp(fdm_backend)
     @test yvec == yvec2
 end
 
+function test_fdm_lazy_derivatives(fdm_backend)
+    # single input function
+    der1 = AD.derivative(fdm_backend, x->fder(x, yscalar), xscalar)
+    der2 = (
+        fdm_backend.alg(x -> fder(x, yscalar), xscalar),
+        fdm_backend.alg(y -> fder(xscalar, y), yscalar),
+    )
+
+    lazyder = AD.LazyDerivative(fdm_backend, x->fder(x, yscalar), xscalar)
+
+    # multiplication with scalar
+    @test der1[1]*yscalar == der2[1]*yscalar
+    @test lazyder*yscalar == der1.*yscalar
+    @test lazyder*yscalar isa Tuple
+
+    @test yscalar*der1[1] == yscalar*der2[1]
+    @test yscalar*lazyder == yscalar.*der1
+    @test yscalar*lazyder isa Tuple
+
+    # multiplication with array
+    @test der1[1]*yvec == der2[1]*yvec
+    @test lazyder*yvec == (der1.*yvec,)
+    @test lazyder*yvec isa Tuple
+
+    @test yvec*der1[1] == yvec*der2[1]
+    @test yvec*lazyder == (yvec.*der1,)
+    @test yvec*lazyder isa Tuple
+
+    # multiplication with tuple
+    @test lazyder*(yscalar,) == lazyder*yscalar
+    @test lazyder*(yvec,) == lazyder*yvec
+
+    @test (yscalar,)*lazyder == yscalar*lazyder
+    @test (yvec,)*lazyder == yvec*lazyder
+
+    # two input function
+    der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar)
+    der2 = (
+        fdm_backend.alg(x -> fder(x, yscalar), xscalar),
+        fdm_backend.alg(y -> fder(xscalar, y), yscalar),
+    )
+
+    lazyder = AD.LazyDerivative(fdm_backend, fder, (xscalar, yscalar))
+
+    # multiplication with scalar
+    @test der1.*yscalar == der2.*yscalar
+    @test lazyder*yscalar == der1.*yscalar
+    @test lazyder*yscalar isa Tuple
+
+    @test yscalar.*der1 == yscalar.*der2
+    @test yscalar*lazyder == yscalar.*der1
+    @test yscalar*lazyder isa Tuple
+
+    # multiplication with array
+    @test (der1[1]*yvec, der1[2]*yvec) == (der2[1]*yvec, der2[2]*yvec)
+    @test lazyder*yvec == (der1[1]*yvec, der1[2]*yvec)
+    @test lazyder*yvec isa Tuple
+
+    @test (yvec*der1[1], yvec*der1[2]) == (yvec*der2[1], yvec*der2[2])
+    @test yvec*lazyder == (yvec*der1[1], yvec*der1[2])
+    @test lazyder*yvec isa Tuple
+
+    # multiplication with tuple
+    @test lazyder*(yscalar,) == lazyder*yscalar
+    @test lazyder*(yvec,) == lazyder*yvec
+
+    @test (yscalar,)*lazyder == yscalar*lazyder
+    @test (yvec,)*lazyder == yvec*lazyder
+end
+
 @testset "AbstractDifferentiation.jl" begin
     @testset "FiniteDifferences" begin
         @testset "Derivative" begin
@@ -215,5 +285,10 @@ end
             test_fdm_j′vp(fdm_backend2)
             test_fdm_j′vp(fdm_backend3)
         end
+        @testset "Lazy Derivative" begin
+            test_fdm_lazy_derivatives(fdm_backend1)
+            test_fdm_lazy_derivatives(fdm_backend2)
+            test_fdm_lazy_derivatives(fdm_backend3)
+        end
     end
 end
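Usage note: the lazy wrappers defer differentiation until they are multiplied. The rules pinned down here: a scalar or tuple broadcasts over the tuple of partial derivatives, while an array is multiplied by each partial in turn, so the result is always a tuple with one entry per input. A short sketch against the test fixtures above (the bindings below are illustrative):

    lazyder = AD.LazyDerivative(fdm_backend1, fder, (xscalar, yscalar))
    lazyder * 2.0   # (2⋅∂fder/∂x, 2⋅∂fder/∂y): scalar broadcasts over the tuple of partials
    yvec * lazyder  # (yvec⋅∂fder/∂x, yvec⋅∂fder/∂y): one array per input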
From 5102f01cfe90e660eafdbe190ae89e867e56f049 Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Fri, 21 May 2021 17:19:16 +0200
Subject: [PATCH 12/25] lazy gradient

---
 src/AbstractDifferentiation.jl | 22 +++++++++++++++--
 test/runtests.jl               | 43 ++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 0cd44fe..636a344 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -278,8 +278,26 @@ struct LazyGradient{B, F, X}
     f::F
     xs::X
 end
-Base.:*(d::LazyGradient, y) = gradient(d.ab, d.f, d.xs...) * y
-Base.:*(y, d::LazyGradient) = y * gradient(d.ab, d.f, d.xs...)
+Base.:*(d::LazyGradient, y) = gradient(d.backend, d.f, d.xs...) * y
+Base.:*(y, d::LazyGradient) = y * gradient(d.backend, d.f, d.xs...)
+
+
+function Base.:*(d::LazyGradient, y::Union{Number,Tuple})
+    if d.xs isa Tuple
+        return gradient(d.backend, d.f, d.xs...) .* y
+    else
+        return gradient(d.backend, d.f, d.xs) .* y
+    end
+end
+
+function Base.:*(y::Union{Number,Tuple}, d::LazyGradient)
+    if d.xs isa Tuple
+        return y .* gradient(d.backend, d.f, d.xs...)
+    else
+        return y .* gradient(d.backend, d.f, d.xs)
+    end
+end
 
 struct LazyJacobian{B, F, X}
     backend::B
diff --git a/test/runtests.jl b/test/runtests.jl
index 843dde4..ead5e28 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -252,6 +252,44 @@ function test_fdm_lazy_derivatives(fdm_backend)
     @test (yvec,)*lazyder == yvec*lazyder
 end
 
+function test_fdm_lazy_gradients(fdm_backend)
+    # single input function
+    grad1 = AD.gradient(fdm_backend, x->fgrad(x, yvec), xvec)
+    grad2 = FDM.grad(fdm_backend.alg, x->fgrad(x, yvec), xvec)
+    lazygrad = AD.LazyGradient(fdm_backend, x->fgrad(x, yvec), xvec)
+
+    # multiplication with scalar
+    @test norm.(grad1.*yscalar .- grad2.*yscalar) == (0,)
+    @test norm.(lazygrad*yscalar .- grad1.*yscalar) == (0,)
+    @test lazygrad*yscalar isa Tuple
+
+    @test norm.(yscalar.*grad1 .- yscalar.*grad2) == (0,)
+    @test norm.(yscalar*lazygrad .- yscalar.*grad1) == (0,)
+    @test yscalar*lazygrad isa Tuple
+
+    # multiplication with tuple
+    @test lazygrad*(yscalar,) == lazygrad*yscalar
+    @test (yscalar,)*lazygrad == yscalar*lazygrad
+
+    # two input function
+    grad1 = AD.gradient(fdm_backend, fgrad, xvec, yvec)
+    grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec)
+    lazygrad = AD.LazyGradient(fdm_backend, fgrad, (xvec, yvec))
+
+    # multiplication with scalar
+    @test norm.(grad1.*yscalar .- grad2.*yscalar) == (0,0)
+    @test norm.(lazygrad*yscalar .- grad1.*yscalar) == (0,0)
+    @test lazygrad*yscalar isa Tuple
+
+    @test norm.(yscalar.*grad1 .- yscalar.*grad2) == (0,0)
+    @test norm.(yscalar*lazygrad .- yscalar.*grad1) == (0,0)
+    @test yscalar*lazygrad isa Tuple
+
+    # multiplication with tuple
+    @test lazygrad*(yscalar,) == lazygrad*yscalar
+    @test (yscalar,)*lazygrad == yscalar*lazygrad
+end
+
 @testset "AbstractDifferentiation.jl" begin
     @testset "FiniteDifferences" begin
         @testset "Derivative" begin
@@ -290,5 +328,10 @@ end
             test_fdm_lazy_derivatives(fdm_backend2)
             test_fdm_lazy_derivatives(fdm_backend3)
         end
+        @testset "Lazy Gradient" begin
+            test_fdm_lazy_gradients(fdm_backend1)
+            test_fdm_lazy_gradients(fdm_backend2)
+            test_fdm_lazy_gradients(fdm_backend3)
+        end
     end
 end

From 6941f5ccfcc6c4726ebbec6dc051bd6c30d7848e Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Sun, 6 Jun 2021 20:24:52 +0200
Subject: [PATCH 13/25] lazy jacobian

---
 src/AbstractDifferentiation.jl |  47 +++++++++++++++-
 test/runtests.jl               | 100 ++++++++++++++++++++++++++++++---
 2 files changed, 137 insertions(+), 10 deletions(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 636a344..77b1883 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -305,7 +305,16 @@ struct LazyJacobian{B, F, X}
     xs::X
 end
 function Base.:*(d::LazyJacobian, ys)
-    return pushforward_function(d.ab, d.f, d.xs...)(ys)
+    if d.xs isa Tuple
+        jvp = pushforward_function(d.backend, d.f, d.xs...)(ys)
+        if jvp isa Tuple
+            return vec.(jvp)
+        else
+            return vec(jvp)
+        end
+    else
+        return vec.(pushforward_function(d.backend, d.f, d.xs)(ys))
+    end
 end
 function Base.:*(ys, d::LazyJacobian)
     if ys isa Tuple
@@ -313,9 +322,43 @@ function Base.:*(ys, d::LazyJacobian)
     else
         ya = adjoint(ys)
     end
-    return pullback_function(d.ab, d.f, d.xs...)(ya)
+    if d.xs isa Tuple
+        return pullback_function(d.backend, d.f, d.xs...)(ya)
+    else
+        return pullback_function(d.backend, d.f, d.xs)(ya)
+    end
+end
+
+function Base.:*(d::LazyJacobian, ys::Number)
+    if d.xs isa Tuple
+        return jacobian(d.backend, d.f, d.xs...) .* ys
+    else
+        return jacobian(d.backend, d.f, d.xs) .* ys
+    end
 end
+
+function Base.:*(ys::Number, d::LazyJacobian)
+    if d.xs isa Tuple
+        return jacobian(d.backend, d.f, d.xs...) .* ys
+    else
+        return jacobian(d.backend, d.f, d.xs) .* ys
+    end
+end
+
+function Base.:*(d::LazyJacobian, ys::AbstractArray)
+    if d.xs isa Tuple
+        return vec.(pushforward_function(d.backend, d.f, d.xs...)((ys,)))
+    else
+        vjp = (pushforward_function(d.backend, d.f, d.xs)((ys,)))
+        if vjp isa Tuple
+            return vec.(vjp)
+        else
+            return (vec(vjp),)
+        end
+    end
+end
+
 struct LazyHessian{B, F, X}
     backend::B
     f::F
diff --git a/test/runtests.jl b/test/runtests.jl
index ead5e28..9247eb2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -21,7 +21,9 @@ end
 FDMBackend2() = FDMBackend2(central_fdm(5, 1))
 const fdm_backend2 = FDMBackend2()
 AD.@primitive function pushforward_function(ab::FDMBackend2, f, xs...)
-    return (vs) -> jvp(ab.alg, f, tuple.(xs, vs)...)
+    return function (vs)
+        jvp(ab.alg, f, tuple.(xs, vs)...)
+    end
 end
 
 struct FDMBackend3{A} <: AD.AbstractFiniteDifference
@@ -148,19 +150,25 @@ end
 function test_fdm_jvp(fdm_backend)
     v = (rand(length(xvec)), rand(length(yvec)))
 
-    # augmented version of v
-    identity_like = AD.identity_matrix_like(v)
-    vaug = map(identity_like) do identity_like_i
-        identity_like_i .* v
-    end
+    if fdm_backend isa FDMBackend2 # augmented version of v
+        identity_like = AD.identity_matrix_like(v)
+        vaug = map(identity_like) do identity_like_i
+            identity_like_i .* v
+        end
 
-    pf1 = map(v->AD.pushforward_function(fdm_backend2, fjac, xvec, yvec)(v), vaug)
+        pf1 = map(v->AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v), vaug)
+        ((valvec1, pf3x), (valvec2, pf3y)) = map(v->AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v), vaug)
+    else
+        pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+        valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v)
+        ((valvec1, pf3x), (valvec2, pf3y)) = (valvec, pf3[1]), (valvec, pf3[2])
+    end
     pf2 = (
         FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
         FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
     )
     @test norm.(pf1 .- pf2) == (0, 0)
-    ((valvec1, pf3x), (valvec2, pf3y)) = map(v->AD.value_and_pushforward_function(fdm_backend2, fjac, xvec, yvec)(v), vaug)
+
     @test valvec1 == fjac(xvec, yvec)
     @test valvec2 == fjac(xvec, yvec)
     @test norm.((pf3x,pf3y) .- pf1) == (0, 0)
@@ -290,6 +298,77 @@ function test_fdm_lazy_gradients(fdm_backend)
     @test (yscalar,)*lazygrad == yscalar*lazygrad
 end
 
+function test_fdm_lazy_jacobians(fdm_backend)
+    # single input function
+    jac1 = AD.jacobian(fdm_backend, x->fjac(x, yvec), xvec)
+    jac2 = FDM.jacobian(fdm_backend.alg, x->fjac(x, yvec), xvec)
+    lazyjac = AD.LazyJacobian(fdm_backend, x->fjac(x, yvec), xvec)
+
+    # multiplication with scalar
+    @test norm.(jac1.*yscalar .- jac2.*yscalar) == (0,)
+    @test norm.(lazyjac*yscalar .- jac1.*yscalar) == (0,)
+    @test lazyjac*yscalar isa Tuple
+
+    @test norm.(yscalar.*jac1 .- yscalar.*jac2) == (0,)
+    @test norm.(yscalar*lazyjac .- yscalar.*jac1) == (0,)
+    @test yscalar*lazyjac isa Tuple
+
+    w = adjoint(rand(length(fjac(xvec, yvec))))
+    v = (rand(length(xvec)),rand(length(xvec)))
+
+    # vjp
+    pb1 = FDM.j′vp(fdm_backend.alg, x -> fjac(x, yvec), w, xvec)
+    res = w*lazyjac
+    @test minimum(isapprox.(pb1, res, atol=1e-10))
+    @test res isa Tuple
+
+    # jvp
+    pf1 = (FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),)
+    res = lazyjac*v[1]
+    @test minimum(isapprox.(pf1, res, atol=1e-10))
+    @test res isa Tuple
+
+    # two input function
+    jac1 = AD.jacobian(fdm_backend, fjac, xvec, yvec)
+    jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec)
+    lazyjac = AD.LazyJacobian(fdm_backend, fjac, (xvec, yvec))
+
+    # multiplication with scalar
+    @test norm.(jac1.*yscalar .- jac2.*yscalar) == (0,0)
+    @test norm.(lazyjac*yscalar .- jac1.*yscalar) == (0,0)
+    @test lazyjac*yscalar isa Tuple
+
+    @test norm.(yscalar.*jac1 .- yscalar.*jac2) == (0,0)
+    @test norm.(yscalar*lazyjac .- yscalar.*jac1) == (0,0)
+    @test yscalar*lazyjac isa Tuple
+
+    # vjp
+    pb1 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec)
+    res = w*lazyjac
+    @test minimum(isapprox.(pb1, res, atol=1e-10))
+    @test res isa Tuple
+
+    # jvp
+    pf1 = (
+        FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),
+        FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])),
+    )
+
+    if fdm_backend isa FDMBackend2 # augmented version of v
+        identity_like = AD.identity_matrix_like(v)
+        vaug = map(identity_like) do identity_like_i
+            identity_like_i .* v
+        end
+
+        res = map(v->lazyjac*v, vaug)
+    else
+        res = lazyjac*v
+    end
+
+    @test minimum(isapprox.(pf1, res, atol=1e-10))
+    @test res isa Tuple
+end
+
 @testset "AbstractDifferentiation.jl" begin
     @testset "FiniteDifferences" begin
         @testset "Derivative" begin
@@ -333,5 +412,10 @@ end
             test_fdm_lazy_gradients(fdm_backend2)
             test_fdm_lazy_gradients(fdm_backend3)
         end
+        @testset "Lazy Jacobian" begin
+            test_fdm_lazy_jacobians(fdm_backend1)
+            test_fdm_lazy_jacobians(fdm_backend2)
+            test_fdm_lazy_jacobians(fdm_backend3)
+        end
     end
 end
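Usage note: the lazy Jacobian never materializes a matrix for vector products. Right-multiplication dispatches to the pushforward (a jvp), left-multiplication to the pullback (a j′vp), and only multiplication by a scalar falls back to assembling the full Jacobian. A sketch using the test fixtures above (`w` and `v` are illustrative):

    lazyjac = AD.LazyJacobian(fdm_backend1, x -> fjac(x, yvec), xvec)

    v = rand(length(xvec))
    lazyjac * v    # J*v via the pushforward, returned as a 1-tuple
    w = adjoint(rand(length(fjac(xvec, yvec))))
    w * lazyjac    # w*J via the pullback, returned as a 1-tuple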
.* y @@ -304,6 +303,7 @@ struct LazyJacobian{B, F, X} f::F xs::X end + function Base.:*(d::LazyJacobian, ys) if d.xs isa Tuple jvp = pushforward_function(d.backend, d.f, d.xs...)(ys) @@ -316,6 +316,7 @@ function Base.:*(d::LazyJacobian, ys) return vec.(pushforward_function(d.backend, d.f, d.xs)(ys)) end end + function Base.:*(ys, d::LazyJacobian) if ys isa Tuple ya = adjoint.(ys) @@ -364,13 +365,20 @@ struct LazyHessian{B, F, X} f::F xs::X end + function Base.:*(d::LazyHessian, ys) - return pushforward_function( - secondlowest(d.ab), - (xs...,) -> gradient(lowest(d.ab), d.f, xs...), - d.xs..., - )(ys) + if d.xs isa Tuple + return pushforward_function( + secondlowest(d.backend), + (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)(ys) + else + return pushforward_function( + secondlowest(d.backend), + (xs,) -> gradient(lowest(d.backend), d.f, xs),d.xs,)(ys) + end + end + function Base.:*(ys, d::LazyHessian) if ys isa Tuple ya = adjoint.(ys) @@ -378,12 +386,61 @@ function Base.:*(ys, d::LazyHessian) ya = adjoint(ys) end return pullback_function( - secondlowest(d.ab), - (xs...,) -> gradient(lowest(d.ab), d.f, xs...), + secondlowest(d.backend), + (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs..., )(ya) end +function Base.:*(d::LazyHessian, ys::Number) + if d.xs isa Tuple + return hessian(d.backend, d.f, d.xs...).*ys + else + return hessian(d.backend, d.f, d.xs).*ys + end +end + +function Base.:*(ys::Number, d::LazyHessian) + if d.xs isa Tuple + return ys.*hessian(d.backend, d.f, d.xs...) + else + return ys.*hessian(d.backend, d.f, d.xs) + end +end + +function Base.:*(d::LazyHessian, ys::AbstractArray) + if d.xs isa Tuple + return pushforward_function( + secondlowest(d.backend), + (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)((ys,)) + else + return pushforward_function( + secondlowest(d.backend), + (xs,) -> gradient(lowest(d.backend), d.f, xs),d.xs,)((ys,)) + end +end + +function Base.:*(ys::AbstractArray, d::LazyHessian) + if ys isa Tuple + ya = adjoint.(ys) + else + ya = adjoint(ys) + end + if d.xs isa Tuple + return pullback_function( + secondlowest(d.backend), + (xs...,) -> gradient(lowest(d.backend), d.f, xs...), + d.xs..., + )(ya) + else + return pullback_function( + secondlowest(d.backend), + (xs,) -> gradient(lowest(d.backend), d.f, xs)[1], + d.xs, + )(ya) + end +end + function lazyderivative(ab::AbstractBackend, f, xs::Number...) 
return LazyDerivative(ab, f, xs) end diff --git a/test/runtests.jl b/test/runtests.jl index 9247eb2..06d8365 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -369,6 +369,36 @@ function test_fdm_lazy_jacobians(fdm_backend) @test res isa Tuple end +function test_fdm_lazy_hessians(fdm_backend) + # single input function + fhess = x -> fgrad(x, yvec) + hess1 = (dfgraddxdx(xvec,yvec),) + lazyhess = AD.LazyHessian(fdm_backend, x->fgrad(x, yvec), xvec) + + # multiplication with scalar + @test minimum(isapprox.(lazyhess*yscalar, hess1.*yscalar, atol=1e-10)) + @test lazyhess*yscalar isa Tuple + + # multiplication with scalar + @test minimum(isapprox.(yscalar*lazyhess, yscalar.*hess1, atol=1e-10)) + @test yscalar*lazyhess isa Tuple + + w = adjoint(rand(length(xvec))) + v = rand(length(xvec)) + + # Hvp + Hv = map(h->h*v, hess1) + res = lazyhess*v + @test minimum(isapprox.(Hv, res, atol=1e-10)) + @test res isa Tuple + + # H′vp + wH = map(h->h'*adjoint(w), hess1) + res = w*lazyhess + @test minimum(isapprox.(wH, res, atol=1e-10)) + @test res isa Tuple +end + @testset "AbstractDifferentiation.jl" begin @testset "FiniteDifferences" begin @testset "Derivative" begin @@ -417,5 +447,10 @@ end test_fdm_lazy_jacobians(fdm_backend2) test_fdm_lazy_jacobians(fdm_backend3) end + @testset "Lazy Hessian" begin + test_fdm_lazy_hessians(fdm_backend1) + test_fdm_lazy_hessians(fdm_backend2) + test_fdm_lazy_hessians(fdm_backend3) + end end end From 5c0a147a47ed40df4b9157610a3bb6a2760a3412 Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Tue, 29 Jun 2021 23:42:13 +0200 Subject: [PATCH 15/25] combine general fallback with abstract array --- src/AbstractDifferentiation.jl | 91 ++++++++++++---------------------- test/runtests.jl | 2 +- 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index 5014052..263ad26 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -305,15 +305,18 @@ struct LazyJacobian{B, F, X} end function Base.:*(d::LazyJacobian, ys) + if !(ys isa Tuple) + ys = (ys, ) + end if d.xs isa Tuple - jvp = pushforward_function(d.backend, d.f, d.xs...)(ys) - if jvp isa Tuple - return vec.(jvp) - else - return vec(jvp) - end + vjp = pushforward_function(d.backend, d.f, d.xs...)(ys) + else + vjp = pushforward_function(d.backend, d.f, d.xs)(ys) + end + if vjp isa Tuple + return vjp else - return vec.(pushforward_function(d.backend, d.f, d.xs)(ys)) + return (vjp,) end end @@ -346,19 +349,6 @@ function Base.:*(ys::Number, d::LazyJacobian) end end -function Base.:*(d::LazyJacobian, ys::AbstractArray) - if d.xs isa Tuple - return vec.(pushforward_function(d.backend, d.f, d.xs...)((ys,))) - else - vjp = (pushforward_function(d.backend, d.f, d.xs)((ys,))) - if vjp isa Tuple - return vec.(vjp) - else - return (vec(vjp),) - end - end -end - struct LazyHessian{B, F, X} backend::B @@ -367,16 +357,19 @@ struct LazyHessian{B, F, X} end function Base.:*(d::LazyHessian, ys) + if !(ys isa Tuple) + ys = (ys, ) + end + if d.xs isa Tuple return pushforward_function( secondlowest(d.backend), - (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)(ys) + (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)(ys) else return pushforward_function( secondlowest(d.backend), - (xs,) -> gradient(lowest(d.backend), d.f, xs),d.xs,)(ys) + (xs,) -> gradient(lowest(d.backend), d.f, xs),d.xs,)(ys) end - end function Base.:*(ys, d::LazyHessian) @@ -385,11 +378,19 @@ function Base.:*(ys, d::LazyHessian) else ya = 
adjoint(ys) end - return pullback_function( - secondlowest(d.backend), - (xs...,) -> gradient(lowest(d.backend), d.f, xs...), - d.xs..., - )(ya) + if d.xs isa Tuple + return pullback_function( + secondlowest(d.backend), + (xs...,) -> gradient(lowest(d.backend), d.f, xs...), + d.xs..., + )(ya) + else + return pullback_function( + secondlowest(d.backend), + (xs,) -> gradient(lowest(d.backend), d.f, xs)[1], + d.xs, + )(ya) + end end function Base.:*(d::LazyHessian, ys::Number) @@ -408,38 +409,6 @@ function Base.:*(ys::Number, d::LazyHessian) end end -function Base.:*(d::LazyHessian, ys::AbstractArray) - if d.xs isa Tuple - return pushforward_function( - secondlowest(d.backend), - (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)((ys,)) - else - return pushforward_function( - secondlowest(d.backend), - (xs,) -> gradient(lowest(d.backend), d.f, xs),d.xs,)((ys,)) - end -end - -function Base.:*(ys::AbstractArray, d::LazyHessian) - if ys isa Tuple - ya = adjoint.(ys) - else - ya = adjoint(ys) - end - if d.xs isa Tuple - return pullback_function( - secondlowest(d.backend), - (xs...,) -> gradient(lowest(d.backend), d.f, xs...), - d.xs..., - )(ya) - else - return pullback_function( - secondlowest(d.backend), - (xs,) -> gradient(lowest(d.backend), d.f, xs)[1], - d.xs, - )(ya) - end -end function lazyderivative(ab::AbstractBackend, f, xs::Number...) return LazyDerivative(ab, f, xs) @@ -511,7 +480,9 @@ function define_pushforward_function_and_friends(fdef) end end elseif eltype(identity_like) <: AbstractMatrix + # needed for the computation of the Hessian ret = hcat.(mapslices(identity_like[1], dims=1) do cols + # cols loop over basis states pf = pff((cols,)) if typeof(pf) <: AbstractVector return (pf, ) diff --git a/test/runtests.jl b/test/runtests.jl index 06d8365..c50bdd1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -360,7 +360,7 @@ function test_fdm_lazy_jacobians(fdm_backend) identity_like_i .* v end - res = map(v->lazyjac*v, vaug) + res = map(v->(lazyjac*v)[1], vaug) else res = lazyjac*v end From 48af1083ad9e7e5e41397ad332b0278dd8c7add5 Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Mon, 26 Jul 2021 19:34:57 +0200 Subject: [PATCH 16/25] reshape gradient and fix for AD.hessian --- src/AbstractDifferentiation.jl | 26 +++++++++++++++++++++----- test/runtests.jl | 27 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index 263ad26..2a0a4eb 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -40,13 +40,23 @@ function derivative(ab::AbstractBackend, f, xs::Number...) end function gradient(ab::AbstractBackend, f, xs...) - return adjoint.(jacobian(lowest(ab), f, xs...)) + return reshape.(adjoint.(jacobian(lowest(ab), f, xs...)),size.(xs)) end function jacobian(ab::AbstractBackend, f, xs...) end function hessian(ab::AbstractBackend, f, xs...) - return jacobian(secondlowest(ab), (xs...,) -> begin - gradient(lowest(ab), f, xs...) - end, xs...) + xss = collect((xs...,)) + counter = 0 + # gradient returns tuple of gradient values with respect to inputs x,y ∈ xs + # Hessian is the Jacobian of the individual gradients, i.e., the tuple of matrices + # defined by ∂x∂x f, ∂y∂y f, in the case of a scalar valued function `f`. + hess = map((xs...,)) do x + counter += 1 + _f = _x->f(setindex!(deepcopy(xss),x,counter)...) 
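+        # `_f` freezes every input except the `counter`-th one, so the Jacobian
+        # of its gradient below is the corresponding diagonal Hessian block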
+ return jacobian(secondlowest(ab),(x,)-> begin + return gradient(lowest(ab), _f, x) + end, x)[1] + end + return hess end function value_and_derivative(ab::AbstractBackend, f, xs::Number...) @@ -55,7 +65,7 @@ function value_and_derivative(ab::AbstractBackend, f, xs::Number...) end function value_and_gradient(ab::AbstractBackend, f, xs...) value, jacs = value_and_jacobian(lowest(ab), f, xs...) - return value, adjoint.(jacs) + return value, reshape.(adjoint.(jacs),size.(xs)) end function value_and_jacobian(ab::AbstractBackend, f, xs...) local value @@ -481,9 +491,12 @@ function define_pushforward_function_and_friends(fdef) end elseif eltype(identity_like) <: AbstractMatrix # needed for the computation of the Hessian + println("define_pushforward_function_and_friends") + @show identity_like ret = hcat.(mapslices(identity_like[1], dims=1) do cols # cols loop over basis states pf = pff((cols,)) + @show cols pf if typeof(pf) <: AbstractVector return (pf, ) elseif typeof(pf) <: AbstractMatrix @@ -518,6 +531,9 @@ function define_pullback_function_and_friends(fdef) end end elseif eltype(identity_like) <: AbstractMatrix + # needed for Hessian computation: + # value is a (grad,). Then, identity_like is a (matrix,). + # cols loops over columns of the matrix return vcat.(mapslices(identity_like[1], dims=1) do cols adjoint.(value_and_pbf((cols,))[2]) end ...) diff --git a/test/runtests.jl b/test/runtests.jl index c50bdd1..23ce6dc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -52,6 +52,7 @@ fgrad(x, y) = prod(x) + sum(y ./ (1:length(y))) dfgraddx(x, y) = prod(x)./x dfgraddy(x, y) = one(eltype(y)) ./ (1:length(y)) dfgraddxdx(x, y) = prod(x)./(x*x') - Diagonal(diag(prod(x)./(x*x'))) +dfgraddydy(x, y) = zeros(length(y),length(y)) function fjac(x, y) x + Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) * y @@ -89,6 +90,13 @@ function test_fdm_derivatives(fdm_backend) @test der3 .- der1 == (0, 0) der_exact = (dfderdx(xscalar,yscalar), dfderdy(xscalar,yscalar)) @test minimum(isapprox.(der_exact, der1, rtol=1e-10)) + # test if single input (no tuple works) + valscalara, dera = AD.value_and_derivative(fdm_backend, x -> fder(x, yscalar), xscalar) + valscalarb, derb = AD.value_and_derivative(fdm_backend, y -> fder(xscalar, y), yscalar) + @test valscalar == valscalara + @test valscalar == valscalarb + @test isapprox(dera[1], der1[1], rtol=1e-10) + @test isapprox(derb[1], der1[2], rtol=1e-10) end function test_fdm_gradients(fdm_backend) @@ -102,6 +110,13 @@ function test_fdm_gradients(fdm_backend) @test minimum(isapprox.(grad_exact, grad1, rtol=1e-10)) @test xvec == xvec2 @test yvec == yvec2 + # test if single input (no tuple works) + valscalara, grada = AD.value_and_gradient(fdm_backend, x -> fgrad(x, yvec), xvec) + valscalarb, gradb = AD.value_and_gradient(fdm_backend, y -> fgrad(xvec, y), yvec) + @test valscalar == valscalara + @test valscalar == valscalarb + @test isapprox(grada[1], grad1[1], rtol=1e-10) + @test isapprox(gradb[1], grad1[2], rtol=1e-10) end function test_fdm_jacobians(fdm_backend) @@ -115,9 +130,21 @@ function test_fdm_jacobians(fdm_backend) @test minimum(isapprox.(grad_exact, jac1, rtol=1e-10)) @test xvec == xvec2 @test yvec == yvec2 + # test if single input (no tuple works) + valveca, jaca = AD.value_and_jacobian(fdm_backend, x -> fjac(x, yvec), xvec) + valvecb, jacb = AD.value_and_jacobian(fdm_backend, y -> fjac(xvec, y), yvec) + @test valvec == valveca + @test valvec == valvecb + @test isapprox(jaca[1], jac1[1], rtol=1e-10) + @test isapprox(jacb[1], jac1[2], rtol=1e-10) 
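+    # single-input calls return 1-tuples, hence the `[1]` indexing above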
 end

 function test_fdm_hessians(fdm_backend)
+    H1 = AD.hessian(fdm_backend, fgrad, xvec, yvec)
+    @test dfgraddxdx(xvec,yvec) ≈ H1[1] atol=1e-10
+    @test dfgraddydy(xvec,yvec) ≈ H1[2] atol=1e-10
+
+    # test if single input (no tuple) works
     fhess = x -> fgrad(x, yvec)
     hess1 = AD.hessian(fdm_backend, fhess, xvec)
     hess2 = FDM.jacobian(

From a9f1538d8154c40e53d8bea0aae2198932b40250 Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Tue, 27 Jul 2021 16:17:18 +0200
Subject: [PATCH 17/25] remove prints

---
 src/AbstractDifferentiation.jl | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index 2a0a4eb..15ba75b 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -51,7 +51,7 @@ function hessian(ab::AbstractBackend, f, xs...)
     # defined by ∂x∂x f, ∂y∂y f, in the case of a scalar valued function `f`.
     hess = map((xs...,)) do x
         counter += 1
-        _f = _x->f(setindex!(deepcopy(xss),x,counter)...)
+        _f = _x->f(setindex!(deepcopy(xss),_x,counter)...)
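+        # note: the closure argument `_x` is now spliced in instead of the
+        # captured `x`, so each partial function actually varies its input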
return jacobian(secondlowest(ab),(x,)-> begin return gradient(lowest(ab), _f, x) diff --git a/test/runtests.jl b/test/runtests.jl index 23ce6dc..05e9df6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,10 +1,12 @@ using AbstractDifferentiation using Test, FiniteDifferences, LinearAlgebra +using ForwardDiff using Random Random.seed!(1234) const FDM = FiniteDifferences +## FiniteDifferences struct FDMBackend1{A} <: AD.AbstractFiniteDifference alg::A end @@ -43,6 +45,77 @@ AD.@primitive function pullback_function(ab::FDMBackend3, f, xs...) end end end +## + + +## ForwardDiff +struct ForwardDiffBackend1 <: AD.AbstractForwardMode end +const forwarddiff_backend1 = ForwardDiffBackend1() +AD.@primitive function jacobian(ab::ForwardDiffBackend1, f, xs...) + if xs isa AbstractArray + return ForwardDiff.jacobian(f, xs) + elseif xs isa Number + return ForwardDiff.derivative(f, xs) + elseif xs isa Tuple + @assert length(xs) <= 2 + if eltype(xs) <: Number + # Can this be avoided for the derivative computation? It seems to defeat the purpose of AbstractDifferentiation. + # If eltype(xs) isa Number, ForwardDiff will error by saying that Jacobian expects an array. + # if we convert the number to an array in AbstractDifferentiation, we get a no method error due to log(x) and exp(x) + if length(xs) == 1 + (ForwardDiff.derivative(f, xs[1]),) + else + _f1 = x -> f(x,xs[2]) + _f2 = x -> f(xs[1], x) + (ForwardDiff.derivative(_f1, xs[1]),ForwardDiff.derivative(_f2, xs[2])) + end + else + if length(xs) == 1 + out = f(xs[1]) + if out isa Number + # Lazy Jacobian test pb fails otherwise with "dimensions must match: a has dims (Base.OneTo(1), Base.OneTo(5)), b has dims (Base.OneTo(5),), mismatch at 1" + (reshape(ForwardDiff.gradient(f, xs[1]),(1,length(xs[1]))),) + else + (ForwardDiff.jacobian(f, xs[1]),) + end + else + out = f(xs...) + _f1 = x -> f(x,xs[2]) + _f2 = x -> f(xs[1], x) + if out isa Number + # reshape for pullback tests (similar as above) + (reshape(ForwardDiff.gradient(_f1, xs[1]),(1,length(xs[1]))), + reshape(ForwardDiff.gradient(_f2, xs[2]), (1, length(xs[2])))) + else + (ForwardDiff.jacobian(_f1, xs[1]),ForwardDiff.jacobian(_f2, xs[2])) + end + + end + end + else + error(typeof(xs)) + end +end + +struct ForwardDiffBackend2 <: AD.AbstractForwardMode end +const forwarddiff_backend2 = ForwardDiffBackend2() +AD.@primitive function pushforward_function(ab::ForwardDiffBackend2, f, xs...) 
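+    # Like FDMBackend2, this backend pushes one tangent seed through at a time,
+    # which is why multi-input tests build an "augmented" tuple of seeds.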
+ # jvp = f'(x)*v, i.e., differentiate f(x + h*v) wrt h at 0 + return function (vs) + if xs isa Tuple + @assert length(xs) <= 2 + if length(xs) == 1 + (ForwardDiff.derivative(h->f(xs[1]+h*vs[1]),0),) + else + ForwardDiff.derivative(h->f(xs[1]+h*vs[1], xs[2]+h*vs[2]),0) + end + else + ForwardDiff.derivative(h->f(xs+h*vs),0) + end + end +end +## + fder(x, y) = exp(y) * x + y * log(x) dfderdx(x, y) = exp(y) + y * 1/x @@ -78,32 +151,34 @@ const yvec = rand(5) xvec2 = deepcopy(xvec) yvec2 = deepcopy(yvec) -function test_fdm_derivatives(fdm_backend) - der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar) + +function test_fdm_derivatives(backend, fdm_backend) + # fdm_backend for comparison with finite differences + der1 = AD.derivative(backend, fder, xscalar, yscalar) der2 = ( fdm_backend.alg(x -> fder(x, yscalar), xscalar), fdm_backend.alg(y -> fder(xscalar, y), yscalar), ) - @test norm.(der1 .- der2) == (0, 0) - valscalar, der3 = AD.value_and_derivative(fdm_backend, fder, xscalar, yscalar) + @test minimum(isapprox.(der1, der2, rtol=1e-10)) + valscalar, der3 = AD.value_and_derivative(backend, fder, xscalar, yscalar) @test valscalar == fder(xscalar, yscalar) @test der3 .- der1 == (0, 0) der_exact = (dfderdx(xscalar,yscalar), dfderdy(xscalar,yscalar)) @test minimum(isapprox.(der_exact, der1, rtol=1e-10)) # test if single input (no tuple works) - valscalara, dera = AD.value_and_derivative(fdm_backend, x -> fder(x, yscalar), xscalar) - valscalarb, derb = AD.value_and_derivative(fdm_backend, y -> fder(xscalar, y), yscalar) + valscalara, dera = AD.value_and_derivative(backend, x -> fder(x, yscalar), xscalar) + valscalarb, derb = AD.value_and_derivative(backend, y -> fder(xscalar, y), yscalar) @test valscalar == valscalara @test valscalar == valscalarb @test isapprox(dera[1], der1[1], rtol=1e-10) @test isapprox(derb[1], der1[2], rtol=1e-10) end -function test_fdm_gradients(fdm_backend) - grad1 = AD.gradient(fdm_backend, fgrad, xvec, yvec) +function test_fdm_gradients(backend, fdm_backend) + grad1 = AD.gradient(backend, fgrad, xvec, yvec) grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec) - @test norm.(grad1 .- grad2) == (0, 0) - valscalar, grad3 = AD.value_and_gradient(fdm_backend, fgrad, xvec, yvec) + @test minimum(isapprox.(grad1, grad2, rtol=1e-10)) + valscalar, grad3 = AD.value_and_gradient(backend, fgrad, xvec, yvec) @test valscalar == fgrad(xvec, yvec) @test norm.(grad3 .- grad1) == (0, 0) grad_exact = (dfgraddx(xvec,yvec), dfgraddy(xvec,yvec)) @@ -111,19 +186,19 @@ function test_fdm_gradients(fdm_backend) @test xvec == xvec2 @test yvec == yvec2 # test if single input (no tuple works) - valscalara, grada = AD.value_and_gradient(fdm_backend, x -> fgrad(x, yvec), xvec) - valscalarb, gradb = AD.value_and_gradient(fdm_backend, y -> fgrad(xvec, y), yvec) + valscalara, grada = AD.value_and_gradient(backend, x -> fgrad(x, yvec), xvec) + valscalarb, gradb = AD.value_and_gradient(backend, y -> fgrad(xvec, y), yvec) @test valscalar == valscalara @test valscalar == valscalarb @test isapprox(grada[1], grad1[1], rtol=1e-10) @test isapprox(gradb[1], grad1[2], rtol=1e-10) end -function test_fdm_jacobians(fdm_backend) - jac1 = AD.jacobian(fdm_backend, fjac, xvec, yvec) +function test_fdm_jacobians(backend,fdm_backend) + jac1 = AD.jacobian(backend, fjac, xvec, yvec) jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec) - @test norm.(jac1 .- jac2) == (0, 0) - valvec, jac3 = AD.value_and_jacobian(fdm_backend, fjac, xvec, yvec) + @test minimum(isapprox.(jac1, jac2, rtol=1e-10)) + valvec, jac3 = 
AD.value_and_jacobian(backend, fjac, xvec, yvec) @test valvec == fjac(xvec, yvec) @test norm.(jac3 .- jac1) == (0, 0) grad_exact = (dfjacdx(xvec, yvec), dfjacdy(xvec, yvec)) @@ -131,22 +206,22 @@ function test_fdm_jacobians(fdm_backend) @test xvec == xvec2 @test yvec == yvec2 # test if single input (no tuple works) - valveca, jaca = AD.value_and_jacobian(fdm_backend, x -> fjac(x, yvec), xvec) - valvecb, jacb = AD.value_and_jacobian(fdm_backend, y -> fjac(xvec, y), yvec) + valveca, jaca = AD.value_and_jacobian(backend, x -> fjac(x, yvec), xvec) + valvecb, jacb = AD.value_and_jacobian(backend, y -> fjac(xvec, y), yvec) @test valvec == valveca @test valvec == valvecb @test isapprox(jaca[1], jac1[1], rtol=1e-10) @test isapprox(jacb[1], jac1[2], rtol=1e-10) end -function test_fdm_hessians(fdm_backend) - H1 = AD.hessian(fdm_backend, fgrad, xvec, yvec) +function test_fdm_hessians(backend, fdm_backend) + H1 = AD.hessian(backend, fgrad, xvec, yvec) @test dfgraddxdx(xvec,yvec) ≈ H1[1] atol=1e-10 @test dfgraddydy(xvec,yvec) ≈ H1[2] atol=1e-10 # test if single input (no tuple works) fhess = x -> fgrad(x, yvec) - hess1 = AD.hessian(fdm_backend, fhess, xvec) + hess1 = AD.hessian(backend, fhess, xvec) hess2 = FDM.jacobian( fdm_backend.alg, (x) -> begin @@ -158,43 +233,43 @@ function test_fdm_hessians(fdm_backend) end, xvec, ) - @test norm.(hess1 .- hess2) == (0,) - valscalar, hess3 = AD.value_and_hessian(fdm_backend, fhess, xvec) + @test minimum(isapprox.(hess1, hess2, rtol=1e-10)) + valscalar, hess3 = AD.value_and_hessian(backend, fhess, xvec) @test valscalar == fgrad(xvec, yvec) @test norm.(hess3 .- hess1) == (0,) - valscalar, grad, hess4 = AD.value_gradient_and_hessian(fdm_backend, fhess, xvec) + valscalar, grad, hess4 = AD.value_gradient_and_hessian(backend, fhess, xvec) @test valscalar == fgrad(xvec, yvec) - @test norm.(grad .- AD.gradient(fdm_backend, fhess, xvec)) == (0,) + @test norm.(grad .- AD.gradient(backend, fhess, xvec)) == (0,) @test norm.(hess4 .- hess1) == (0,) @test dfgraddxdx(xvec,yvec) ≈ hess1[1] atol=1e-10 @test xvec == xvec2 @test yvec == yvec2 fhess2 = x-> dfgraddx(x, yvec) - hess5 = AD.jacobian(fdm_backend, fhess2, xvec) + hess5 = AD.jacobian(backend, fhess2, xvec) @test minimum(isapprox.(hess5, hess1, atol=1e-10)) end -function test_fdm_jvp(fdm_backend) +function test_fdm_jvp(backend,fdm_backend) v = (rand(length(xvec)), rand(length(yvec))) - if fdm_backend isa FDMBackend2 # augmented version of v + if backend isa Union{FDMBackend2,ForwardDiffBackend2} # augmented version of v identity_like = AD.identity_matrix_like(v) vaug = map(identity_like) do identity_like_i identity_like_i .* v end - pf1 = map(v->AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v), vaug) - ((valvec1, pf3x), (valvec2, pf3y)) = map(v->AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v), vaug) + pf1 = map(v->AD.pushforward_function(backend, fjac, xvec, yvec)(v), vaug) + ((valvec1, pf3x), (valvec2, pf3y)) = map(v->AD.value_and_pushforward_function(backend, fjac, xvec, yvec)(v), vaug) else - pf1 = AD.pushforward_function(fdm_backend, fjac, xvec, yvec)(v) - valvec, pf3 = AD.value_and_pushforward_function(fdm_backend, fjac, xvec, yvec)(v) + pf1 = AD.pushforward_function(backend, fjac, xvec, yvec)(v) + valvec, pf3 = AD.value_and_pushforward_function(backend, fjac, xvec, yvec)(v) ((valvec1, pf3x), (valvec2, pf3y)) = (valvec, pf3[1]), (valvec, pf3[2]) end pf2 = ( FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])), FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])), ) - @test 
norm.(pf1 .- pf2) == (0, 0) + @test minimum(isapprox.(pf1, pf2, rtol=1e-10)) @test valvec1 == fjac(xvec, yvec) @test valvec2 == fjac(xvec, yvec) @@ -204,12 +279,12 @@ function test_fdm_jvp(fdm_backend) @test yvec == yvec2 end -function test_fdm_j′vp(fdm_backend) +function test_fdm_j′vp(backend,fdm_backend) w = rand(length(fjac(xvec, yvec))) - pb1 = AD.pullback_function(fdm_backend, fjac, xvec, yvec)(w) + pb1 = AD.pullback_function(backend, fjac, xvec, yvec)(w) pb2 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec) @test all(norm.(pb1 .- pb2) .<= (1e-10, 1e-10)) - valvec, pb3 = AD.value_and_pullback_function(fdm_backend, fjac, xvec, yvec)(w) + valvec, pb3 = AD.value_and_pullback_function(backend, fjac, xvec, yvec)(w) @test valvec == fjac(xvec, yvec) @test norm.(pb3 .- pb1) == (0, 0) @test minimum(isapprox.(pb1, (vJxp(xvec,yvec,w), vJyp(xvec,yvec,w)), atol=1e-10)) @@ -217,31 +292,31 @@ function test_fdm_j′vp(fdm_backend) @test yvec == yvec2 end -function test_fdm_lazy_derivatives(fdm_backend) +function test_fdm_lazy_derivatives(backend,fdm_backend) # single input function - der1 = AD.derivative(fdm_backend, x->fder(x, yscalar), xscalar) + der1 = AD.derivative(backend, x->fder(x, yscalar), xscalar) der2 = ( fdm_backend.alg(x -> fder(x, yscalar), xscalar), fdm_backend.alg(y -> fder(xscalar, y), yscalar), ) - lazyder = AD.LazyDerivative(fdm_backend, x->fder(x, yscalar), xscalar) + lazyder = AD.LazyDerivative(backend, x->fder(x, yscalar), xscalar) # multiplication with scalar - @test der1[1]*yscalar == der2[1]*yscalar + @test isapprox(der1[1]*yscalar, der2[1]*yscalar, atol=1e-10) @test lazyder*yscalar == der1.*yscalar @test lazyder*yscalar isa Tuple - @test yscalar*der1[1] == yscalar*der2[1] - @test yscalar*lazyder == yscalar.*der1 + @test isapprox(yscalar*der1[1], yscalar*der2[1], atol=1e-10) + @test yscalar*lazyder == yscalar.*der1 @test yscalar*lazyder isa Tuple # multiplication with array - @test der1[1]*yvec == der2[1]*yvec + @test isapprox(der1[1]*yvec, der2[1]*yvec, atol=1e-10) @test lazyder*yvec == (der1.*yvec,) @test lazyder*yvec isa Tuple - @test yvec*der1[1] == yvec*der2[1] + @test isapprox(yvec*der1[1], yvec*der2[1], atol=1e-10) @test yvec*lazyder == (yvec.*der1,) @test yvec*lazyder isa Tuple @@ -253,29 +328,29 @@ function test_fdm_lazy_derivatives(fdm_backend) @test (yvec,)*lazyder == yvec*lazyder # two input function - der1 = AD.derivative(fdm_backend, fder, xscalar, yscalar) + der1 = AD.derivative(backend, fder, xscalar, yscalar) der2 = ( fdm_backend.alg(x -> fder(x, yscalar), xscalar), fdm_backend.alg(y -> fder(xscalar, y), yscalar), ) - lazyder = AD.LazyDerivative(fdm_backend, fder, (xscalar, yscalar)) + lazyder = AD.LazyDerivative(backend, fder, (xscalar, yscalar)) # multiplication with scalar - @test der1.*yscalar == der2.*yscalar + @test minimum(isapprox.(der1.*yscalar, der2.*yscalar, atol=1e-10)) @test lazyder*yscalar == der1.*yscalar @test lazyder*yscalar isa Tuple - @test yscalar.*der1 == yscalar.*der2 + @test minimum(isapprox.(yscalar.*der1, yscalar.*der2, atol=1e-10)) @test yscalar*lazyder == yscalar.*der1 @test yscalar*lazyder isa Tuple # multiplication with array - @test (der1[1]*yvec, der1[2]*yvec) == (der2[1]*yvec, der2[2]*yvec) + @test minimum(isapprox.((der1[1]*yvec, der1[2]*yvec), (der2[1]*yvec, der2[2]*yvec), atol=1e-10)) @test lazyder*yvec == (der1[1]*yvec, der1[2]*yvec) @test lazyder*yvec isa Tuple - @test (yvec*der1[1], yvec*der1[2]) == (yvec*der2[1], yvec*der2[2]) + @test minimum(isapprox.((yvec*der1[1], yvec*der1[2]), (yvec*der2[1], yvec*der2[2]), atol=1e-10)) 
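+    # lazy products must agree exactly (==) with the eager derivative tuple;
+    # only the comparisons against finite differences above use a tolerance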
@test yvec*lazyder == (yvec*der1[1], yvec*der1[2]) @test lazyder*yvec isa Tuple @@ -287,18 +362,18 @@ function test_fdm_lazy_derivatives(fdm_backend) @test (yvec,)*lazyder == yvec*lazyder end -function test_fdm_lazy_gradients(fdm_backend) +function test_fdm_lazy_gradients(backend,fdm_backend) # single input function - grad1 = AD.gradient(fdm_backend, x->fgrad(x, yvec), xvec) + grad1 = AD.gradient(backend, x->fgrad(x, yvec), xvec) grad2 = FDM.grad(fdm_backend.alg, x->fgrad(x, yvec), xvec) - lazygrad = AD.LazyGradient(fdm_backend, x->fgrad(x, yvec), xvec) + lazygrad = AD.LazyGradient(backend, x->fgrad(x, yvec), xvec) # multiplication with scalar - @test norm.(grad1.*yscalar .- grad2.*yscalar) == (0,) + @test minimum(isapprox.(grad1.*yscalar, grad2.*yscalar, atol=1e-10)) @test norm.(lazygrad*yscalar .- grad1.*yscalar) == (0,) @test lazygrad*yscalar isa Tuple - @test norm.(yscalar.*grad1 .- yscalar.*grad2) == (0,) + @test minimum(isapprox.(yscalar.*grad1, yscalar.*grad2, atol=1e-10)) @test norm.(yscalar*lazygrad .- yscalar.*grad1) == (0,) @test yscalar*lazygrad isa Tuple @@ -307,16 +382,16 @@ function test_fdm_lazy_gradients(fdm_backend) @test (yscalar,)*lazygrad == yscalar*lazygrad # two input function - grad1 = AD.gradient(fdm_backend, fgrad, xvec, yvec) + grad1 = AD.gradient(backend, fgrad, xvec, yvec) grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec) - lazygrad = AD.LazyGradient(fdm_backend, fgrad, (xvec, yvec)) + lazygrad = AD.LazyGradient(backend, fgrad, (xvec, yvec)) # multiplication with scalar - @test norm.(grad1.*yscalar .- grad2.*yscalar) == (0,0) + @test minimum(isapprox.(grad1.*yscalar, grad2.*yscalar, atol=1e-10)) @test norm.(lazygrad*yscalar .- grad1.*yscalar) == (0,0) @test lazygrad*yscalar isa Tuple - @test norm.(yscalar.*grad1 .- yscalar.*grad2) == (0,0) + @test minimum(isapprox.(yscalar.*grad1, yscalar.*grad2, atol=1e-10)) @test norm.(yscalar*lazygrad .- yscalar.*grad1) == (0,0) @test yscalar*lazygrad isa Tuple @@ -325,18 +400,18 @@ function test_fdm_lazy_gradients(fdm_backend) @test (yscalar,)*lazygrad == yscalar*lazygrad end -function test_fdm_lazy_jacobians(fdm_backend) +function test_fdm_lazy_jacobians(backend,fdm_backend) # single input function - jac1 = AD.jacobian(fdm_backend, x->fjac(x, yvec), xvec) + jac1 = AD.jacobian(backend, x->fjac(x, yvec), xvec) jac2 = FDM.jacobian(fdm_backend.alg, x->fjac(x, yvec), xvec) - lazyjac = AD.LazyJacobian(fdm_backend, x->fjac(x, yvec), xvec) + lazyjac = AD.LazyJacobian(backend, x->fjac(x, yvec), xvec) # multiplication with scalar - @test norm.(jac1.*yscalar .- jac2.*yscalar) == (0,) + @test minimum(isapprox.(jac1.*yscalar, jac2.*yscalar, atol=1e-10)) @test norm.(lazyjac*yscalar .- jac1.*yscalar) == (0,) @test lazyjac*yscalar isa Tuple - @test norm.(yscalar.*jac1 .- yscalar.*jac2) == (0,) + @test minimum(isapprox.(yscalar.*jac1, yscalar.*jac2, atol=1e-10)) @test norm.(yscalar*lazyjac .- yscalar.*jac1) == (0,) @test yscalar*lazyjac isa Tuple @@ -356,16 +431,16 @@ function test_fdm_lazy_jacobians(fdm_backend) @test res isa Tuple # two input function - jac1 = AD.jacobian(fdm_backend, fjac, xvec, yvec) + jac1 = AD.jacobian(backend, fjac, xvec, yvec) jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec) - lazyjac = AD.LazyJacobian(fdm_backend, fjac, (xvec, yvec)) + lazyjac = AD.LazyJacobian(backend, fjac, (xvec, yvec)) # multiplication with scalar - @test norm.(jac1.*yscalar .- jac2.*yscalar) == (0,0) + @test minimum(isapprox.(jac1.*yscalar, jac2.*yscalar, atol=1e-10)) @test norm.(lazyjac*yscalar .- jac1.*yscalar) == (0,0) @test 
lazyjac*yscalar isa Tuple - @test norm.(yscalar.*jac1 .- yscalar.*jac2) == (0,0) + @test minimum(isapprox.(yscalar.*jac1, yscalar.*jac2, atol=1e-10)) @test norm.(yscalar*lazyjac .- yscalar.*jac1) == (0,0) @test yscalar*lazyjac isa Tuple @@ -381,7 +456,7 @@ function test_fdm_lazy_jacobians(fdm_backend) FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])), ) - if fdm_backend isa FDMBackend2 # augmented version of v + if backend isa Union{FDMBackend2,ForwardDiffBackend2} # augmented version of v identity_like = AD.identity_matrix_like(v) vaug = map(identity_like) do identity_like_i identity_like_i .* v @@ -396,11 +471,12 @@ function test_fdm_lazy_jacobians(fdm_backend) @test res isa Tuple end -function test_fdm_lazy_hessians(fdm_backend) +function test_fdm_lazy_hessians(backend,fdm_backend) + # fdm_backend not used here yet.. # single input function fhess = x -> fgrad(x, yvec) hess1 = (dfgraddxdx(xvec,yvec),) - lazyhess = AD.LazyHessian(fdm_backend, x->fgrad(x, yvec), xvec) + lazyhess = AD.LazyHessian(backend, fhess, xvec) # multiplication with scalar @test minimum(isapprox.(lazyhess*yscalar, hess1.*yscalar, atol=1e-10)) @@ -427,57 +503,102 @@ function test_fdm_lazy_hessians(fdm_backend) end @testset "AbstractDifferentiation.jl" begin + testFDMbackend = fdm_backend1 @testset "FiniteDifferences" begin @testset "Derivative" begin - test_fdm_derivatives(fdm_backend1) - test_fdm_derivatives(fdm_backend2) - test_fdm_derivatives(fdm_backend3) + test_fdm_derivatives(fdm_backend1,testFDMbackend) + test_fdm_derivatives(fdm_backend2,testFDMbackend) + test_fdm_derivatives(fdm_backend3,testFDMbackend) end @testset "Gradient" begin - test_fdm_gradients(fdm_backend1) - test_fdm_gradients(fdm_backend2) - test_fdm_gradients(fdm_backend3) + test_fdm_gradients(fdm_backend1,testFDMbackend) + test_fdm_gradients(fdm_backend2,testFDMbackend) + test_fdm_gradients(fdm_backend3,testFDMbackend + ) end @testset "Jacobian" begin - test_fdm_jacobians(fdm_backend1) - test_fdm_jacobians(fdm_backend2) - test_fdm_jacobians(fdm_backend3) + test_fdm_jacobians(fdm_backend1,testFDMbackend) + test_fdm_jacobians(fdm_backend2,testFDMbackend) + test_fdm_jacobians(fdm_backend3,testFDMbackend) end @testset "Hessian" begin # Works but super slow - test_fdm_hessians(fdm_backend1) - test_fdm_hessians(fdm_backend2) - test_fdm_hessians(fdm_backend3) + test_fdm_hessians(fdm_backend1,testFDMbackend) + test_fdm_hessians(fdm_backend2,testFDMbackend) + test_fdm_hessians(fdm_backend3,testFDMbackend) + end + @testset "jvp" begin + test_fdm_jvp(fdm_backend1,testFDMbackend) + test_fdm_jvp(fdm_backend2,testFDMbackend) + test_fdm_jvp(fdm_backend3,testFDMbackend) + end + @testset "j′vp" begin + test_fdm_j′vp(fdm_backend1,testFDMbackend) + test_fdm_j′vp(fdm_backend2,testFDMbackend) + test_fdm_j′vp(fdm_backend3,testFDMbackend) + end + @testset "Lazy Derivative" begin + test_fdm_lazy_derivatives(fdm_backend1,testFDMbackend) + test_fdm_lazy_derivatives(fdm_backend2,testFDMbackend) + test_fdm_lazy_derivatives(fdm_backend3,testFDMbackend) + end + @testset "Lazy Gradient" begin + test_fdm_lazy_gradients(fdm_backend1,testFDMbackend) + test_fdm_lazy_gradients(fdm_backend2,testFDMbackend) + test_fdm_lazy_gradients(fdm_backend3,testFDMbackend) + end + @testset "Lazy Jacobian" begin + test_fdm_lazy_jacobians(fdm_backend1,testFDMbackend) + test_fdm_lazy_jacobians(fdm_backend2,testFDMbackend) + test_fdm_lazy_jacobians(fdm_backend3,testFDMbackend) + end + @testset "Lazy Hessian" begin + test_fdm_lazy_hessians(fdm_backend1,testFDMbackend) + 
test_fdm_lazy_hessians(fdm_backend2,testFDMbackend) + test_fdm_lazy_hessians(fdm_backend3,testFDMbackend) + end + end + @testset "ForwardDiff" begin + @testset "Derivative" begin + test_fdm_derivatives(forwarddiff_backend1,testFDMbackend) + test_fdm_derivatives(forwarddiff_backend2,testFDMbackend) + end + @testset "Gradient" begin + test_fdm_gradients(forwarddiff_backend1,testFDMbackend) + test_fdm_gradients(forwarddiff_backend2,testFDMbackend) + end + @testset "Jacobian" begin + test_fdm_jacobians(forwarddiff_backend1,testFDMbackend) + test_fdm_jacobians(forwarddiff_backend2,testFDMbackend) + end + @testset "Hessian" begin + # Fails due to setindex! in AbstractDifferentiation Hessian function + @test_broken test_fdm_hessians(forwarddiff_backend1,testFDMbackend) + @test_broken test_fdm_hessians(forwarddiff_backend2,testFDMbackend) end @testset "jvp" begin - test_fdm_jvp(fdm_backend1) - test_fdm_jvp(fdm_backend2) - test_fdm_jvp(fdm_backend3) + test_fdm_jvp(forwarddiff_backend1,testFDMbackend) + test_fdm_jvp(forwarddiff_backend2,testFDMbackend) end @testset "j′vp" begin - test_fdm_j′vp(fdm_backend1) - test_fdm_j′vp(fdm_backend2) - test_fdm_j′vp(fdm_backend3) + test_fdm_j′vp(forwarddiff_backend1,testFDMbackend) + test_fdm_j′vp(forwarddiff_backend2,testFDMbackend) end @testset "Lazy Derivative" begin - test_fdm_lazy_derivatives(fdm_backend1) - test_fdm_lazy_derivatives(fdm_backend2) - test_fdm_lazy_derivatives(fdm_backend3) + test_fdm_lazy_derivatives(forwarddiff_backend1,testFDMbackend) + test_fdm_lazy_derivatives(forwarddiff_backend2,testFDMbackend) end @testset "Lazy Gradient" begin - test_fdm_lazy_gradients(fdm_backend1) - test_fdm_lazy_gradients(fdm_backend2) - test_fdm_lazy_gradients(fdm_backend3) + test_fdm_lazy_gradients(forwarddiff_backend1,testFDMbackend) + test_fdm_lazy_gradients(forwarddiff_backend2,testFDMbackend) end @testset "Lazy Jacobian" begin - test_fdm_lazy_jacobians(fdm_backend1) - test_fdm_lazy_jacobians(fdm_backend2) - test_fdm_lazy_jacobians(fdm_backend3) + test_fdm_lazy_jacobians(forwarddiff_backend1,testFDMbackend) + test_fdm_lazy_jacobians(forwarddiff_backend2,testFDMbackend) end @testset "Lazy Hessian" begin - test_fdm_lazy_hessians(fdm_backend1) - test_fdm_lazy_hessians(fdm_backend2) - test_fdm_lazy_hessians(fdm_backend3) + #@test_broken test_fdm_lazy_hessians(forwarddiff_backend1,testFDMbackend) + #@test_broken test_fdm_lazy_hessians(forwarddiff_backend2,testFDMbackend) end end end From c0c489aeb89d2c35eac3e97f2a4e7f288f7e09a3 Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Mon, 2 Aug 2021 21:36:53 +0200 Subject: [PATCH 19/25] ForwardDiff and test updates --- src/AbstractDifferentiation.jl | 39 ++- test/runtests.jl | 554 +++++++++++++++------------------ 2 files changed, 280 insertions(+), 313 deletions(-) diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index 306fe65..643d60f 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -44,20 +44,13 @@ function gradient(ab::AbstractBackend, f, xs...) end function jacobian(ab::AbstractBackend, f, xs...) end function hessian(ab::AbstractBackend, f, xs...) - xss = collect((xs...,)) - counter = 0 - # gradient returns tuple of gradient values with respect to inputs x,y ∈ xs - # Hessian is the Jacobian of the individual gradients, i.e., the tuple of matrices - # defined by ∂x∂x f, ∂y∂y f, in the case of a scalar valued function `f`. - hess = map((xs...,)) do x - counter += 1 - # needs to be modified for forward mode AD; no method matching error from setindex! 
- _f = _x->f(setindex!(deepcopy(xss),_x,counter)...) - return jacobian(secondlowest(ab),(x,)-> begin - return gradient(lowest(ab), _f, x) - end, x)[1] - end - return hess + if xs isa Tuple + # only support computation of Hessian for functions with single input argument + @assert length(xs) == 1 + end + return jacobian(secondlowest(ab), (xs...,) -> begin + gradient(lowest(ab), f, xs...) + end, xs...) end function value_and_derivative(ab::AbstractBackend, f, xs::Number...) @@ -168,7 +161,10 @@ function value_and_pushforward_function( xs..., ) return (ds) -> begin - @assert ds isa Tuple && length(ds) == length(xs) + if !(ds isa Tuple) + ds = (ds,) + end + @assert length(ds) == length(xs) local value primalcalled = false if ab isa AbstractFiniteDifference @@ -183,6 +179,7 @@ function value_and_pushforward_function( end return vs end, xs...)(ds) + return value, pf end end @@ -268,10 +265,16 @@ function Base.:*(y, d::LazyDerivative) end function Base.:*(d::LazyDerivative, y::Union{Number,Tuple}) + if y isa Tuple && d.xs isa Tuple + @assert length(y) == length(d.xs) + end return derivative(d.backend, d.f, d.xs...) .* y end function Base.:*(y::Union{Number,Tuple}, d::LazyDerivative) + if y isa Tuple && d.xs isa Tuple + @assert length(y) == length(d.xs) + end return y .* derivative(d.backend, d.f, d.xs...) end @@ -293,6 +296,9 @@ Base.:*(d::LazyGradient, y) = gradient(d.backend, d.f, d.xs...) * y Base.:*(y, d::LazyGradient) = y * gradient(d.backend, d.f, d.xs...) function Base.:*(d::LazyGradient, y::Union{Number,Tuple}) + if y isa Tuple && d.xs isa Tuple + @assert length(y) == length(d.xs) + end if d.xs isa Tuple return gradient(d.backend, d.f, d.xs...) .* y else @@ -301,6 +307,9 @@ function Base.:*(d::LazyGradient, y::Union{Number,Tuple}) end function Base.:*(y::Union{Number,Tuple}, d::LazyGradient) + if y isa Tuple && d.xs isa Tuple + @assert length(y) == length(d.xs) + end if d.xs isa Tuple return y .* gradient(d.backend, d.f, d.xs...) else diff --git a/test/runtests.jl b/test/runtests.jl index 05e9df6..97f9687 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,7 +14,7 @@ FDMBackend1() = FDMBackend1(central_fdm(5, 1)) const fdm_backend1 = FDMBackend1() # Minimal interface AD.@primitive function jacobian(ab::FDMBackend1, f, xs...) - return jacobian(ab.alg, f, xs...) + return FDM.jacobian(ab.alg, f, xs...) end struct FDMBackend2{A} <: AD.AbstractFiniteDifference @@ -24,7 +24,7 @@ FDMBackend2() = FDMBackend2(central_fdm(5, 1)) const fdm_backend2 = FDMBackend2() AD.@primitive function pushforward_function(ab::FDMBackend2, f, xs...) return function (vs) - jvp(ab.alg, f, tuple.(xs, vs)...) + FDM.jvp(ab.alg, f, tuple.(xs, vs)...) end end @@ -37,10 +37,10 @@ AD.@primitive function pullback_function(ab::FDMBackend3, f, xs...) return function (vs) # Supports only single output if vs isa AbstractVector - return j′vp(ab.alg, f, vs, xs...) + return FDM.j′vp(ab.alg, f, vs, xs...) else @assert length(vs) == 1 - return j′vp(ab.alg, f, vs[1], xs...) + return FDM.j′vp(ab.alg, f, vs[1], xs...) end end @@ -51,51 +51,23 @@ end ## ForwardDiff struct ForwardDiffBackend1 <: AD.AbstractForwardMode end const forwarddiff_backend1 = ForwardDiffBackend1() -AD.@primitive function jacobian(ab::ForwardDiffBackend1, f, xs...) - if xs isa AbstractArray - return ForwardDiff.jacobian(f, xs) - elseif xs isa Number - return ForwardDiff.derivative(f, xs) - elseif xs isa Tuple - @assert length(xs) <= 2 - if eltype(xs) <: Number - # Can this be avoided for the derivative computation? 
It seems to defeat the purpose of AbstractDifferentiation. - # If eltype(xs) isa Number, ForwardDiff will error by saying that Jacobian expects an array. - # if we convert the number to an array in AbstractDifferentiation, we get a no method error due to log(x) and exp(x) - if length(xs) == 1 - (ForwardDiff.derivative(f, xs[1]),) - else - _f1 = x -> f(x,xs[2]) - _f2 = x -> f(xs[1], x) - (ForwardDiff.derivative(_f1, xs[1]),ForwardDiff.derivative(_f2, xs[2])) - end +AD.@primitive function jacobian(ab::ForwardDiffBackend1, f, xs) + if xs isa Number + return (ForwardDiff.derivative(f, xs),) + elseif xs isa AbstractArray + out = f(xs) + if out isa Number + return (adjoint(ForwardDiff.gradient(f, xs)),) else - if length(xs) == 1 - out = f(xs[1]) - if out isa Number - # Lazy Jacobian test pb fails otherwise with "dimensions must match: a has dims (Base.OneTo(1), Base.OneTo(5)), b has dims (Base.OneTo(5),), mismatch at 1" - (reshape(ForwardDiff.gradient(f, xs[1]),(1,length(xs[1]))),) - else - (ForwardDiff.jacobian(f, xs[1]),) - end - else - out = f(xs...) - _f1 = x -> f(x,xs[2]) - _f2 = x -> f(xs[1], x) - if out isa Number - # reshape for pullback tests (similar as above) - (reshape(ForwardDiff.gradient(_f1, xs[1]),(1,length(xs[1]))), - reshape(ForwardDiff.gradient(_f2, xs[2]), (1, length(xs[2])))) - else - (ForwardDiff.jacobian(_f1, xs[1]),ForwardDiff.jacobian(_f2, xs[2])) - end - - end - end + return (ForwardDiff.jacobian(f, xs),) + end + elseif xs isa Tuple + error(typeof(xs)) else error(typeof(xs)) end end +AD.primalvalue(::ForwardDiffBackend1, ::Any, f, xs) = ForwardDiff.value.(f(xs...)) struct ForwardDiffBackend2 <: AD.AbstractForwardMode end const forwarddiff_backend2 = ForwardDiffBackend2() @@ -114,6 +86,8 @@ AD.@primitive function pushforward_function(ab::ForwardDiffBackend2, f, xs...) 
end end end +AD.primalvalue(::ForwardDiffBackend2, ::Any, f, xs) = ForwardDiff.value.(f(xs...)) + ## @@ -128,7 +102,7 @@ dfgraddxdx(x, y) = prod(x)./(x*x') - Diagonal(diag(prod(x)./(x*x'))) dfgraddydy(x, y) = zeros(length(y),length(y)) function fjac(x, y) - x + Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) * y + x + -3*y + [y[2:end];zero(y[end])]/2# Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) * y end dfjacdx(x, y) = I(length(x)) dfjacdy(x, y) = Bidiagonal(-ones(length(y)) * 3, ones(length(y) - 1) / 2, :U) @@ -152,171 +126,177 @@ xvec2 = deepcopy(xvec) yvec2 = deepcopy(yvec) -function test_fdm_derivatives(backend, fdm_backend) - # fdm_backend for comparison with finite differences - der1 = AD.derivative(backend, fder, xscalar, yscalar) - der2 = ( - fdm_backend.alg(x -> fder(x, yscalar), xscalar), - fdm_backend.alg(y -> fder(xscalar, y), yscalar), - ) - @test minimum(isapprox.(der1, der2, rtol=1e-10)) - valscalar, der3 = AD.value_and_derivative(backend, fder, xscalar, yscalar) - @test valscalar == fder(xscalar, yscalar) - @test der3 .- der1 == (0, 0) +function test_derivatives(backend; multiple_inputs=true) + # test with respect to analytical solution der_exact = (dfderdx(xscalar,yscalar), dfderdy(xscalar,yscalar)) - @test minimum(isapprox.(der_exact, der1, rtol=1e-10)) + if multiple_inputs + der1 = AD.derivative(backend, fder, xscalar, yscalar) + @test minimum(isapprox.(der_exact, der1, rtol=1e-10)) + valscalar, der2 = AD.value_and_derivative(backend, fder, xscalar, yscalar) + @test valscalar == fder(xscalar, yscalar) + @test der2 .- der1 == (0, 0) + end # test if single input (no tuple works) valscalara, dera = AD.value_and_derivative(backend, x -> fder(x, yscalar), xscalar) valscalarb, derb = AD.value_and_derivative(backend, y -> fder(xscalar, y), yscalar) - @test valscalar == valscalara - @test valscalar == valscalarb - @test isapprox(dera[1], der1[1], rtol=1e-10) - @test isapprox(derb[1], der1[2], rtol=1e-10) + @test fder(xscalar, yscalar) == valscalara + @test fder(xscalar, yscalar) == valscalarb + @test isapprox(dera[1], der_exact[1], rtol=1e-10) + @test isapprox(derb[1], der_exact[2], rtol=1e-10) end -function test_fdm_gradients(backend, fdm_backend) - grad1 = AD.gradient(backend, fgrad, xvec, yvec) - grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec) - @test minimum(isapprox.(grad1, grad2, rtol=1e-10)) - valscalar, grad3 = AD.value_and_gradient(backend, fgrad, xvec, yvec) - @test valscalar == fgrad(xvec, yvec) - @test norm.(grad3 .- grad1) == (0, 0) +function test_gradients(backend; multiple_inputs=true) + # test with respect to analytical solution grad_exact = (dfgraddx(xvec,yvec), dfgraddy(xvec,yvec)) - @test minimum(isapprox.(grad_exact, grad1, rtol=1e-10)) - @test xvec == xvec2 - @test yvec == yvec2 + if multiple_inputs + grad1 = AD.gradient(backend, fgrad, xvec, yvec) + @test minimum(isapprox.(grad_exact, grad1, rtol=1e-10)) + valscalar, grad2 = AD.value_and_gradient(backend, fgrad, xvec, yvec) + @test valscalar == fgrad(xvec, yvec) + @test norm.(grad2 .- grad1) == (0, 0) + end # test if single input (no tuple works) valscalara, grada = AD.value_and_gradient(backend, x -> fgrad(x, yvec), xvec) valscalarb, gradb = AD.value_and_gradient(backend, y -> fgrad(xvec, y), yvec) - @test valscalar == valscalara - @test valscalar == valscalarb - @test isapprox(grada[1], grad1[1], rtol=1e-10) - @test isapprox(gradb[1], grad1[2], rtol=1e-10) -end - -function test_fdm_jacobians(backend,fdm_backend) - jac1 = AD.jacobian(backend, fjac, xvec, yvec) - jac2 = 
FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec) - @test minimum(isapprox.(jac1, jac2, rtol=1e-10)) - valvec, jac3 = AD.value_and_jacobian(backend, fjac, xvec, yvec) - @test valvec == fjac(xvec, yvec) - @test norm.(jac3 .- jac1) == (0, 0) - grad_exact = (dfjacdx(xvec, yvec), dfjacdy(xvec, yvec)) - @test minimum(isapprox.(grad_exact, jac1, rtol=1e-10)) + @test fgrad(xvec, yvec) == valscalara + @test fgrad(xvec, yvec) == valscalarb + @test isapprox(grada[1], grad_exact[1], rtol=1e-10) + @test isapprox(gradb[1], grad_exact[2], rtol=1e-10) @test xvec == xvec2 @test yvec == yvec2 +end + +function test_jacobians(backend; multiple_inputs=true) + # test with respect to analytical solution + jac_exact = (dfjacdx(xvec, yvec), dfjacdy(xvec, yvec)) + if multiple_inputs + jac1 = AD.jacobian(backend, fjac, xvec, yvec) + @test minimum(isapprox.(jac_exact, jac1, rtol=1e-10)) + valvec, jac2 = AD.value_and_jacobian(backend, fjac, xvec, yvec) + @test valvec == fjac(xvec, yvec) + @test norm.(jac2 .- jac1) == (0, 0) + end + # test if single input (no tuple works) valveca, jaca = AD.value_and_jacobian(backend, x -> fjac(x, yvec), xvec) valvecb, jacb = AD.value_and_jacobian(backend, y -> fjac(xvec, y), yvec) - @test valvec == valveca - @test valvec == valvecb - @test isapprox(jaca[1], jac1[1], rtol=1e-10) - @test isapprox(jacb[1], jac1[2], rtol=1e-10) + @test fjac(xvec, yvec) == valveca + @test fjac(xvec, yvec) == valvecb + @test isapprox(jaca[1], jac_exact[1], rtol=1e-10) + @test isapprox(jacb[1], jac_exact[2], rtol=1e-10) + @test xvec == xvec2 + @test yvec == yvec2 end -function test_fdm_hessians(backend, fdm_backend) - H1 = AD.hessian(backend, fgrad, xvec, yvec) - @test dfgraddxdx(xvec,yvec) ≈ H1[1] atol=1e-10 - @test dfgraddydy(xvec,yvec) ≈ H1[2] atol=1e-10 +function test_hessians(backend; multiple_inputs=false) + if multiple_inputs + # ... 
but + error("multiple_inputs=true is not supported.") + else + # explicit test that AbstractDifferentiation throws an error + # don't support tuple of Hessians + @test_throws AssertionError H1 = AD.hessian(backend, fgrad, xvec, yvec) + end + + # @test dfgraddxdx(xvec,yvec) ≈ H1[1] atol=1e-10 + # @test dfgraddydy(xvec,yvec) ≈ H1[2] atol=1e-10 # test if single input (no tuple works) fhess = x -> fgrad(x, yvec) hess1 = AD.hessian(backend, fhess, xvec) - hess2 = FDM.jacobian( - fdm_backend.alg, - (x) -> begin - FDM.grad( - fdm_backend.alg, - fhess, - x, - ) - end, - xvec, - ) - @test minimum(isapprox.(hess1, hess2, rtol=1e-10)) - valscalar, hess3 = AD.value_and_hessian(backend, fhess, xvec) + # test with respect to analytical solution + @test dfgraddxdx(xvec,yvec) ≈ hess1[1] atol=1e-10 + + valscalar, hess2 = AD.value_and_hessian(backend, fhess, xvec) @test valscalar == fgrad(xvec, yvec) - @test norm.(hess3 .- hess1) == (0,) - valscalar, grad, hess4 = AD.value_gradient_and_hessian(backend, fhess, xvec) + @test norm.(hess2 .- hess1) == (0,) + valscalar, grad, hess3 = AD.value_gradient_and_hessian(backend, fhess, xvec) @test valscalar == fgrad(xvec, yvec) @test norm.(grad .- AD.gradient(backend, fhess, xvec)) == (0,) - @test norm.(hess4 .- hess1) == (0,) - @test dfgraddxdx(xvec,yvec) ≈ hess1[1] atol=1e-10 + @test norm.(hess3 .- hess1) == (0,) + @test xvec == xvec2 @test yvec == yvec2 fhess2 = x-> dfgraddx(x, yvec) - hess5 = AD.jacobian(backend, fhess2, xvec) - @test minimum(isapprox.(hess5, hess1, atol=1e-10)) + hess4 = AD.jacobian(backend, fhess2, xvec) + @test minimum(isapprox.(hess4, hess1, atol=1e-10)) end -function test_fdm_jvp(backend,fdm_backend) +function test_jvp(backend; multiple_inputs=true) v = (rand(length(xvec)), rand(length(yvec))) - if backend isa Union{FDMBackend2,ForwardDiffBackend2} # augmented version of v - identity_like = AD.identity_matrix_like(v) - vaug = map(identity_like) do identity_like_i - identity_like_i .* v - end + if multiple_inputs + if backend isa Union{FDMBackend2,ForwardDiffBackend2} # augmented version of v + identity_like = AD.identity_matrix_like(v) + vaug = map(identity_like) do identity_like_i + identity_like_i .* v + end - pf1 = map(v->AD.pushforward_function(backend, fjac, xvec, yvec)(v), vaug) - ((valvec1, pf3x), (valvec2, pf3y)) = map(v->AD.value_and_pushforward_function(backend, fjac, xvec, yvec)(v), vaug) - else - pf1 = AD.pushforward_function(backend, fjac, xvec, yvec)(v) - valvec, pf3 = AD.value_and_pushforward_function(backend, fjac, xvec, yvec)(v) - ((valvec1, pf3x), (valvec2, pf3y)) = (valvec, pf3[1]), (valvec, pf3[2]) + pf1 = map(v->AD.pushforward_function(backend, fjac, xvec, yvec)(v), vaug) + ((valvec1, pf2x), (valvec2, pf2y)) = map(v->AD.value_and_pushforward_function(backend, fjac, xvec, yvec)(v), vaug) + else + pf1 = AD.pushforward_function(backend, fjac, xvec, yvec)(v) + valvec, pf2 = AD.value_and_pushforward_function(backend, fjac, xvec, yvec)(v) + ((valvec1, pf2x), (valvec2, pf2y)) = (valvec, pf2[1]), (valvec, pf2[2]) + end + + @test valvec1 == fjac(xvec, yvec) + @test valvec2 == fjac(xvec, yvec) + @test norm.((pf2x,pf2y) .- pf1) == (0, 0) + # test with respect to analytical solution + @test minimum(isapprox.(pf1, (jxvp(xvec,yvec,v[1]), jyvp(xvec,yvec,v[2])), atol=1e-10)) + @test xvec == xvec2 + @test yvec == yvec2 end - pf2 = ( - FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])), - FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])), - ) - @test minimum(isapprox.(pf1, pf2, rtol=1e-10)) + valvec1, pf1 = 
AD.value_and_pushforward_function(backend, x -> fjac(x, yvec), xvec)(v[1]) + valvec2, pf2 = AD.value_and_pushforward_function(backend, y -> fjac(xvec, y), yvec)(v[2]) + + if backend isa Union{FDMBackend2} + pf1 = (pf1,) + pf2 = (pf2,) + end @test valvec1 == fjac(xvec, yvec) @test valvec2 == fjac(xvec, yvec) - @test norm.((pf3x,pf3y) .- pf1) == (0, 0) - @test minimum(isapprox.(pf1, (jxvp(xvec,yvec,v[1]), jyvp(xvec,yvec,v[2])), atol=1e-10)) - @test xvec == xvec2 - @test yvec == yvec2 + @test minimum(isapprox.((pf1[1],pf2[1]), (jxvp(xvec,yvec,v[1]), jyvp(xvec,yvec,v[2])), atol=1e-10)) end -function test_fdm_j′vp(backend,fdm_backend) +function test_j′vp(backend; multiple_inputs=true) + # test with respect to analytical solution w = rand(length(fjac(xvec, yvec))) - pb1 = AD.pullback_function(backend, fjac, xvec, yvec)(w) - pb2 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec) - @test all(norm.(pb1 .- pb2) .<= (1e-10, 1e-10)) - valvec, pb3 = AD.value_and_pullback_function(backend, fjac, xvec, yvec)(w) - @test valvec == fjac(xvec, yvec) - @test norm.(pb3 .- pb1) == (0, 0) - @test minimum(isapprox.(pb1, (vJxp(xvec,yvec,w), vJyp(xvec,yvec,w)), atol=1e-10)) - @test xvec == xvec2 - @test yvec == yvec2 + if multiple_inputs + pb1 = AD.pullback_function(backend, fjac, xvec, yvec)(w) + valvec, pb2 = AD.value_and_pullback_function(backend, fjac, xvec, yvec)(w) + @test valvec == fjac(xvec, yvec) + @test norm.(pb2 .- pb1) == (0, 0) + @test minimum(isapprox.(pb1, (vJxp(xvec,yvec,w), vJyp(xvec,yvec,w)), atol=1e-10)) + @test xvec == xvec2 + @test yvec == yvec2 + end + + valvec1, pb1 = AD.value_and_pullback_function(backend, x -> fjac(x, yvec), xvec)(w) + valvec2, pb2 = AD.value_and_pullback_function(backend, y -> fjac(xvec, y), yvec)(w) + @test valvec1 == fjac(xvec, yvec) + @test valvec2 == fjac(xvec, yvec) + @test minimum(isapprox.((pb1[1],pb2[1]), (vJxp(xvec,yvec,w), vJyp(xvec,yvec,w)), atol=1e-10)) end -function test_fdm_lazy_derivatives(backend,fdm_backend) +function test_lazy_derivatives(backend; multiple_inputs=true) # single input function der1 = AD.derivative(backend, x->fder(x, yscalar), xscalar) - der2 = ( - fdm_backend.alg(x -> fder(x, yscalar), xscalar), - fdm_backend.alg(y -> fder(xscalar, y), yscalar), - ) - lazyder = AD.LazyDerivative(backend, x->fder(x, yscalar), xscalar) # multiplication with scalar - @test isapprox(der1[1]*yscalar, der2[1]*yscalar, atol=1e-10) @test lazyder*yscalar == der1.*yscalar @test lazyder*yscalar isa Tuple - @test isapprox(yscalar*der1[1], yscalar*der2[1], atol=1e-10) @test yscalar*lazyder == yscalar.*der1 @test yscalar*lazyder isa Tuple # multiplication with array - @test isapprox(der1[1]*yvec, der2[1]*yvec, atol=1e-10) @test lazyder*yvec == (der1.*yvec,) @test lazyder*yvec isa Tuple - @test isapprox(yvec*der1[1], yvec*der2[1], atol=1e-10) @test yvec*lazyder == (yvec.*der1,) @test yvec*lazyder isa Tuple @@ -328,52 +308,42 @@ function test_fdm_lazy_derivatives(backend,fdm_backend) @test (yvec,)*lazyder == yvec*lazyder # two input function - der1 = AD.derivative(backend, fder, xscalar, yscalar) - der2 = ( - fdm_backend.alg(x -> fder(x, yscalar), xscalar), - fdm_backend.alg(y -> fder(xscalar, y), yscalar), - ) + if multiple_inputs + der1 = AD.derivative(backend, fder, xscalar, yscalar) + lazyder = AD.LazyDerivative(backend, fder, (xscalar, yscalar)) - lazyder = AD.LazyDerivative(backend, fder, (xscalar, yscalar)) + # multiplication with scalar + @test lazyder*yscalar == der1.*yscalar + @test lazyder*yscalar isa Tuple - # multiplication with scalar - @test 
minimum(isapprox.(der1.*yscalar, der2.*yscalar, atol=1e-10)) - @test lazyder*yscalar == der1.*yscalar - @test lazyder*yscalar isa Tuple + @test yscalar*lazyder == yscalar.*der1 + @test yscalar*lazyder isa Tuple - @test minimum(isapprox.(yscalar.*der1, yscalar.*der2, atol=1e-10)) - @test yscalar*lazyder == yscalar.*der1 - @test yscalar*lazyder isa Tuple + # multiplication with array + @test lazyder*yvec == (der1[1]*yvec, der1[2]*yvec) + @test lazyder*yvec isa Tuple - # multiplication with array - @test minimum(isapprox.((der1[1]*yvec, der1[2]*yvec), (der2[1]*yvec, der2[2]*yvec), atol=1e-10)) - @test lazyder*yvec == (der1[1]*yvec, der1[2]*yvec) - @test lazyder*yvec isa Tuple + @test yvec*lazyder == (yvec*der1[1], yvec*der1[2]) + @test lazyder*yvec isa Tuple - @test minimum(isapprox.((yvec*der1[1], yvec*der1[2]), (yvec*der2[1], yvec*der2[2]), atol=1e-10)) - @test yvec*lazyder == (yvec*der1[1], yvec*der1[2]) - @test lazyder*yvec isa Tuple + # multiplication with tuple + @test_throws AssertionError lazyder*(yscalar,) + @test_throws AssertionError lazyder*(yvec,) - # multiplication with tuple - @test lazyder*(yscalar,) == lazyder*yscalar - @test lazyder*(yvec,) == lazyder*yvec - - @test (yscalar,)*lazyder == yscalar*lazyder - @test (yvec,)*lazyder == yvec*lazyder + @test_throws AssertionError (yscalar,)*lazyder + @test_throws AssertionError (yvec,)*lazyder + end end -function test_fdm_lazy_gradients(backend,fdm_backend) +function test_lazy_gradients(backend; multiple_inputs=true) # single input function grad1 = AD.gradient(backend, x->fgrad(x, yvec), xvec) - grad2 = FDM.grad(fdm_backend.alg, x->fgrad(x, yvec), xvec) lazygrad = AD.LazyGradient(backend, x->fgrad(x, yvec), xvec) # multiplication with scalar - @test minimum(isapprox.(grad1.*yscalar, grad2.*yscalar, atol=1e-10)) @test norm.(lazygrad*yscalar .- grad1.*yscalar) == (0,) @test lazygrad*yscalar isa Tuple - @test minimum(isapprox.(yscalar.*grad1, yscalar.*grad2, atol=1e-10)) @test norm.(yscalar*lazygrad .- yscalar.*grad1) == (0,) @test yscalar*lazygrad isa Tuple @@ -382,96 +352,87 @@ function test_fdm_lazy_gradients(backend,fdm_backend) @test (yscalar,)*lazygrad == yscalar*lazygrad # two input function - grad1 = AD.gradient(backend, fgrad, xvec, yvec) - grad2 = FDM.grad(fdm_backend.alg, fgrad, xvec, yvec) - lazygrad = AD.LazyGradient(backend, fgrad, (xvec, yvec)) + if multiple_inputs + grad1 = AD.gradient(backend, fgrad, xvec, yvec) + lazygrad = AD.LazyGradient(backend, fgrad, (xvec, yvec)) - # multiplication with scalar - @test minimum(isapprox.(grad1.*yscalar, grad2.*yscalar, atol=1e-10)) - @test norm.(lazygrad*yscalar .- grad1.*yscalar) == (0,0) - @test lazygrad*yscalar isa Tuple + # multiplication with scalar + @test norm.(lazygrad*yscalar .- grad1.*yscalar) == (0,0) + @test lazygrad*yscalar isa Tuple - @test minimum(isapprox.(yscalar.*grad1, yscalar.*grad2, atol=1e-10)) - @test norm.(yscalar*lazygrad .- yscalar.*grad1) == (0,0) - @test yscalar*lazygrad isa Tuple + @test norm.(yscalar*lazygrad .- yscalar.*grad1) == (0,0) + @test yscalar*lazygrad isa Tuple - # multiplication with tuple - @test lazygrad*(yscalar,) == lazygrad*yscalar - @test (yscalar,)*lazygrad == yscalar*lazygrad + # multiplication with tuple + @test_throws AssertionError lazygrad*(yscalar,) == lazygrad*yscalar + @test_throws AssertionError (yscalar,)*lazygrad == yscalar*lazygrad + end end -function test_fdm_lazy_jacobians(backend,fdm_backend) +function test_lazy_jacobians(backend; multiple_inputs=true) # single input function jac1 = AD.jacobian(backend, x->fjac(x, 
yvec), xvec) - jac2 = FDM.jacobian(fdm_backend.alg, x->fjac(x, yvec), xvec) lazyjac = AD.LazyJacobian(backend, x->fjac(x, yvec), xvec) # multiplication with scalar - @test minimum(isapprox.(jac1.*yscalar, jac2.*yscalar, atol=1e-10)) @test norm.(lazyjac*yscalar .- jac1.*yscalar) == (0,) @test lazyjac*yscalar isa Tuple - @test minimum(isapprox.(yscalar.*jac1, yscalar.*jac2, atol=1e-10)) @test norm.(yscalar*lazyjac .- yscalar.*jac1) == (0,) @test yscalar*lazyjac isa Tuple - w = adjoint(rand(length(fjac(xvec, yvec)))) + w = rand(length(fjac(xvec, yvec))) v = (rand(length(xvec)),rand(length(xvec))) # vjp - pb1 = FDM.j′vp(fdm_backend.alg, x -> fjac(x, yvec), w, xvec) - res = w*lazyjac + pb1 = (vJxp(xvec,yvec,w),) + res = w'*lazyjac @test minimum(isapprox.(pb1, res, atol=1e-10)) @test res isa Tuple # jvp - pf1 = (FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])),) + pf1 = (jxvp(xvec,yvec,v[1]),) res = lazyjac*v[1] @test minimum(isapprox.(pf1, res, atol=1e-10)) @test res isa Tuple # two input function - jac1 = AD.jacobian(backend, fjac, xvec, yvec) - jac2 = FDM.jacobian(fdm_backend.alg, fjac, xvec, yvec) - lazyjac = AD.LazyJacobian(backend, fjac, (xvec, yvec)) - - # multiplication with scalar - @test minimum(isapprox.(jac1.*yscalar, jac2.*yscalar, atol=1e-10)) - @test norm.(lazyjac*yscalar .- jac1.*yscalar) == (0,0) - @test lazyjac*yscalar isa Tuple - - @test minimum(isapprox.(yscalar.*jac1, yscalar.*jac2, atol=1e-10)) - @test norm.(yscalar*lazyjac .- yscalar.*jac1) == (0,0) - @test yscalar*lazyjac isa Tuple - - # vjp - pb1 = FDM.j′vp(fdm_backend.alg, fjac, w, xvec, yvec) - res = w*lazyjac - @test minimum(isapprox.(pb1, res, atol=1e-10)) - @test res isa Tuple + if multiple_inputs + jac1 = AD.jacobian(backend, fjac, xvec, yvec) + lazyjac = AD.LazyJacobian(backend, fjac, (xvec, yvec)) + + # multiplication with scalar + @test norm.(lazyjac*yscalar .- jac1.*yscalar) == (0,0) + @test lazyjac*yscalar isa Tuple + + @test norm.(yscalar*lazyjac .- yscalar.*jac1) == (0,0) + @test yscalar*lazyjac isa Tuple + + # vjp + pb1 = (vJxp(xvec,yvec,w), vJyp(xvec,yvec,w)) + res = w'lazyjac + @test minimum(isapprox.(pb1, res, atol=1e-10)) + @test res isa Tuple + + # jvp + pf1 = (jxvp(xvec,yvec,v[1]), jyvp(xvec,yvec,v[2])) + + if backend isa Union{FDMBackend2,ForwardDiffBackend2} # augmented version of v + identity_like = AD.identity_matrix_like(v) + vaug = map(identity_like) do identity_like_i + identity_like_i .* v + end - # jvp - pf1 = ( - FDM.jvp(fdm_backend.alg, x -> fjac(x, yvec), (xvec, v[1])), - FDM.jvp(fdm_backend.alg, y -> fjac(xvec, y), (yvec, v[2])), - ) - - if backend isa Union{FDMBackend2,ForwardDiffBackend2} # augmented version of v - identity_like = AD.identity_matrix_like(v) - vaug = map(identity_like) do identity_like_i - identity_like_i .* v + res = map(v->(lazyjac*v)[1], vaug) + else + res = lazyjac*v end - - res = map(v->(lazyjac*v)[1], vaug) - else - res = lazyjac*v + @test minimum(isapprox.(pf1, res, atol=1e-10)) + @test res isa Tuple end - - @test minimum(isapprox.(pf1, res, atol=1e-10)) - @test res isa Tuple end -function test_fdm_lazy_hessians(backend,fdm_backend) +function test_lazy_hessians(backend; multiple_inputs=true) # fdm_backend not used here yet.. 
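+ # A sketch of the two contractions exercised below, for the single-input
+ # fhess defined next (hess1 denotes the Hessian tuple from AD.hessian):
+ #   Hvp:  lazyhess*v   should match the dense Hessian-vector products H*v
+ #   H′vp: w'*lazyhess  should match map(h -> h'*w, hess1)
+ # For twice-differentiable f the two coincide, the Hessian being symmetric.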
# single input function fhess = x -> fgrad(x, yvec) @@ -486,7 +447,7 @@ function test_fdm_lazy_hessians(backend,fdm_backend) @test minimum(isapprox.(yscalar*lazyhess, yscalar.*hess1, atol=1e-10)) @test yscalar*lazyhess isa Tuple - w = adjoint(rand(length(xvec))) + w = rand(length(xvec)) v = rand(length(xvec)) # Hvp @@ -496,109 +457,106 @@ function test_fdm_lazy_hessians(backend,fdm_backend) @test res isa Tuple # H′vp - wH = map(h->h'*adjoint(w), hess1) - res = w*lazyhess + wH = map(h->h'*w, hess1) + res = w'*lazyhess @test minimum(isapprox.(wH, res, atol=1e-10)) @test res isa Tuple end @testset "AbstractDifferentiation.jl" begin - testFDMbackend = fdm_backend1 @testset "FiniteDifferences" begin @testset "Derivative" begin - test_fdm_derivatives(fdm_backend1,testFDMbackend) - test_fdm_derivatives(fdm_backend2,testFDMbackend) - test_fdm_derivatives(fdm_backend3,testFDMbackend) + test_derivatives(fdm_backend1) + test_derivatives(fdm_backend2) + test_derivatives(fdm_backend3) end @testset "Gradient" begin - test_fdm_gradients(fdm_backend1,testFDMbackend) - test_fdm_gradients(fdm_backend2,testFDMbackend) - test_fdm_gradients(fdm_backend3,testFDMbackend - ) + test_gradients(fdm_backend1) + test_gradients(fdm_backend2) + test_gradients(fdm_backend3) end @testset "Jacobian" begin - test_fdm_jacobians(fdm_backend1,testFDMbackend) - test_fdm_jacobians(fdm_backend2,testFDMbackend) - test_fdm_jacobians(fdm_backend3,testFDMbackend) + test_jacobians(fdm_backend1) + test_jacobians(fdm_backend2) + test_jacobians(fdm_backend3) end @testset "Hessian" begin - # Works but super slow - test_fdm_hessians(fdm_backend1,testFDMbackend) - test_fdm_hessians(fdm_backend2,testFDMbackend) - test_fdm_hessians(fdm_backend3,testFDMbackend) + test_hessians(fdm_backend1) + test_hessians(fdm_backend2) + test_hessians(fdm_backend3) end @testset "jvp" begin - test_fdm_jvp(fdm_backend1,testFDMbackend) - test_fdm_jvp(fdm_backend2,testFDMbackend) - test_fdm_jvp(fdm_backend3,testFDMbackend) + test_jvp(fdm_backend1) + test_jvp(fdm_backend2) + test_jvp(fdm_backend3) end @testset "j′vp" begin - test_fdm_j′vp(fdm_backend1,testFDMbackend) - test_fdm_j′vp(fdm_backend2,testFDMbackend) - test_fdm_j′vp(fdm_backend3,testFDMbackend) + test_j′vp(fdm_backend1) + test_j′vp(fdm_backend2) + test_j′vp(fdm_backend3) end @testset "Lazy Derivative" begin - test_fdm_lazy_derivatives(fdm_backend1,testFDMbackend) - test_fdm_lazy_derivatives(fdm_backend2,testFDMbackend) - test_fdm_lazy_derivatives(fdm_backend3,testFDMbackend) + test_lazy_derivatives(fdm_backend1) + test_lazy_derivatives(fdm_backend2) + test_lazy_derivatives(fdm_backend3) end @testset "Lazy Gradient" begin - test_fdm_lazy_gradients(fdm_backend1,testFDMbackend) - test_fdm_lazy_gradients(fdm_backend2,testFDMbackend) - test_fdm_lazy_gradients(fdm_backend3,testFDMbackend) + test_lazy_gradients(fdm_backend1) + test_lazy_gradients(fdm_backend2) + test_lazy_gradients(fdm_backend3) end @testset "Lazy Jacobian" begin - test_fdm_lazy_jacobians(fdm_backend1,testFDMbackend) - test_fdm_lazy_jacobians(fdm_backend2,testFDMbackend) - test_fdm_lazy_jacobians(fdm_backend3,testFDMbackend) + test_lazy_jacobians(fdm_backend1) + test_lazy_jacobians(fdm_backend2) + test_lazy_jacobians(fdm_backend3) end @testset "Lazy Hessian" begin - test_fdm_lazy_hessians(fdm_backend1,testFDMbackend) - test_fdm_lazy_hessians(fdm_backend2,testFDMbackend) - test_fdm_lazy_hessians(fdm_backend3,testFDMbackend) + test_lazy_hessians(fdm_backend1) + test_lazy_hessians(fdm_backend2) + test_lazy_hessians(fdm_backend3) end end @testset 
"ForwardDiff" begin @testset "Derivative" begin - test_fdm_derivatives(forwarddiff_backend1,testFDMbackend) - test_fdm_derivatives(forwarddiff_backend2,testFDMbackend) + test_derivatives(forwarddiff_backend1; multiple_inputs=false) + test_derivatives(forwarddiff_backend2) end @testset "Gradient" begin - test_fdm_gradients(forwarddiff_backend1,testFDMbackend) - test_fdm_gradients(forwarddiff_backend2,testFDMbackend) + test_gradients(forwarddiff_backend1; multiple_inputs=false) + test_gradients(forwarddiff_backend2) end @testset "Jacobian" begin - test_fdm_jacobians(forwarddiff_backend1,testFDMbackend) - test_fdm_jacobians(forwarddiff_backend2,testFDMbackend) + test_jacobians(forwarddiff_backend1; multiple_inputs=false) + test_jacobians(forwarddiff_backend2) end @testset "Hessian" begin - # Fails due to setindex! in AbstractDifferentiation Hessian function - @test_broken test_fdm_hessians(forwarddiff_backend1,testFDMbackend) - @test_broken test_fdm_hessians(forwarddiff_backend2,testFDMbackend) + #test_hessians(forwarddiff_backend1; multiple_inputs=false) + #test_hessians(forwarddiff_backend2) end @testset "jvp" begin - test_fdm_jvp(forwarddiff_backend1,testFDMbackend) - test_fdm_jvp(forwarddiff_backend2,testFDMbackend) + test_jvp(forwarddiff_backend1; multiple_inputs=false) + test_jvp(forwarddiff_backend2) end @testset "j′vp" begin - test_fdm_j′vp(forwarddiff_backend1,testFDMbackend) - test_fdm_j′vp(forwarddiff_backend2,testFDMbackend) + test_j′vp(forwarddiff_backend1; multiple_inputs=false) + test_j′vp(forwarddiff_backend2) end @testset "Lazy Derivative" begin - test_fdm_lazy_derivatives(forwarddiff_backend1,testFDMbackend) - test_fdm_lazy_derivatives(forwarddiff_backend2,testFDMbackend) + test_lazy_derivatives(forwarddiff_backend1; multiple_inputs=false) + test_lazy_derivatives(forwarddiff_backend2) end @testset "Lazy Gradient" begin - test_fdm_lazy_gradients(forwarddiff_backend1,testFDMbackend) - test_fdm_lazy_gradients(forwarddiff_backend2,testFDMbackend) + test_lazy_gradients(forwarddiff_backend1; multiple_inputs=false) + test_lazy_gradients(forwarddiff_backend2) end @testset "Lazy Jacobian" begin - test_fdm_lazy_jacobians(forwarddiff_backend1,testFDMbackend) - test_fdm_lazy_jacobians(forwarddiff_backend2,testFDMbackend) + test_lazy_jacobians(forwarddiff_backend1; multiple_inputs=false) + test_lazy_jacobians(forwarddiff_backend2) end @testset "Lazy Hessian" begin - #@test_broken test_fdm_lazy_hessians(forwarddiff_backend1,testFDMbackend) - #@test_broken test_fdm_lazy_hessians(forwarddiff_backend2,testFDMbackend) + #test_lazy_hessians(forwarddiff_backend1; multiple_inputs=false) + #test_lazy_hessians(forwarddiff_backend2) end end end + From 0135125db9214bb9bd2ab47897a496c2fd115e45 Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Tue, 3 Aug 2021 10:53:03 +0200 Subject: [PATCH 20/25] add Zygote tests --- Project.toml | 3 ++- test/runtests.jl | 65 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 0ca1a6e..3033ef9 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Test", "FiniteDifferences", "ForwardDiff", "Random"] +test = ["Test", "FiniteDifferences", "ForwardDiff", "Random", "Zygote"] diff --git 
a/test/runtests.jl b/test/runtests.jl index 97f9687..d2ec67a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using AbstractDifferentiation using Test, FiniteDifferences, LinearAlgebra using ForwardDiff +using Zygote using Random Random.seed!(1234) @@ -87,9 +88,33 @@ AD.@primitive function pushforward_function(ab::ForwardDiffBackend2, f, xs...) end end AD.primalvalue(::ForwardDiffBackend2, ::Any, f, xs) = ForwardDiff.value.(f(xs...)) +## +## Zygote +struct ZygoteBackend1 <: AD.AbstractReverseMode end +const zygote_backend1 = ZygoteBackend1() +AD.@primitive function pullback_function(ab::ZygoteBackend1, f, xs...) + return function (vs) + # Supports only single output + _, back = Zygote.pullback(f, xs...) + if vs isa AbstractVector + back(vs) + else + @assert length(vs) == 1 + back(vs[1]) + end + end +end ## +struct ZygoteBackend <: AD.AbstractReverseMode end +AD.@primitive function pullback_function(ab::ZygoteBackend, f, xs...) + return function (vs) + # Supports only single output and tuple as input + _, back = Zygote.pullback(f, xs...) + back(vs[1]) + end +end fder(x, y) = exp(y) * x + y * log(x) dfderdx(x, y) = exp(y) + y * 1/x @@ -530,8 +555,8 @@ end test_jacobians(forwarddiff_backend2) end @testset "Hessian" begin - #test_hessians(forwarddiff_backend1; multiple_inputs=false) - #test_hessians(forwarddiff_backend2) + test_hessians(forwarddiff_backend1; multiple_inputs=false) + test_hessians(forwarddiff_backend2) end @testset "jvp" begin test_jvp(forwarddiff_backend1; multiple_inputs=false) @@ -554,8 +579,40 @@ end test_lazy_jacobians(forwarddiff_backend2) end @testset "Lazy Hessian" begin - #test_lazy_hessians(forwarddiff_backend1; multiple_inputs=false) - #test_lazy_hessians(forwarddiff_backend2) + test_lazy_hessians(forwarddiff_backend1; multiple_inputs=false) + test_lazy_hessians(forwarddiff_backend2) + end + end + @testset "Zygote" begin + @testset "Derivative" begin + test_derivatives(zygote_backend1) + end + @testset "Gradient" begin + test_gradients(zygote_backend1) + end + @testset "Jacobian" begin + test_jacobians(zygote_backend1) + end + @testset "Hessian" begin + test_hessians(zygote_backend1) + end + @testset "jvp" begin + test_jvp(zygote_backend1) + end + @testset "j′vp" begin + test_j′vp(zygote_backend1) + end + @testset "Lazy Derivative" begin + test_lazy_derivatives(zygote_backend1) + end + @testset "Lazy Gradient" begin + test_lazy_gradients(zygote_backend1) + end + @testset "Lazy Jacobian" begin + test_lazy_jacobians(zygote_backend1) + end + @testset "Lazy Hessian" begin + test_lazy_hessians(zygote_backend1) end end end From 28da613bd379e67ddfb1eb357dbe6a55adca6c0f Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Wed, 4 Aug 2021 14:16:14 +0200 Subject: [PATCH 21/25] fix ForwardDiff --- src/AbstractDifferentiation.jl | 38 +++++++++++++++++++++------------- test/runtests.jl | 10 --------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index 643d60f..a5673a9 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -47,10 +47,11 @@ function hessian(ab::AbstractBackend, f, xs...) if xs isa Tuple # only support computation of Hessian for functions with single input argument @assert length(xs) == 1 + xs = xs[1] end - return jacobian(secondlowest(ab), (xs...,) -> begin - gradient(lowest(ab), f, xs...) - end, xs...) 
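+ # Second order is obtained by composing two (possibly different) backends:
+ # the second-lowest backend differentiates the gradient computed by the
+ # lowest backend. `gradient` returns a tuple, hence the [1] below.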
+ return jacobian(secondlowest(ab), xs -> begin + gradient(lowest(ab), f, xs)[1] # gradient returns a tuple + end, xs) end function value_and_derivative(ab::AbstractBackend, f, xs::Number...) @@ -92,7 +93,7 @@ function value_and_hessian(ab::AbstractBackend, f, xs...) value = primalvalue(ab, v, f, xs) primalcalled = true end - return g + return g[1] # gradient returns a tuple end, xs...) return value, hess end @@ -105,7 +106,7 @@ function value_and_hessian(ab::HigherOrderBackend, f, xs...) value = primalvalue(ab, v, f, xs) primalcalled = true end - return g + return g[1] # gradient returns a tuple end, xs...) return value, hess end @@ -118,9 +119,9 @@ function value_gradient_and_hessian(ab::AbstractBackend, f, xs...) value = primalvalue(secondlowest(ab), v, f, xs) primalcalled = true end - return g + return g[1] # gradient returns a tuple end, xs...) - return value, grads, hess + return value, (grads,) , hess end function value_gradient_and_hessian(ab::HigherOrderBackend, f, xs...) local value @@ -131,9 +132,9 @@ function value_gradient_and_hessian(ab::HigherOrderBackend, f, xs...) value = primalvalue(secondlowest(ab), v, f, xs) primalcalled = true end - return g + return g[1] # gradient returns a tuple end, xs...) - return value, grads, hess + return value, (grads,) , hess end function pushforward_function( @@ -244,7 +245,11 @@ function value_and_pullback_function( value = primalvalue(lowest(ab), vs, f, xs) primalcalled = true end - return vs + if vs isa Tuple + return vs[1] + else + return vs + end end, xs...)(ws) return value, pb end @@ -382,13 +387,18 @@ function Base.:*(d::LazyHessian, ys) end if d.xs isa Tuple - return pushforward_function( + res = pushforward_function( secondlowest(d.backend), - (xs...,) -> gradient(lowest(d.backend), d.f, xs...), d.xs...,)(ys) + (xs...,) -> gradient(lowest(d.backend), d.f, xs...)[1], d.xs...,)(ys) # [1] because gradient returns a tuple else - return pushforward_function( + res = pushforward_function( secondlowest(d.backend), - (xs,) -> gradient(lowest(d.backend), d.f, xs),d.xs,)(ys) + (xs,) -> gradient(lowest(d.backend), d.f, xs)[1],d.xs,)(ys) # gradient returns a tuple + end + if res isa Tuple + return res + else + return (res,) end end diff --git a/test/runtests.jl b/test/runtests.jl index d2ec67a..8432230 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -107,15 +107,6 @@ AD.@primitive function pullback_function(ab::ZygoteBackend1, f, xs...) end ## -struct ZygoteBackend <: AD.AbstractReverseMode end -AD.@primitive function pullback_function(ab::ZygoteBackend, f, xs...) - return function (vs) - # Supports only single output and tuple as input - _, back = Zygote.pullback(f, xs...) 
- back(vs[1])
- end
-end
-
 fder(x, y) = exp(y) * x + y * log(x)
 dfderdx(x, y) = exp(y) + y * 1/x
 dfderdy(x, y) = exp(y) * x + log(x)
@@ -616,4 +607,3 @@ end
 end
 end
 end
-

From 9d0f27e65a920a5a922ddbfb6c3595ea27c4f9ef Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Wed, 4 Aug 2021 15:48:57 +0200
Subject: [PATCH 22/25] Use HigherOrderBackend for second-order tests

---
 src/AbstractDifferentiation.jl | 10 +++++++++-
 test/runtests.jl | 30 ++++++++++++++++++++++++++++--
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index a5673a9..1ad7fd5 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -16,7 +16,11 @@ struct HigherOrderBackend{B} <: AbstractBackend
 end
 reduceorder(b::AbstractBackend) = b
 function reduceorder(b::HigherOrderBackend)
- return HigherOrderBackend(reverse(Base.tail(reverse(b.backends))))
+ if length(b.backends) == 1
+ return lowest(b) # avoid an empty backend tuple (and the resulting error) when reducing a single-element HigherOrderBackend
+ else
+ return HigherOrderBackend(reverse(Base.tail(reverse(b.backends))))
+ end
 end
 lowest(b::AbstractBackend) = b
 lowest(b::HigherOrderBackend) = b.backends[end]
@@ -43,6 +47,10 @@ function gradient(ab::AbstractBackend, f, xs...)
 return reshape.(adjoint.(jacobian(lowest(ab), f, xs...)),size.(xs))
 end
 function jacobian(ab::AbstractBackend, f, xs...) end
+function jacobian(ab::HigherOrderBackend, f, xs...)
+ return jacobian(lowest(ab), f, xs...)
+end
+
 function hessian(ab::AbstractBackend, f, xs...)
 if xs isa Tuple
 # only support computation of Hessian for functions with single input argument
 @assert length(xs) == 1
diff --git a/test/runtests.jl b/test/runtests.jl
index 8432230..aba4c0e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -141,6 +141,17 @@ const yvec = rand(5)
 xvec2 = deepcopy(xvec)
 yvec2 = deepcopy(yvec)

+function test_higher_order_backend(backends...)
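+ # Sanity-check the backend-stack accessors. For backends = (b1, b2, b3):
+ # AD.lowest returns b3, AD.secondlowest returns b2, and each AD.reduceorder
+ # call drops the current lowest backend until a plain AbstractBackend is left.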
+ ADbackends = AD.HigherOrderBackend(backends) + @test backends[end] == AD.lowest(ADbackends) + @test backends[end-1] == AD.secondlowest(ADbackends) + + for i in length(backends):-1:1 + @test backends[i] == AD.lowest(ADbackends) + ADbackends = AD.reduceorder(ADbackends) + end + backends[1] == AD.reduceorder(ADbackends) +end function test_derivatives(backend; multiple_inputs=true) # test with respect to analytical solution @@ -480,6 +491,9 @@ function test_lazy_hessians(backend; multiple_inputs=true) end @testset "AbstractDifferentiation.jl" begin + @testset "Utils" begin + test_higher_order_backend(fdm_backend1, fdm_backend2, fdm_backend3, zygote_backend1, forwarddiff_backend2) + end @testset "FiniteDifferences" begin @testset "Derivative" begin test_derivatives(fdm_backend1) @@ -585,7 +599,11 @@ end test_jacobians(zygote_backend1) end @testset "Hessian" begin - test_hessians(zygote_backend1) + # Zygote over Zygote problems + backends = AD.HigherOrderBackend((forwarddiff_backend2,zygote_backend1)) + test_hessians(backends) + backends = AD.HigherOrderBackend((zygote_backend1,forwarddiff_backend1)) + test_hessians(backends) end @testset "jvp" begin test_jvp(zygote_backend1) @@ -603,7 +621,15 @@ end test_lazy_jacobians(zygote_backend1) end @testset "Lazy Hessian" begin - test_lazy_hessians(zygote_backend1) + # Zygote over Zygote problems + backends = AD.HigherOrderBackend((forwarddiff_backend2,zygote_backend1)) + test_lazy_hessians(backends) + backends = AD.HigherOrderBackend((zygote_backend1,forwarddiff_backend1)) + test_lazy_hessians(backends) end end end + + +backends = AD.HigherOrderBackend((zygote_backend1,forwarddiff_backend2)) +test_hessians(backends) \ No newline at end of file From eca9fda2975acae9976d2dee23537f6648da93d9 Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Wed, 4 Aug 2021 15:52:14 +0200 Subject: [PATCH 23/25] comment failing test. --- test/runtests.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index aba4c0e..f4d8f20 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -604,6 +604,9 @@ end test_hessians(backends) backends = AD.HigherOrderBackend((zygote_backend1,forwarddiff_backend1)) test_hessians(backends) + # fails: + # backends = AD.HigherOrderBackend((zygote_backend1,forwarddiff_backend2)) + # test_hessians(backends) end @testset "jvp" begin test_jvp(zygote_backend1) @@ -631,5 +634,3 @@ end end -backends = AD.HigherOrderBackend((zygote_backend1,forwarddiff_backend2)) -test_hessians(backends) \ No newline at end of file From 96b1a263c4e0c2d9492dac20b89b0d145be5ae60 Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Wed, 11 Aug 2021 11:06:39 +0200 Subject: [PATCH 24/25] xs ... -> x for Hessian input --- src/AbstractDifferentiation.jl | 81 +++++++++++++++++++++------------- test/runtests.jl | 3 +- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index 1ad7fd5..fc1f2e3 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -51,15 +51,15 @@ function jacobian(ab::HigherOrderBackend, f, xs...) jacobian(lowest(ab), f, xs...) end -function hessian(ab::AbstractBackend, f, xs...) 
- if xs isa Tuple +function hessian(ab::AbstractBackend, f, x) + if x isa Tuple # only support computation of Hessian for functions with single input argument - @assert length(xs) == 1 - xs = xs[1] + @assert length(x) == 1 + x = x[1] end - return jacobian(secondlowest(ab), xs -> begin - gradient(lowest(ab), f, xs)[1] # gradient returns a tuple - end, xs) + return jacobian(secondlowest(ab), x -> begin + gradient(lowest(ab), f, x)[1] # gradient returns a tuple + end, x) end function value_and_derivative(ab::AbstractBackend, f, xs::Number...) @@ -88,61 +88,82 @@ function value_and_jacobian(ab::AbstractBackend, f, xs...) return value, jacs end -function value_and_hessian(ab::AbstractBackend, f, xs...) +function value_and_hessian(ab::AbstractBackend, f, x) + if x isa Tuple + # only support computation of Hessian for functions with single input argument + @assert length(x) == 1 + x = x[1] + end + local value primalcalled = false if ab isa AbstractFiniteDifference - value = primalvalue(ab, nothing, f, xs) + value = primalvalue(ab, nothing, f, (x,)) primalcalled = true end - hess = jacobian(secondlowest(ab), (_xs...,) -> begin - v, g = value_and_gradient(lowest(ab), f, _xs...) + hess = jacobian(secondlowest(ab), _x -> begin + v, g = value_and_gradient(lowest(ab), f, _x) if !primalcalled - value = primalvalue(ab, v, f, xs) + value = primalvalue(ab, v, f, (x,)) primalcalled = true end return g[1] # gradient returns a tuple - end, xs...) + end, x) return value, hess end -function value_and_hessian(ab::HigherOrderBackend, f, xs...) +function value_and_hessian(ab::HigherOrderBackend, f, x) + if x isa Tuple + # only support computation of Hessian for functions with single input argument + @assert length(x) == 1 + x = x[1] + end local value primalcalled = false - hess = jacobian(secondlowest(ab), (_xs...,) -> begin - v, g = value_and_gradient(lowest(ab), f, _xs...) + hess = jacobian(secondlowest(ab), (_x,) -> begin + v, g = value_and_gradient(lowest(ab), f, _x) if !primalcalled - value = primalvalue(ab, v, f, xs) + value = primalvalue(ab, v, f, (x,)) primalcalled = true end return g[1] # gradient returns a tuple - end, xs...) + end, x) return value, hess end -function value_gradient_and_hessian(ab::AbstractBackend, f, xs...) +function value_gradient_and_hessian(ab::AbstractBackend, f, x) + if x isa Tuple + # only support computation of Hessian for functions with single input argument + @assert length(x) == 1 + x = x[1] + end local value primalcalled = false - grads, hess = value_and_jacobian(secondlowest(ab), (_xs...,) -> begin - v, g = value_and_gradient(lowest(ab), f, _xs...) + grads, hess = value_and_jacobian(secondlowest(ab), _x -> begin + v, g = value_and_gradient(lowest(ab), f, _x) if !primalcalled - value = primalvalue(secondlowest(ab), v, f, xs) + value = primalvalue(secondlowest(ab), v, f, (x,)) primalcalled = true end return g[1] # gradient returns a tuple - end, xs...) - return value, (grads,) , hess + end, x) + return value, (grads,), hess end -function value_gradient_and_hessian(ab::HigherOrderBackend, f, xs...) +function value_gradient_and_hessian(ab::HigherOrderBackend, f, x) + if x isa Tuple + # only support computation of Hessian for functions with single input argument + @assert length(x) == 1 + x = x[1] + end local value primalcalled = false - grads, hess = value_and_jacobian(secondlowest(ab), (_xs...,) -> begin - v, g = value_and_gradient(lowest(ab), f, _xs...) 
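+ # Same single-argument convention as in `hessian` above: the inner closure
+ # now takes one array `_x` instead of a splatted `_xs...`, so the
+ # second-order backend differentiates a function of a single input.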
+ grads, hess = value_and_jacobian(secondlowest(ab), _x -> begin
+ v, g = value_and_gradient(lowest(ab), f, _x)
 if !primalcalled
- value = primalvalue(secondlowest(ab), v, f, xs)
+ value = primalvalue(secondlowest(ab), v, f, (x,))
 primalcalled = true
 end
 return g[1] # gradient returns a tuple
- end, xs...)
- return value, (grads,) , hess
+ end, x)
+ return value, (grads,), hess
 end

 function pushforward_function(
diff --git a/test/runtests.jl b/test/runtests.jl
index f4d8f20..1bf305a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -222,7 +222,8 @@ function test_hessians(backend; multiple_inputs=false)
 else
 # explicit test that AbstractDifferentiation throws an error
 # don't support tuple of Hessians
- @test_throws AssertionError H1 = AD.hessian(backend, fgrad, xvec, yvec)
+ @test_throws AssertionError H1 = AD.hessian(backend, fgrad, (xvec, yvec))
+ @test_throws MethodError H1 = AD.hessian(backend, fgrad, xvec, yvec)
 end

 # @test dfgraddxdx(xvec,yvec) ≈ H1[1] atol=1e-10

From c384a1b840e8fa9b02ca7bd9695b1782073154a4 Mon Sep 17 00:00:00 2001
From: Frank Schaefer
Date: Wed, 11 Aug 2021 11:37:30 +0200
Subject: [PATCH 25/25] Remove unnecessary tuple conversion

---
 src/AbstractDifferentiation.jl | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl
index fc1f2e3..96e9dc5 100644
--- a/src/AbstractDifferentiation.jl
+++ b/src/AbstractDifferentiation.jl
@@ -274,11 +274,7 @@ function value_and_pullback_function(
 value = primalvalue(lowest(ab), vs, f, xs)
 primalcalled = true
 end
- if vs isa Tuple
- return vs[1]
- else
- return vs
- end
+ return vs
 end, xs...)(ws)
 return value, pb
 end
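
After this series the exported API can be exercised end to end. The
following is a minimal usage sketch, not part of the patches themselves: it
assumes the test backends zygote_backend1 and forwarddiff_backend2 defined
in test/runtests.jl, and the functions and inputs are illustrative only.

    using AbstractDifferentiation, Zygote, ForwardDiff  # AD is exported by the package

    x = rand(5)

    # First order, reverse mode: primal value plus vector-Jacobian products.
    f(x) = x .* x                  # Jacobian is Diagonal(2x)
    w = rand(5)                    # cotangent with the same length as f(x)
    val, pb = AD.value_and_pullback_function(zygote_backend1, f, x)(w)
    # val == f(x) and pb[1] ≈ 2 .* x .* w

    # Second order: ForwardDiff Jacobian over Zygote gradient via HigherOrderBackend.
    g(x) = sum(abs2, x) / 2        # gradient is x, Hessian is the identity
    hob = AD.HigherOrderBackend((forwarddiff_backend2, zygote_backend1))
    H = AD.hessian(hob, g, x)      # a 1-tuple; H[1] ≈ the 5x5 identity matrix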