From 14a376499d1d14d799582e5ce1acc657065d2762 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20F=C3=A4lt?= Date: Fri, 20 Oct 2017 18:04:15 +0200 Subject: [PATCH 1/3] Stared work on gradients --- src/functions/elasticNet.jl | 11 +++++++++++ src/functions/indSOC.jl | 12 ++++++------ src/functions/linear.jl | 4 +++- src/functions/logBarrier.jl | 11 +++++++++++ src/functions/normL1.jl | 5 +++++ src/functions/normL2.jl | 10 ++++++++++ src/functions/normLinf.jl | 7 +++++++ 7 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/functions/elasticNet.jl b/src/functions/elasticNet.jl index 56c32e90..8afbe234 100644 --- a/src/functions/elasticNet.jl +++ b/src/functions/elasticNet.jl @@ -88,6 +88,17 @@ function prox!{R <: Real}(y::AbstractArray{Complex{R}}, f::ElasticNet{R}, x::Abs return f.mu*norm1x + (f.lambda/2)*sqnorm2x end +function gradient!{T <: RealOrComplex, R <: Real}(y::AbstractArray{T}, f::Conjugate{IndBallL1{R}}, x::AbstractArray{T}) + # Gradient of 1 norm + y .= f.mu.*sign.(x) + # Gradient of 2 norm + f2norm2 = vecnorm(x,2)^2 + if f2norm2 != 0 + y .+= (lambda/f2norm2).*x + end + return f.mu*vecnorm(x,1) + (f.lambda/2)*f2norm2 +end + fun_name(f::ElasticNet) = "elastic-net regularization" fun_dom(f::ElasticNet) = "AbstractArray{Real}, AbstractArray{Complex}" fun_expr(f::ElasticNet) = "x ↦ μ||x||_1 + (λ/2)||x||²" diff --git a/src/functions/indSOC.jl b/src/functions/indSOC.jl index e8365193..f36654af 100644 --- a/src/functions/indSOC.jl +++ b/src/functions/indSOC.jl @@ -27,7 +27,7 @@ is_convex(f::IndSOC) = true is_set(f::IndSOC) = true function prox!{T <: Real}(y::AbstractArray{T,1}, f::IndSOC, x::AbstractArray{T,1}, gamma::T=one(T)) - nx = norm(x[2:end]) + @views nx = norm(x[2:end]) t = x[1] if t <= -nx y[:] = 0.0 @@ -36,7 +36,7 @@ function prox!{T <: Real}(y::AbstractArray{T,1}, f::IndSOC, x::AbstractArray{T,1 else r = 0.5 * (1 + t / nx) y[1] = r * nx - y[2:end] = r * x[2:end] + @views y[2:end] .= r .* x[2:end] end return 0.0 end @@ -57,7 +57,7 @@ function prox_naive{T <: Real}(f::IndSOC, x::AbstractArray{T,1}, gamma=1.0) y = zeros(x) r = 0.5 * (1 + t / nx) y[1] = r * nx - y[2:end] = r * x[2:end] + y[2:end] .= r .* x[2:end] end return y, 0.0 end @@ -95,19 +95,19 @@ function prox!{T <: Real}(y::AbstractArray{T,1}, f::IndRotatedSOC, x::AbstractAr x1 = 0.7071067811865475*x[1] + 0.7071067811865475*x[2] x2 = 0.7071067811865475*x[1] - 0.7071067811865475*x[2] # project rotated x onto SOC - nx = sqrt(x2^2+norm(x[3:end])^2) + @views nx = sqrt(x2^2+norm(x[3:end])^2) t = x1 if t <= -nx y[:] = 0.0 elseif t >= nx y[1] = x1 y[2] = x2 - y[3:end] = x[3:end] + @views y[3:end] .= x[3:end] else r = 0.5 * (1 + t / nx) y[1] = r * nx y[2] = r * x2 - y[3:end] = r * x[3:end] + @views y[3:end] = r .* x[3:end] end # rotate back y cw by pi/4 y1 = 0.7071067811865475*y[1] + 0.7071067811865475*y[2] diff --git a/src/functions/linear.jl b/src/functions/linear.jl index 98384d9f..5219bc6b 100644 --- a/src/functions/linear.jl +++ b/src/functions/linear.jl @@ -17,6 +17,9 @@ end Linear(c::A) where {R <: RealOrComplex, A <: AbstractArray{R}} = Linear{R, A}(c) +is_separable(f::Linear) = true +is_convex(f::Linear) = true + function (f::Linear{RC, A})(x::AbstractArray{RC}) where {R <: Real, RC <: Union{R, Complex{R}}, A <: AbstractArray{RC}} return vecdot(f.c, x) end @@ -28,7 +31,6 @@ function prox!(y::AbstractArray{RC}, f::Linear{RC, A}, x::AbstractArray{RC}, gam end function gradient!(y::AbstractArray{RC}, f::Linear{RC, A}, x::AbstractArray{RC}) where {R <: Real, RC <: Union{R, Complex{R}}, A <: AbstractArray{RC}} 
- A_mul_B!(y, f.Q, x) y .= f.c return vecdot(f.c, x) end diff --git a/src/functions/logBarrier.jl b/src/functions/logBarrier.jl index 5fe51196..6db9fbe2 100644 --- a/src/functions/logBarrier.jl +++ b/src/functions/logBarrier.jl @@ -74,6 +74,17 @@ function prox!{T <: Real}(y::AbstractArray{T}, f::LogBarrier, x::AbstractArray{T return -f.mu*sumf end +function gradient!{T <: Real}(y::AbstractArray{T}, f::LogBarrier, x::AbstractArray{T}) + sum = 0.0 + for i in eachindex(x) + logarg = f.a*x[i]+f.b + y[i] = -f.mu*f.a/logarg + sum += log(logarg) + end + sum *= -f.mu + return sum +end + fun_name(f::LogBarrier) = "logarithmic barrier" fun_dom(f::LogBarrier) = "AbstractArray{Real}" fun_expr(f::LogBarrier) = "x ↦ -μ * sum( log(a*x_i+b), i=1,...,n )" diff --git a/src/functions/normL1.jl b/src/functions/normL1.jl index 0d298fc1..6493a0a1 100644 --- a/src/functions/normL1.jl +++ b/src/functions/normL1.jl @@ -135,6 +135,11 @@ function prox!{T <: Real, R <: Real}(y::AbstractArray{Complex{R}}, f::NormL1{T}, return f.lambda*n1y end +function gradient!{T <: Union{Real, Complex}}(y::AbstractArray{T}, f::NormL1, x::AbstractArray{T}) + y .= f.lambda.*sign.(x) + return f(x) +end + fun_name(f::NormL1) = "weighted L1 norm" fun_dom(f::NormL1) = "AbstractArray{Real}, AbstractArray{Complex}" fun_expr{R <: Real}(f::NormL1{R}) = "x ↦ λ||x||_1" diff --git a/src/functions/normL2.jl b/src/functions/normL2.jl index 45d9ba7a..21f4db20 100644 --- a/src/functions/normL2.jl +++ b/src/functions/normL2.jl @@ -41,6 +41,16 @@ function prox!{T <: RealOrComplex}(y::AbstractArray{T}, f::NormL2, x::AbstractAr return f.lambda*scale*vecnormx end +function gradient!{T <: Union{Real, Complex}}(y::AbstractArray{T}, f::NormL2, x::AbstractArray{T}) + fx = f(x) + if fx == 0 + y .= 0 + else + y .= (lambda/fx).*x + end + return fx +end + fun_name(f::NormL2) = "Euclidean norm" fun_dom(f::NormL2) = "AbstractArray{Real}, AbstractArray{Complex}" fun_expr(f::NormL2) = "x ↦ λ||x||_2" diff --git a/src/functions/normLinf.jl b/src/functions/normLinf.jl index eb9bdd63..e1ac28e7 100644 --- a/src/functions/normLinf.jl +++ b/src/functions/normLinf.jl @@ -20,6 +20,13 @@ function (f::Conjugate{IndBallL1{R}}){R <: Real, S <: RealOrComplex}(x::Abstract return (f.f.r)*vecnorm(x, Inf) end +function gradient!{T <: RealOrComplex, R <: Real}(y::AbstractArray{T}, f::Conjugate{IndBallL1{R}}, x::AbstractArray{T}) + absxi, i = findmax(abs(xi) for xi in x) # Largest absolute value + y .= 0 + y[i] = lambda*sign(x[i]) + return lambda*absxi +end + fun_name{R <: Real}(f::Postcompose{Conjugate{IndBallL1{R}}, R}) = "weighted L-infinity norm" fun_expr{R <: Real}(f::Postcompose{Conjugate{IndBallL1{R}}, R}) = "x ↦ λ||x||_∞ = λ⋅max(abs(x))" fun_params{R <: Real}(f::Postcompose{Conjugate{IndBallL1{R}}, R}) = "λ = $(f.a*(f.f.f.r))" From fa29a3f1671e36c90e8ac34fac0c9a5c2c0646f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20F=C3=A4lt?= Date: Mon, 23 Oct 2017 10:45:10 +0200 Subject: [PATCH 2/3] Almost all gradients (Issue #43) implemented, test still missing --- src/functions/elasticNet.jl | 2 +- src/functions/sumPositive.jl | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/functions/elasticNet.jl b/src/functions/elasticNet.jl index 8afbe234..1fcbe267 100644 --- a/src/functions/elasticNet.jl +++ b/src/functions/elasticNet.jl @@ -88,7 +88,7 @@ function prox!{R <: Real}(y::AbstractArray{Complex{R}}, f::ElasticNet{R}, x::Abs return f.mu*norm1x + (f.lambda/2)*sqnorm2x end -function gradient!{T <: RealOrComplex, R <: Real}(y::AbstractArray{T}, 
f::Conjugate{IndBallL1{R}}, x::AbstractArray{T}) +function gradient!{T <: RealOrComplex, R <: Real}(y::AbstractArray{T}, f::ElasticNet{R}, x::AbstractArray{T}) # Gradient of 1 norm y .= f.mu.*sign.(x) # Gradient of 2 norm diff --git a/src/functions/sumPositive.jl b/src/functions/sumPositive.jl index 5bb4d47a..f12f4a55 100644 --- a/src/functions/sumPositive.jl +++ b/src/functions/sumPositive.jl @@ -19,7 +19,7 @@ is_separable(f::SumPositive) = true is_convex(f::SumPositive) = true function (f::SumPositive){T <: Real}(x::AbstractArray{T}) - return sum(max.(0.0, x)) + return sum(xi -> max(xi,0), x) end function prox!{T <: Real}(y::AbstractArray{T}, f::SumPositive, x::AbstractArray{T}, gamma::Real=1.0) @@ -31,6 +31,11 @@ function prox!{T <: Real}(y::AbstractArray{T}, f::SumPositive, x::AbstractArray{ return fsum end +function gradient!{T <: Real}(y::AbstractArray{T}, f::SumPositive, x::AbstractArray{T}) + y .= max.(0, sign.(x)) + return sum(xi -> max(xi,0), x) +end + fun_name(f::SumPositive) = "Sum of the positive coefficients" fun_dom(f::SumPositive) = "AbstractArray{Real}" fun_expr(f::SumPositive) = "x ↦ sum(max(0, x))" From d750efab0c60c3b397501fdea2e4ea48fcabde7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20F=C3=A4lt?= Date: Tue, 24 Oct 2017 18:13:01 +0200 Subject: [PATCH 3/3] Fixed gradients, added tests and updated documentation --- FUNCTIONS.md | 35 +++++---- docs/src/functions.md | 6 ++ src/ProximalOperators.jl | 1 + src/calculus/distL2.jl | 24 ++++-- src/calculus/sqrDistL2.jl | 11 ++- src/functions/elasticNet.jl | 7 +- src/functions/normL2.jl | 6 +- src/functions/normLinf.jl | 4 +- src/functions/quadratic.jl | 1 + src/utilities/vecnormdiff.jl | 15 ++++ test/runtests.jl | 4 + test/test_gradients.jl | 141 +++++++++++++++++++++++++++++++++++ 12 files changed, 217 insertions(+), 38 deletions(-) create mode 100644 src/utilities/vecnormdiff.jl create mode 100644 test/test_gradients.jl diff --git a/FUNCTIONS.md b/FUNCTIONS.md index 8b711f93..a421880e 100644 --- a/FUNCTIONS.md +++ b/FUNCTIONS.md @@ -16,6 +16,7 @@ Name | Type of set | Properties `IndBallRank` | Set of matrices with given rank | nonconvex `IndBinary` | Indicator of a binary set | nonconvex, separable `IndBox` | Box | convex, separable +`IndGraph` | Indicator of the graph of a linear op. 
| convex `IndExpPrimal` | Indicator of (primal) exponential cone | convex cone `IndExpDual` | Indicator of (dual) exponential cone | convex cone `IndFree` | Indicator of the free cone | convex cone, separable @@ -34,33 +35,31 @@ Name | Type of set | Properties Name | Description | Properties ----------------|-------------------------------------|---------------- -`ElasticNet` | Elastic-net regularization | convex, separable +`ElasticNet` | Elastic-net regularization | convex, separable, gradient `NormL0` | L0 pseudo-norm | nonconvex -`NormL1` | L1 norm | convex, separable -`NormL2` | Euclidean norm | convex +`NormL1` | L1 norm | convex, separable, gradient +`NormL2` | Euclidean norm | convex, gradient `NormL21` | Sum-of-L2 norms | convex -`NormLinf` | L-infinity norm | convex +`NormLinf` | L-infinity norm | convex, gradient `NuclearNorm` | Nuclear norm | convex -`SqrNormL2` | Squared Euclidean norm | convex, separable +`SqrNormL2` | Squared Euclidean norm | convex, separable, gradient -### Penalties +### Penalties and other functions Name | Description | Properties ----------------|-------------------------------------|----------------- -`HingeLoss` | Hinge loss function | convex, separable -`HuberLoss` | Huber loss function | convex -`LogBarrier` | Logarithmic barrier | convex, separable -`LeastSquares` | Sum-of-residual-squares | convex +`HingeLoss` | Hinge loss function | convex, separable, gradient +`HuberLoss` | Huber loss function | convex, gradient +`LogBarrier` | Logarithmic barrier | convex, separable, gradient +`LeastSquares` | Sum-of-residual-squares | convex, gradient +`Maximum` | Maximum coordinate of a vector | convex +`Linear` | Linear function | convex, separable, gradient +`Quadratic` | Quadratic function | convex, gradient +`SumPositive` | Sum of the positive coefficients | convex, gradient ### Distances Name | Description | Properties ----------------|------------------------------------------------------|---------------- -`DistL2` | Euclidean distance from a convex set | convex -`SqrDistL2` | Squared Euclidean distance from a convex set | convex - -### Other functions - -Name | Description | Properties -----------------|------------------------------------------------------|---------------- -`Maximum` | Maximum coordinate of a vector | convex +`DistL2` | Euclidean distance from a convex set | convex, gradient +`SqrDistL2` | Squared Euclidean distance from a convex set | convex, gradient diff --git a/docs/src/functions.md b/docs/src/functions.md index 2305ba99..3d5045e9 100644 --- a/docs/src/functions.md +++ b/docs/src/functions.md @@ -75,3 +75,9 @@ Maximum Quadratic SumPositive ``` + +## Distances +```@docs +DistL2 +SqrDistL2 +``` diff --git a/src/ProximalOperators.jl b/src/ProximalOperators.jl index 102c1bbc..8feb5f05 100644 --- a/src/ProximalOperators.jl +++ b/src/ProximalOperators.jl @@ -23,6 +23,7 @@ include("utilities/deep.jl") include("utilities/linops.jl") include("utilities/symmetricpacked.jl") include("utilities/uniformarrays.jl") +include("utilities/vecnormdiff.jl") # Basic functions diff --git a/src/calculus/distL2.jl b/src/calculus/distL2.jl index cb96d360..1084fdb8 100644 --- a/src/calculus/distL2.jl +++ b/src/calculus/distL2.jl @@ -9,7 +9,7 @@ export DistL2 Given `ind_S` the indicator function of a convex set ``S``, and an optional positive parameter `λ`, returns the (weighted) Euclidean distance from ``S``, that is function ```math -g(x) = \\mathrm{dist}_S(x) = \\min \\{ \\|y - x\\| : y \\in S \\}. 
+g(x) = λ\\mathrm{dist}_S(x) = \\min \\{ λ\\|y - x\\| : y \\in S \\}. ``` """ @@ -35,24 +35,32 @@ DistL2{R <: Real, T <: ProximableFunction}(ind::T, lambda::R=1.0) = DistL2{R, T} function (f::DistL2){R <: RealOrComplex}(x::AbstractArray{R}) p, = prox(f.ind, x) - return f.lambda*vecnorm(x-p) + return f.lambda*vecnormdiff(x,p) end function prox!{R <: RealOrComplex}(y::AbstractArray{R}, f::DistL2, x::AbstractArray{R}, gamma::Real=1.0) - p, = prox(f.ind, x) - d = vecnorm(x-p) + prox!(y, f.ind, x) + d = vecnormdiff(x,y) gamlam = (gamma*f.lambda) if gamlam < d gamlamd = gamlam/d - for k in eachindex(p) - y[k] = (1-gamlamd)*x[k] + gamlamd*p[k] - end + y .= (1-gamlamd).*x .+ gamlamd.*y return f.lambda*(d-gamlam) end - y[:] = p return 0.0 end +function gradient!{T <: RealOrComplex}(y::AbstractArray{T}, f::DistL2, x::AbstractArray{T}) + prox!(y, f.ind, x) # Use y as temporary storage + dist = vecnormdiff(x,y) + if dist > 0 + y .= (f.lambda/dist).*(x .- y) + else + y .= 0 + end + return f.lambda*dist +end + fun_name(f::DistL2) = "Euclidean distance from a convex set" fun_dom(f::DistL2) = fun_dom(f.ind) fun_expr(f::DistL2) = "x ↦ λ inf { ||x-y|| : y ∈ S} " diff --git a/src/calculus/sqrDistL2.jl b/src/calculus/sqrDistL2.jl index 4fcfa5f9..6e36e50c 100644 --- a/src/calculus/sqrDistL2.jl +++ b/src/calculus/sqrDistL2.jl @@ -38,12 +38,12 @@ SqrDistL2{R <: Real, T <: ProximableFunction}(ind::T, lambda::R=1.0) = SqrDistL2 function (f::SqrDistL2){T <: RealOrComplex}(x::AbstractArray{T}) p, = prox(f.ind, x) - return (f.lambda/2)*vecnorm(x-p)^2 + return (f.lambda/2)*vecnormdiff2(x,p) end function prox!{T <: RealOrComplex}(y::AbstractArray{T}, f::SqrDistL2, x::AbstractArray{T}, gamma::Real=1.0) p, = prox(f.ind, x) - sqrd = (f.lambda/2)*vecnorm(x-p)^2 + sqrd = (f.lambda/2)*vecnormdiff2(x,p) c1 = 1/(1+f.lambda*gamma) c2 = f.lambda*gamma*c1 for k in eachindex(p) @@ -52,6 +52,13 @@ function prox!{T <: RealOrComplex}(y::AbstractArray{T}, f::SqrDistL2, x::Abstrac return sqrd*c1^2 end +function gradient!{T <: RealOrComplex}(y::AbstractArray{T}, f::SqrDistL2, x::AbstractArray{T}) + p, = prox(f.ind, x) + dist2 = vecnormdiff2(x,p) + y .= f.lambda.*(x .- p) + return (f.lambda/2)*dist2 +end + fun_name(f::SqrDistL2) = "squared Euclidean distance from a convex set" fun_dom(f::SqrDistL2) = fun_dom(f.ind) fun_expr(f::SqrDistL2) = "x ↦ (λ/2) inf { ||x-y||^2 : y ∈ S} " diff --git a/src/functions/elasticNet.jl b/src/functions/elasticNet.jl index 1fcbe267..ab788733 100644 --- a/src/functions/elasticNet.jl +++ b/src/functions/elasticNet.jl @@ -92,11 +92,8 @@ function gradient!{T <: RealOrComplex, R <: Real}(y::AbstractArray{T}, f::Elasti # Gradient of 1 norm y .= f.mu.*sign.(x) # Gradient of 2 norm - f2norm2 = vecnorm(x,2)^2 - if f2norm2 != 0 - y .+= (lambda/f2norm2).*x - end - return f.mu*vecnorm(x,1) + (f.lambda/2)*f2norm2 + y .+= f.lambda.*x + return f.mu*vecnorm(x,1) + (f.lambda/2)*vecnorm(x,2)^2 end fun_name(f::ElasticNet) = "elastic-net regularization" diff --git a/src/functions/normL2.jl b/src/functions/normL2.jl index 21f4db20..d7b1d7a2 100644 --- a/src/functions/normL2.jl +++ b/src/functions/normL2.jl @@ -42,13 +42,13 @@ function prox!{T <: RealOrComplex}(y::AbstractArray{T}, f::NormL2, x::AbstractAr end function gradient!{T <: Union{Real, Complex}}(y::AbstractArray{T}, f::NormL2, x::AbstractArray{T}) - fx = f(x) + fx = vecnorm(x) # Value of f, without lambda if fx == 0 y .= 0 else - y .= (lambda/fx).*x + y .= (f.lambda/fx).*x end - return fx + return f.lambda*fx end fun_name(f::NormL2) = "Euclidean norm" diff --git 
a/src/functions/normLinf.jl b/src/functions/normLinf.jl index e1ac28e7..2d1a1b1c 100644 --- a/src/functions/normLinf.jl +++ b/src/functions/normLinf.jl @@ -23,8 +23,8 @@ end function gradient!{T <: RealOrComplex, R <: Real}(y::AbstractArray{T}, f::Conjugate{IndBallL1{R}}, x::AbstractArray{T}) absxi, i = findmax(abs(xi) for xi in x) # Largest absolute value y .= 0 - y[i] = lambda*sign(x[i]) - return lambda*absxi + y[i] = f.f.r*sign(x[i]) + return f.f.r*absxi end fun_name{R <: Real}(f::Postcompose{Conjugate{IndBallL1{R}}, R}) = "weighted L-infinity norm" diff --git a/src/functions/quadratic.jl b/src/functions/quadratic.jl index c6c3e595..1d8e1323 100644 --- a/src/functions/quadratic.jl +++ b/src/functions/quadratic.jl @@ -19,6 +19,7 @@ If `iterative=true`, then `prox!` is evaluated approximately using an iterative abstract type Quadratic <: ProximableFunction end +is_convex(f::Quadratic) = true is_smooth(f::Quadratic) = true is_quadratic(f::Quadratic) = true diff --git a/src/utilities/vecnormdiff.jl b/src/utilities/vecnormdiff.jl new file mode 100644 index 00000000..c0f092bb --- /dev/null +++ b/src/utilities/vecnormdiff.jl @@ -0,0 +1,15 @@ +""" +Fast (non-allocating) version of vecnorm(x-y,2)^2 +""" +function vecnormdiff2(x,y) + s = 0.0 + for i in eachindex(x) + s += abs2(x[i]-y[i]) + end + return s +end + +""" +Fast (non-allocating) version of vecnorm(x-y,2) +""" +vecnormdiff(x,y) = sqrt(vecnormdiff2(x,y)) diff --git a/test/runtests.jl b/test/runtests.jl index 4fec029d..5832bc1f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -77,6 +77,10 @@ end include("test_graph.jl") end +@testset "Gradients" begin + include("test_gradients.jl") +end + @testset "Calculus rules" begin include("test_calculus.jl") include("test_moreauEnvelope.jl") diff --git a/test/test_gradients.jl b/test/test_gradients.jl new file mode 100644 index 00000000..ddab59d2 --- /dev/null +++ b/test/test_gradients.jl @@ -0,0 +1,141 @@ + +stuff = [ + Dict( "f" => ElasticNet(2.0, 3.0), + "x" => [-2., -1., 0., 1., 2., 3.], + "∇f(x)" => 2*[-1., -1., 0., 1., 1., 1.] 
+ 3*[-2., -1., 0., 1., 2., 3.], + ), + Dict( "f" => NormL1(2.0), + "x" => [-2., -1., 0., 1., 2., 3.], + "∇f(x)" => 2*[-1., -1., 0., 1., 1., 1.], + ), + Dict( "f" => NormL2(2.0), + "x" => [-1., 0., 2.], + "∇f(x)" => 2/sqrt(5)*[-1., 0., 2.], + ), + Dict( "f" => NormL2(2.0), + "x" => [-1., 0., 2.], + "∇f(x)" => 2/sqrt(5)*[-1., 0., 2.], + ), + Dict( "f" => NormL2(2.0), + "x" => [0., 0., 0.], + "∇f(x)" => [0., 0., 0.], + ), + # Dict( "f" => NormL21(), + # "x" => , + # "∇f(x)" => , + # ), + Dict( "f" => NormLinf(2.), + "x" => [-2., -1., 0., 1., 2., 3.], + "∇f(x)" => 2*[0., 0., 0., 0., 0., 1.], + ), + Dict( "f" => NormLinf(2.), + "x" => [-4., -1., 0., 1., 2., 3.], + "∇f(x)" => 2*[-1., 0., 0., 0., 0., 0.], + ), + # Dict( "f" => NuclearNorm(), + # "x" => , + # "∇f(x)" => , + # ), + Dict( "f" => SqrNormL2(2.0), + "x" => [-2., -1., 0., 1., 2., 3.], + "∇f(x)" => 2*[-2., -1., 0., 1., 2., 3.], + ), + Dict( "f" => HingeLoss([1., 2., 1., 2., 1.], 2.0), + "x" => [-2., -1., 0., 2., 3.], + "∇f(x)" => -2*[1., 2., 1., 0., 0.], + ), + Dict( "f" => HuberLoss(2., 3.), + "x" => [-1., 0.5], + "∇f(x)" => 3.*[-1., 0.5], + ), + Dict( "f" => HuberLoss(2., 3.), + "x" => [-2., 1.5], + "∇f(x)" => [-4.8, 3.6], # 3*2*[-2., 1.5]/norm([-2., 1.5]), + ), + Dict( "f" => Linear([-1.,2.,3.]), + "x" => [1., 5., 3.14], + "∇f(x)" => [-1.,2.,3.], + ), + Dict( "f" => LogBarrier(2.0, 1.0, 1.5), + "x" => [1.0, 2.0, 3.0], + "∇f(x)" => -1.5*2.0./(2.0*[1.0, 2.0, 3.0].+1.0),# -μ*a*/(a*x+b) + ), + Dict( "f" => LeastSquares([1.0 2.0; 3.0 4.0], [0.1, 0.2], 2.0, iterative=false), + "x" => [-1., 2.], + "∇f(x)" => [34.6, 50.], #λ*(A'A*x-A'b), + ), + Dict( "f" => LeastSquares([1.0 2.0; 3.0 4.0], [0.1, 0.2], 2.0, iterative=true), + "x" => [-1., 2.], + "∇f(x)" => [34.6, 50.], #λ*(A'A*x-A'b), + ), + Dict( "f" => Quadratic([2. -1.; -1. 2.], [0.1, 0.2], iterative=false), + "x" => [3., 4.], + "∇f(x)" => [2.1, 5.2], #[2. -1.; -1. 2.]*[3., 4.]+[0.1, 0.2], + ), + Dict( "f" => Quadratic([2. -1.; -1. 2.], [0.1, 0.2], iterative=true), + "x" => [3., 4.], + "∇f(x)" => [2.1, 5.2], #[2. -1.; -1. 
2.]*[3., 4.]+[0.1, 0.2], + ), + Dict( "f" => SumPositive(), + "x" => [-1., 1., 2.], + "∇f(x)" => [0., 1., 1.], + ), + # Dict( "f" => Maximum(2.0), + # "x" => [-4., 2., 3.], + # "∇f(x)" => 2*[0., 0., 1.], + # ), + Dict( "f" => DistL2(IndZero(),2.0), + "x" => [1., -2], + "∇f(x)" => 2*[1., -2]./norm([1., -2]), + ), + Dict( "f" => DistL2(IndBallL2(1.0),2.0), + "x" => [2., -2], + "∇f(x)" => [sqrt(2), -sqrt(2)], + ), + Dict( "f" => SqrDistL2(IndZero(),2.0), + "x" => [1., -2], + "∇f(x)" => 2*[1., -2], + ), + Dict( "f" => SqrDistL2(IndBallL2(1.0),2.0), + "x" => [2., -2], + "∇f(x)" => 2.0*([2., -2]-[1/sqrt(2), -1/(sqrt(2))]), + ), +] + +println("Testing Gradients") +for i = 1:length(stuff) + + println("----------------------------------------------------------") + + f = stuff[i]["f"] + x = stuff[i]["x"] + + println("Gradient of ", string(typeof(f), " on ", typeof(x))) + + ref_∇f = stuff[i]["∇f(x)"] + + ref_fx = f(x) + ∇f = similar(x) + fx = gradient!(∇f, f, x) + @test fx == ref_fx || abs(fx-ref_fx)/(1+abs(ref_fx)) <= TOL_ASSERT + @test ∇f == ref_∇f || norm(∇f-ref_∇f)/(1+norm(ref_∇f)) <= TOL_ASSERT + + for j = 1:11 + #For initial point x and 10 other random points + fx = gradient!(∇f, f, x) + for k = 1:10 + # Test conditions in different directions + if ProximalOperators.is_convex(f) + # Test ∇f is subgradient + d = randn(size(x)) + @test f(x+d) ≥ fx + dot(d, ∇f) - TOL_ASSERT + else + # Assume smooth function + d = randn(size(x)) + d ./= norm(d).*sqrt(TOL_ASSERT) + @test f(x+d) ≈ fx + dot(d, ∇f) atol=TOL_ASSERT + end + end + x = rand(size(x)) + end +end