From 849b9448549ee0faaa90f26885522b1475dbcae5 Mon Sep 17 00:00:00 2001 From: Vaibhav Dixit Date: Tue, 8 Oct 2024 17:19:22 -0400 Subject: [PATCH] Optimization.jl interface and format --- Project.toml | 5 +- examples/lasso/FISTASolver.jl | 93 +++++---- examples/lasso/NLoptLassoData.jl | 36 ++-- examples/lasso/SetupLasso.jl | 14 +- examples/lasso/solve_lasso.jl | 125 ++++++------ examples/lasso/solve_least_squares.jl | 44 ++--- examples/rosenbrock/DefineRosenbrock.jl | 6 +- examples/rosenbrock/NLoptRosenbrockData.jl | 36 ++-- .../rosenbrock/SparseCCSARosenbrockData.jl | 16 +- examples/rosenbrock/plot.jl | 67 +++---- examples/rosenbrock/profile.jl | 21 +- examples/rosenbrock/set_binary.jl | 3 - src/SparseCCSA.jl | 1 + src/dual.jl | 5 +- src/optimize.jl | 185 +++++++++++++----- src/structs.jl | 18 +- test/old_tests.jl | 4 +- test/runtests.jl | 12 +- test/test_dual.jl | 3 +- test/test_lasso.jl | 8 +- test/test_optimization.jl | 11 ++ test/test_rosenbrock.jl | 10 +- 22 files changed, 433 insertions(+), 290 deletions(-) create mode 100644 test/test_optimization.jl diff --git a/Project.toml b/Project.toml index 9061734..3dfe747 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.1.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" @@ -15,15 +16,15 @@ julia = "1" [extras] ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" -LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" [targets] test = ["SparseArrays", "ForwardDiff", "Test", "SafeTestsets", "JLD2", "Random", "StaticArrays", "Statistics", "IterativeSolvers", "LinearMaps"] diff --git a/examples/lasso/FISTASolver.jl b/examples/lasso/FISTASolver.jl index ad32b2c..eeb3ef3 100644 --- a/examples/lasso/FISTASolver.jl +++ b/examples/lasso/FISTASolver.jl @@ -18,47 +18,48 @@ abstract type Regularizer end # uses the Fast Iterative Soft-Thresholding Algorithm (FISTA) to # minimize f(x) + g(x) = ½(|Ax - y|² + β|x|²) + ½α|Ψx|₁ -fista(A, y, α, β, iters, tol, reg::Regularizer) = fista(A, y, α, β, iters, tol, reg::Regularizer, zeros(size(A)[2])) +function fista(A, y, α, β, iters, tol, reg::Regularizer) + fista(A, y, α, β, iters, tol, reg::Regularizer, zeros(size(A)[2])) +end function fista(A, y, α, β, iters, tol, reg::Regularizer, xstart) n, p = size(A) - - maxeig = abs(powm(A' * A, maxiter=100)[1]) # TODO: handle number of eigiters + + maxeig = abs(powm(A' * A, maxiter = 100)[1]) # TODO: handle number of eigiters L = maxeig + β # Lipschitz constant of f (strongly convex term) η = 1 / (L * lipschitz_scale(reg)) x = xstart[:] - z = x[:] + z = x[:] xold = similar(x) - res = similar(y) + res = similar(y) grad = similar(x) t = 1 - + iters_done = iters - xupdates = Float64[] + xupdates = Float64[] convdists = Float64[] evals = Float64[] Ψ = transform_op(reg) Fold = 
Inf - for i = 1:iters - + for i in 1:iters xold .= x res .= mul!(res, A, z) .- y # TODO: maybe use five-arg mul! instead. (but IterativeSolvers sticks to three-arg) - grad .= mul!(grad, A', res) .+ β .* z + grad .= mul!(grad, A', res) .+ β .* z x .= z .- η .* grad - proximal!(x, 1/2 * η * α, reg) + proximal!(x, 1 / 2 * η * α, reg) restart = dot(z .- x, x .- xold) restart > 0 && (t = 1) told = t - t = 1/2 * (1 + √(1 + 4t^2)) - - z .= x .+ (told - 1)/t .* (x .- xold) + t = 1 / 2 * (1 + √(1 + 4t^2)) + + z .= x .+ (told - 1) / t .* (x .- xold) xupdate = norm(z .- xold) / norm(xold) append!(xupdates, xupdate) @@ -69,7 +70,7 @@ function fista(A, y, α, β, iters, tol, reg::Regularizer, xstart) end end - x, (;iters=iters_done, final_tol=norm(x .- xold) / norm(x), xupdates) + x, (; iters = iters_done, final_tol = norm(x .- xold) / norm(x), xupdates) end ## regularizer implementations @@ -78,12 +79,12 @@ support(x, thresh, reg::Regularizer) = abs.(transform_op(reg) * x) .> thresh proximal(x, thresh, reg::Regularizer) = proximal!(copy(x), thresh, reg) # fallback for out-of-place proximal ## L1 regularizer -struct L1 <: Regularizer +struct L1 <: Regularizer size::Int end support(x, reg::L1) = support(x, 1e-3, reg) -lipschitz_scale(::L1) = 2. +lipschitz_scale(::L1) = 2.0 # proximal @@ -120,9 +121,16 @@ function L1Project(p::Int, supp::AbstractVector{Bool}) end Base.:(*)(P::L1Project, x::AbstractVector) = x[P.suppinv .> 0] -Base.:(*)(Pt::L1ProjectTranspose, y::AbstractVector) = [Pt.lmap.suppinv[i] > 0 ? y[Pt.lmap.suppinv[i]] : 0.0 for i in 1:Pt.lmap.p] # TODO: use zero element instead? -LinearAlgebra.mul!(uflat::AbstractVecOrMat, P::L1Project, yflat::AbstractVector) = (uflat[:] = P * yflat) -LinearAlgebra.mul!(uflat::AbstractVecOrMat, Pt::L1ProjectTranspose, yflat::AbstractVector) = (uflat[:] = Pt * yflat) +function Base.:(*)(Pt::L1ProjectTranspose, y::AbstractVector) + [Pt.lmap.suppinv[i] > 0 ? y[Pt.lmap.suppinv[i]] : 0.0 for i in 1:(Pt.lmap.p)] +end # TODO: use zero element instead? 
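+# Worked example of the pair above (hypothetical values, for review only): with p = 3
+# and supp = [true, false, true], P = L1Project(3, supp) restricts to the support, so
+# P * [1.0, 2.0, 3.0] == [1.0, 3.0], while transpose(P) scatters back with zeros,
+# giving [1.0, 0.0, 3.0]. The mul! methods below simply forward to these definitions.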
+function LinearAlgebra.mul!(uflat::AbstractVecOrMat, P::L1Project, yflat::AbstractVector) + (uflat[:] = P * yflat) +end +function LinearAlgebra.mul!( + uflat::AbstractVecOrMat, Pt::L1ProjectTranspose, yflat::AbstractVector) + (uflat[:] = Pt * yflat) +end projection_op(supp, reg::L1) = L1Project(reg.size, supp) @@ -134,7 +142,7 @@ struct TVProximalWork diff::AbstractArray{Float64} end -struct TV <: Regularizer +struct TV <: Regularizer size::Tuple wrap::Bool work::TVProximalWork @@ -162,7 +170,7 @@ function proximal!(x, thresh, reg::TV) xroll = roll(x, -1, i) diff .= (d -> min(2 * D * thresh, abs(d) / 2) * sign(d)).(xroll .- x) Δ .+= diff ./ (2 * D) - Δ .-= roll(diff, 1, i) ./ (2 * D) + Δ .-= roll(diff, 1, i) ./ (2 * D) end x .+= Δ reshape(x, prod(reg.size)) @@ -170,13 +178,15 @@ end function proximal2(x, thresh, reg::TV) y = x[:] - proxTV!(y, thresh, shape=reg.size, iterations=20) + proxTV!(y, thresh, shape = reg.size, iterations = 20) y end # transform -roll(x, shift, dim) = ShiftedArrays.circshift(x, (zeros(dim-1)..., shift, zeros(ndims(x)-dim)...)) +function roll(x, shift, dim) + ShiftedArrays.circshift(x, (zeros(dim - 1)..., shift, zeros(ndims(x) - dim)...)) +end struct Gradient <: LinearMap{Float64} size::Tuple @@ -200,8 +210,13 @@ function Base.:(*)(Dt::GradientTranspose, xflat::AbstractVector) y[:] end -LinearAlgebra.mul!(uflat::AbstractVecOrMat, D::Gradient, yflat::AbstractVector) = (uflat[:] = D * yflat) -LinearAlgebra.mul!(uflat::AbstractVecOrMat, Dt::GradientTranspose, yflat::AbstractVector) = (uflat[:] = Dt * yflat) +function LinearAlgebra.mul!(uflat::AbstractVecOrMat, D::Gradient, yflat::AbstractVector) + (uflat[:] = D * yflat) +end +function LinearAlgebra.mul!( + uflat::AbstractVecOrMat, Dt::GradientTranspose, yflat::AbstractVector) + (uflat[:] = Dt * yflat) +end function transform_op(reg::TV) ndims = length(reg.size) @@ -233,15 +248,16 @@ function TVProject(size::Tuple, supp::AbstractVector{<:Bool}, wrap::Bool) for start_pt in CartesianIndices(size) explored[start_pt] && continue region = [start_pt] - explored[start_pt] = true; - s = Stack{CartesianIndex{ndims}}(); + explored[start_pt] = true + s = Stack{CartesianIndex{ndims}}() push!(s, start_pt) while !isempty(s) pt = pop!(s) pttup = Tuple(pt) for i in 1:ndims if wrap || pt[i] < size[i] - pt_right = CartesianIndex(pttup[1:i-1]..., 1 + mod(pt[i], size[i]), pttup[i+1:ndims]...) + pt_right = CartesianIndex(pttup[1:(i - 1)]..., 1 + mod(pt[i], size[i]), + pttup[(i + 1):ndims]...) if !supp[pt, i] && !explored[pt_right] explored[pt_right] = true push!(s, pt_right) @@ -249,7 +265,9 @@ function TVProject(size::Tuple, supp::AbstractVector{<:Bool}, wrap::Bool) end end if wrap || pt[i] > 1 - pt_left = CartesianIndex(pttup[1:i-1]..., 1 + mod(pt[i]-2, size[i]), pttup[i+1:ndims]...) + pt_left = CartesianIndex( + pttup[1:(i - 1)]..., 1 + mod(pt[i] - 2, size[i]), + pttup[(i + 1):ndims]...) 
if !supp[pt_left, i] && !explored[pt_left] explored[pt_left] = true push!(s, pt_left) @@ -271,14 +289,14 @@ end function region_vals(P::TVProject, x::AbstractVector) x = reshape(x, P.size) - function region_vals(region) + function region_vals(region) vals = [x[pt] for pt in region] sum(vals) / length(region), maximum(vals) - minimum(vals), length(vals) end map(region_vals, P.regions) end -function Base.:(*)(Pt::TVProjectTranspose, y::AbstractVector) +function Base.:(*)(Pt::TVProjectTranspose, y::AbstractVector) P = Pt.lmap x = zeros(P.size) for (val, region) in zip(y, P.regions) @@ -288,9 +306,14 @@ function Base.:(*)(Pt::TVProjectTranspose, y::AbstractVector) end projection_op(supp, reg::TV) = TVProject(reg.size, supp, reg.wrap) -LinearAlgebra.mul!(uflat::AbstractVecOrMat, P::TVProject, yflat::AbstractVector) = (uflat[:] = P * yflat) -LinearAlgebra.mul!(uflat::AbstractVecOrMat, Pt::TVProjectTranspose, yflat::AbstractVector) = (uflat[:] = Pt * yflat) +function LinearAlgebra.mul!(uflat::AbstractVecOrMat, P::TVProject, yflat::AbstractVector) + (uflat[:] = P * yflat) +end +function LinearAlgebra.mul!( + uflat::AbstractVecOrMat, Pt::TVProjectTranspose, yflat::AbstractVector) + (uflat[:] = Pt * yflat) +end export fista, L1, TV -end \ No newline at end of file +end diff --git a/examples/lasso/NLoptLassoData.jl b/examples/lasso/NLoptLassoData.jl index 8b16020..b39a10b 100644 --- a/examples/lasso/NLoptLassoData.jl +++ b/examples/lasso/NLoptLassoData.jl @@ -9,7 +9,7 @@ function run_once_nlopt(G, y, α, β) n, p = size(G) nlopt = Opt(:LD_CCSAQ, 2p) - nlopt.lower_bounds = vcat(fill(-Inf, p), zeros(p)) + nlopt.lower_bounds = vcat(fill(-Inf, p), zeros(p)) nlopt.upper_bounds = fill(Inf, 2p) # nlopt.maxeval = 2000 nlopt.xtol_rel = 1e-8 @@ -22,7 +22,7 @@ function run_once_nlopt(G, y, α, β) nlopt.params["verbosity"] = 0 nlopt.min_objective = SetupLasso.make_obj(G, y, α, β) - for i in 1:2p + for i in 1:(2p) inequality_constraint!(nlopt, make_cons(p, Val(i)), 1e-10) # this is helpful for convergence. needs to be tuned well: higher and lower can both be bad. end @@ -30,8 +30,8 @@ function run_once_nlopt(G, y, α, β) t0 = abs.(u0) # start the t's with some slack u0_and_t0 = vcat(u0, t0) - (minf,minx,ret) = optimize(nlopt, u0_and_t0) - return minf,minx,ret + (minf, minx, ret) = optimize(nlopt, u0_and_t0) + return minf, minx, ret end ## NLOpt output processing @@ -46,8 +46,10 @@ function safe_scanf(buffer, fmt, args...) seek(buffer, pos) # skip ignored lines ln = readline(buffer) - if (startswith(ln, "j=") || startswith(ln, "dx =")) || startswith(ln, "u =") || startswith(ln, "v =") || startswith(ln, "dfdx") || startswith(ln, "dfcdx") || startswith(ln, "y:") - return safe_scanf(buffer, fmt, args...) + if (startswith(ln, "j=") || startswith(ln, "dx =")) || startswith(ln, "u =") || + startswith(ln, "v =") || startswith(ln, "dfdx") || startswith(ln, "dfcdx") || + startswith(ln, "y:") + return safe_scanf(buffer, fmt, args...) end seek(buffer, pos) return nothing @@ -60,7 +62,7 @@ end Requires use of custom nlopt binary: build from https://github.com/gaurav-arya/nlopt/tree/ag-debug, move shared object file to this directory, and set with set_binary.jl. 
""" -function nlopt_lasso_data(evals) +function nlopt_lasso_data(evals) open("nlopt_out.txt", "w") do io redirect_stdout(io) do run_once_nlopt(evals) @@ -92,41 +94,45 @@ function nlopt_lasso_data(evals) buffer = IOBuffer(read(open("nlopt_out.txt"), String)) # easier to copy d = DataFrame() - while true + while true # read one inner iteration inner_history = DataFrame() inner_iter = 0 done = false while true - if (out = safe_scanf(buffer, inner_iter_fmt, Int64, (Float64 for i in 1:5)...)) !== nothing + if (out = safe_scanf(buffer, inner_iter_fmt, Int64, (Float64 for i in 1:5)...)) !== + nothing dual_iters, dual_obj, dual_opt, dual_grad, _x_proposed... = out x_proposed = collect(_x_proposed) else done = true end - if (out = safe_scanf(buffer, inner_iter2_fmt, (Float64 for i in 1:2)...)) !== nothing + if (out = safe_scanf(buffer, inner_iter2_fmt, (Float64 for i in 1:2)...)) !== + nothing ρ = collect(out) do_break = false else ρ = [NaN, NaN] do_break = true end - push!(inner_history, (;dual_iters, dual_obj, dual_opt, dual_grad, ρ, x_proposed)) + push!( + inner_history, (; dual_iters, dual_obj, dual_opt, dual_grad, ρ, x_proposed)) do_break && break end done && break safe_scanf(buffer, infeasible_point_fmt, String) # skip infeasible point log in hacky way - out = safe_scanf(buffer, outer_iter_fmt, (Float64 for i in 1:2)...) + out = safe_scanf(buffer, outer_iter_fmt, (Float64 for i in 1:2)...) if out === nothing break end ρ = collect(out) - if (out = safe_scanf(buffer, outer_iter_sigma_fmt, (Float64 for i in 1:2)...)) !== nothing + if (out = safe_scanf(buffer, outer_iter_sigma_fmt, (Float64 for i in 1:2)...)) !== + nothing σ = collect(out) else σ = [NaN] end - push!(d, (;ρ, σ, inner_history, x=inner_history.x_proposed[end])) + push!(d, (; ρ, σ, inner_history, x = inner_history.x_proposed[end])) end if countlines(copy(buffer)) != 0 @show countlines(copy(buffer)) @@ -137,4 +143,4 @@ end export run_once_nlopt, nlopt_lasso_data -end \ No newline at end of file +end diff --git a/examples/lasso/SetupLasso.jl b/examples/lasso/SetupLasso.jl index c89cb9b..48106c7 100644 --- a/examples/lasso/SetupLasso.jl +++ b/examples/lasso/SetupLasso.jl @@ -16,7 +16,7 @@ function setup_lasso(n, p, S, noise_level) y = G * u y += noise_level * mean(abs.(y)) * η - return (;u, G, y) + return (; u, G, y) end """ @@ -24,7 +24,7 @@ Return the f_and_jac function for Lasso's epigraph formulation. 
""" function lasso_epigraph(G, y, α, β) n, p = size(G) - ∇cons = spzeros(2p, 2p) + ∇cons = spzeros(2p, 2p) for i in 1:p ∇cons[i, i] = 1 ∇cons[i, p + i] = -1 @@ -42,11 +42,11 @@ function lasso_epigraph(G, y, α, β) end return nothing end - return (;f_and_jac, jac_prototype = vcat(ones(2 * p)', ∇cons)) + return (; f_and_jac, jac_prototype = vcat(ones(2 * p)', ∇cons)) end # for NLopt -function make_obj(G, y, α, β) +function make_obj(G, y, α, β) n, p = size(G) return (u_and_t, grad) -> begin u = @view u_and_t[1:p] @@ -58,7 +58,7 @@ function make_obj(G, y, α, β) end end -_unwrap_val(::Val{x}) where x = x +_unwrap_val(::Val{x}) where {x} = x function make_cons(p, iv) i = _unwrap_val(iv) return (u_and_t, grad) -> begin @@ -66,7 +66,7 @@ function make_cons(p, iv) t = @view u_and_t[(p + 1):(2p)] if length(grad) > 0 grad .= 0 - if i <= p + if i <= p grad[i] = 1 grad[i + p] = -1 else @@ -80,4 +80,4 @@ end export setup_lasso, lasso_epigraph, make_obj, make_cons -end \ No newline at end of file +end diff --git a/examples/lasso/solve_lasso.jl b/examples/lasso/solve_lasso.jl index a062657..4fb982d 100644 --- a/examples/lasso/solve_lasso.jl +++ b/examples/lasso/solve_lasso.jl @@ -5,13 +5,13 @@ using LinearAlgebra ## Initialize problem begin -n = 4 -p = 8 -S = 2 -noise_level = 0.01 -(;u, G, y) = setup_lasso(n, p, S, noise_level) -α = 1e-2 -β = 0.0 + n = 4 + p = 8 + S = 2 + noise_level = 0.01 + (; u, G, y) = setup_lasso(n, p, S, noise_level) + α = 1e-2 + β = 0.0 end ## Solve problem with FISTA @@ -19,8 +19,8 @@ end includet("FISTASolver.jl") using .FISTASolver begin -uest, info = fista(G, y, α, β, 1000000, 1e-44, L1(p)) -norm(uest - u) / norm(u) + uest, info = fista(G, y, α, β, 1000000, 1e-44, L1(p)) + norm(uest - u) / norm(u) end ## Solve problem with CCSA @@ -29,11 +29,11 @@ includet("SparseCCSALassoData.jl") using .SparseCCSALassoData begin -h, opt = sparseccsa_lasso_data(G, y, α, β; xtol_rel=1e-11, dual_ftol_rel=1e-10) -ih = h.inner_history[1] -uestsp = h.x[end][1:p] -@show norm(uestsp - uest) / norm(uest) # converges well even with noise + uest far from u (!) -@show opt.stats.outer_iters_done + h, opt = sparseccsa_lasso_data(G, y, α, β; xtol_rel = 1e-11, dual_ftol_rel = 1e-10) + ih = h.inner_history[1] + uestsp = h.x[end][1:p] + @show norm(uestsp - uest) / norm(uest) # converges well even with noise + uest far from u (!) 
+ @show opt.stats.outer_iters_done end ## OK, time to try NLopt instead @@ -41,66 +41,67 @@ end includet("NLoptLassoData.jl") using .NLoptLassoData begin -sol = run_once_nlopt(G, y, α, β) -uestnl = sol[2][1:p] -norm(uestnl - uest) / norm(uest) + sol = run_once_nlopt(G, y, α, β) + uestnl = sol[2][1:p] + norm(uestnl - uest) / norm(uest) end # evaluate objective on solutions begin -grad = zeros(2p) -@show make_obj(G, y, α, β)(vcat(uestnl, abs.(uestnl)), grad) -@show make_obj(G, y, α, β)(vcat(uestsp, abs.(uestsp)), grad) -@show make_obj(G, y, α, β)(vcat(uest, abs.(uest)), grad) + grad = zeros(2p) + @show make_obj(G, y, α, β)(vcat(uestnl, abs.(uestnl)), grad) + @show make_obj(G, y, α, β)(vcat(uestsp, abs.(uestsp)), grad) + @show make_obj(G, y, α, β)(vcat(uest, abs.(uest)), grad) end ## Plotting (WIP) begin -using CairoMakie -f = Figure(resolution=(1200, 800)) -ax1 = Axis(f[1,1], xlabel="Iterations", ylabel="log(value)", yscale=log10) -ax2 = Axis(f[1,2], xlabel="Iterations", ylabel="log(value)", yscale=log10) -ax3 = Axis(f[2,1], xlabel="Iterations", ylabel="log(value)", yscale=log10) -ax4 = Axis(f[2,2], xlabel="Iterations", ylabel="number") -ax5 = Axis(f[1,3], xlabel="Iterations", ylabel="value") -# objective -lines!(ax1, (a -> a[1]).(h.fx), label="fx[1] (objective)") -# lines!(ax1, (a -> a[2]).(h.fx), label="fx[2] (constraint 1)") -# lines!(ax1, (a -> a[3]).(h.fx), label="fx[3] (constraint 2)") -# ρ -lines!(ax2, (a -> a[1]).(h.ρ), label="ρ[1]") -lines!(ax2, (a -> a[2]).(h.ρ), label="ρ[2]") -lines!(ax2, (a -> a[p+1]).(h.ρ), label="ρ[p+1]") -# σ -lines!(ax3, (a -> a[1]).(h.σ), label="σ[1]") -lines!(ax3, (a -> a[2]).(h.σ), label="σ[2]") -lines!(ax3, (a -> a[p+1]).(h.σ), label="σ[p+1]") -# dual iterations -lines!(ax4, Base.Fix2(size, 1).(h.inner_history), label="inner iterations") -lines!(ax4, (ih -> sum(i -> i.dual_iters, ih.dual_info)).(h.inner_history), label="total dual iterations") -# u/t values -lines!(ax5, (a -> a[1]).(h.x), label="u[1]") -lines!(ax5, (a -> a[p+1]).(h.x), label="t[1]") -lines!(ax5, (a -> a[2]).(h.x), label="u[2]") -lines!(ax5, (a -> a[p+2]).(h.x), label="t[2]") -# legends -axislegend(ax1; position=:rt) -axislegend(ax2; position=:rt) -axislegend(ax3; position=:rt) -axislegend(ax4; position=:rt) -axislegend(ax5; position=:rt) -f + using CairoMakie + f = Figure(resolution = (1200, 800)) + ax1 = Axis(f[1, 1], xlabel = "Iterations", ylabel = "log(value)", yscale = log10) + ax2 = Axis(f[1, 2], xlabel = "Iterations", ylabel = "log(value)", yscale = log10) + ax3 = Axis(f[2, 1], xlabel = "Iterations", ylabel = "log(value)", yscale = log10) + ax4 = Axis(f[2, 2], xlabel = "Iterations", ylabel = "number") + ax5 = Axis(f[1, 3], xlabel = "Iterations", ylabel = "value") + # objective + lines!(ax1, (a -> a[1]).(h.fx), label = "fx[1] (objective)") + # lines!(ax1, (a -> a[2]).(h.fx), label="fx[2] (constraint 1)") + # lines!(ax1, (a -> a[3]).(h.fx), label="fx[3] (constraint 2)") + # ρ + lines!(ax2, (a -> a[1]).(h.ρ), label = "ρ[1]") + lines!(ax2, (a -> a[2]).(h.ρ), label = "ρ[2]") + lines!(ax2, (a -> a[p + 1]).(h.ρ), label = "ρ[p+1]") + # σ + lines!(ax3, (a -> a[1]).(h.σ), label = "σ[1]") + lines!(ax3, (a -> a[2]).(h.σ), label = "σ[2]") + lines!(ax3, (a -> a[p + 1]).(h.σ), label = "σ[p+1]") + # dual iterations + lines!(ax4, Base.Fix2(size, 1).(h.inner_history), label = "inner iterations") + lines!(ax4, (ih -> sum(i -> i.dual_iters, ih.dual_info)).(h.inner_history), + label = "total dual iterations") + # u/t values + lines!(ax5, (a -> a[1]).(h.x), label = "u[1]") + lines!(ax5, (a -> a[p + 
1]).(h.x), label = "t[1]")
+    lines!(ax5, (a -> a[2]).(h.x), label = "u[2]")
+    lines!(ax5, (a -> a[p + 2]).(h.x), label = "t[2]")
+    # legends
+    axislegend(ax1; position = :rt)
+    axislegend(ax2; position = :rt)
+    axislegend(ax3; position = :rt)
+    axislegend(ax4; position = :rt)
+    axislegend(ax5; position = :rt)
+    f
 end
 
 begin
-(;f_and_jac, jac_prototype) = lasso_epigraph(G, y, α)
-fx = zeros(2p+1)
-_u = uest#sp#h.x[end][1:p]
-f_and_jac(fx, jac_prototype, vcat(_u, abs.(_u)))
-fx
-jac_prototype
+    (; f_and_jac, jac_prototype) = lasso_epigraph(G, y, α, β)
+    fx = zeros(2p + 1)
+    _u = uest#sp#h.x[end][1:p]
+    f_and_jac(fx, jac_prototype, vcat(_u, abs.(_u)))
+    fx
+    jac_prototype
 end
 
 norm(uestnl - uest) / norm(uest)
-norm(uestsp - uest) / norm(uest)
\ No newline at end of file
+norm(uestsp - uest) / norm(uest)
diff --git a/examples/lasso/solve_least_squares.jl b/examples/lasso/solve_least_squares.jl
index 6da2b06..f5e569d 100644
--- a/examples/lasso/solve_least_squares.jl
+++ b/examples/lasso/solve_least_squares.jl
@@ -5,13 +5,13 @@ using LinearAlgebra
 
 ## Initialize problem
 
 begin
-n = 8
-p = 8
-S = 2
-noise_level = 0.0
-(;u, G, y) = setup_lasso(n, p, S, noise_level)
-α = 0.0#1e-2
-β = 1e-5#0.0
+    n = 8
+    p = 8
+    S = 2
+    noise_level = 0.0
+    (; u, G, y) = setup_lasso(n, p, S, noise_level)
+    α = 0.0#1e-2
+    β = 1e-5#0.0
 end
 
 ## Solve problem with FISTA
@@ -19,8 +19,8 @@ end
 includet("FISTASolver.jl")
 using .FISTASolver
 begin
-uest, info = fista(G, y, α, β, 1000000, 1e-44, L1(p))
-norm(uest - u) / norm(u)
+    uest, info = fista(G, y, α, β, 1000000, 1e-44, L1(p))
+    norm(uest - u) / norm(u)
 end
 
 ## Solve problem with QR
@@ -34,10 +34,10 @@ includet("SparseCCSALassoData.jl")
 using .SparseCCSALassoData
 
 begin
-h, opt = sparseccsa_lasso_data(G, y, α, β; xtol_rel=1e-8, dual_ftol_rel=1e-12)
-ih = h.inner_history[1]
-uestsp = h.x[end][1:p]
-norm(uestsp - uest_qr) / norm(uest_qr) # note: we can get better convergence with no noise.
+    h, opt = sparseccsa_lasso_data(G, y, α, β; xtol_rel = 1e-8, dual_ftol_rel = 1e-12)
+    ih = h.inner_history[1]
+    uestsp = h.x[end][1:p]
+    norm(uestsp - uest_qr) / norm(uest_qr) # note: we can get better convergence with no noise.
end ## OK, time to try NLopt instead @@ -45,17 +45,17 @@ end includet("NLoptLassoData.jl") using .NLoptLassoData begin -sol = run_once_nlopt(G, y, α, β) -uestnl = sol[2][1:p] -norm(uestnl - uest_qr) / norm(uest_qr) + sol = run_once_nlopt(G, y, α, β) + uestnl = sol[2][1:p] + norm(uestnl - uest_qr) / norm(uest_qr) end # evaluate objective on QR soln begin -grad = zeros(2p) -@show make_obj(G, y, α, β)(vcat(uest_qr, abs.(uest_qr)), grad) -@show make_obj(G, y, α, β)(vcat(uestnl, abs.(uestnl)), grad) -@show make_obj(G, y, α, β)(vcat(uestsp, abs.(uestsp)), grad) -# gradnl = zeros(2p) -# NLoptLassoData.make_obj(G, y, α)(vcat(uestnl, abs.(uestnl)), gradnl) + grad = zeros(2p) + @show make_obj(G, y, α, β)(vcat(uest_qr, abs.(uest_qr)), grad) + @show make_obj(G, y, α, β)(vcat(uestnl, abs.(uestnl)), grad) + @show make_obj(G, y, α, β)(vcat(uestsp, abs.(uestsp)), grad) + # gradnl = zeros(2p) + # NLoptLassoData.make_obj(G, y, α)(vcat(uestnl, abs.(uestnl)), gradnl) end diff --git a/examples/rosenbrock/DefineRosenbrock.jl b/examples/rosenbrock/DefineRosenbrock.jl index 122ebb1..3714e1c 100644 --- a/examples/rosenbrock/DefineRosenbrock.jl +++ b/examples/rosenbrock/DefineRosenbrock.jl @@ -16,8 +16,8 @@ end function f_and_jac(fx, jac, x) fx .= f(x) if jac !== nothing - jac .= @SMatrix [100 * 2 * (x[2] - x[1]^2) * (-2x[1]) - 2 * (1 - x[1]) * 1 100 * 2 * (x[2] - x[1]^2); - 2 * x[1] 2 * x[2]] + jac .= @SMatrix [100 * 2 * (x[2] - x[1]^2) * (-2x[1])-2 * (1 - x[1]) * 1 100*2*(x[2]-x[1]^2); + 2*x[1] 2*x[2]] # cfg = ForwardDiff.JacobianConfig(f, jac, x, ForwardDiff.Chunk{2}()) # ForwardDiff.jacobian!(jac, f, x, cfg) end @@ -34,4 +34,4 @@ m = 1 export f, f_and_jac, n, m -end \ No newline at end of file +end diff --git a/examples/rosenbrock/NLoptRosenbrockData.jl b/examples/rosenbrock/NLoptRosenbrockData.jl index 0b43afd..32fe06a 100644 --- a/examples/rosenbrock/NLoptRosenbrockData.jl +++ b/examples/rosenbrock/NLoptRosenbrockData.jl @@ -24,16 +24,16 @@ function run_once_nlopt(evals) nlopt = Opt(:LD_CCSAQ, 2) nlopt.lower_bounds = [-1.0, -1.0] nlopt.upper_bounds = [2.0, 2.0] - nlopt.maxeval = evals + nlopt.maxeval = evals nlopt.xtol_rel = 0.0 nlopt.xtol_abs = 0.0 nlopt.params["verbosity"] = 2 - nlopt.min_objective = obj + nlopt.min_objective = obj inequality_constraint!(nlopt, cons1) - (minf,minx,ret) = optimize(nlopt, [0.5, 0.5]) - return minf,minx,ret + (minf, minx, ret) = optimize(nlopt, [0.5, 0.5]) + return minf, minx, ret end ## NLOpt output processing @@ -48,8 +48,10 @@ function safe_scanf(buffer, fmt, args...) seek(buffer, pos) # skip ignored lines ln = readline(buffer) - if (startswith(ln, "j=") || startswith(ln, "dx =")) || startswith(ln, "u =") || startswith(ln, "v =") || startswith(ln, "dfdx") || startswith(ln, "dfcdx") || startswith(ln, "y:") - return safe_scanf(buffer, fmt, args...) + if (startswith(ln, "j=") || startswith(ln, "dx =")) || startswith(ln, "u =") || + startswith(ln, "v =") || startswith(ln, "dfdx") || startswith(ln, "dfcdx") || + startswith(ln, "y:") + return safe_scanf(buffer, fmt, args...) end seek(buffer, pos) return nothing @@ -62,7 +64,7 @@ end Requires use of custom nlopt binary: build from https://github.com/gaurav-arya/nlopt/tree/ag-debug, move shared object file to this directory, and set with set_binary.jl. 
""" -function nlopt_dataframe(evals) +function nlopt_dataframe(evals) open("nlopt_out.txt", "w") do io redirect_stdout(io) do run_once_nlopt(evals) @@ -94,41 +96,45 @@ function nlopt_dataframe(evals) buffer = IOBuffer(read(open("nlopt_out.txt"), String)) # easier to copy d = DataFrame() - while true + while true # read one inner iteration inner_history = DataFrame() inner_iter = 0 done = false while true - if (out = safe_scanf(buffer, inner_iter_fmt, Int64, (Float64 for i in 1:5)...)) !== nothing + if (out = safe_scanf(buffer, inner_iter_fmt, Int64, (Float64 for i in 1:5)...)) !== + nothing dual_iters, dual_obj, dual_opt, dual_grad, _x_proposed... = out x_proposed = collect(_x_proposed) else done = true end - if (out = safe_scanf(buffer, inner_iter2_fmt, (Float64 for i in 1:2)...)) !== nothing + if (out = safe_scanf(buffer, inner_iter2_fmt, (Float64 for i in 1:2)...)) !== + nothing ρ = collect(out) do_break = false else ρ = [NaN, NaN] do_break = true end - push!(inner_history, (;dual_iters, dual_obj, dual_opt, dual_grad, ρ, x_proposed)) + push!( + inner_history, (; dual_iters, dual_obj, dual_opt, dual_grad, ρ, x_proposed)) do_break && break end done && break safe_scanf(buffer, infeasible_point_fmt, String) # skip infeasible point log in hacky way - out = safe_scanf(buffer, outer_iter_fmt, (Float64 for i in 1:2)...) + out = safe_scanf(buffer, outer_iter_fmt, (Float64 for i in 1:2)...) if out === nothing break end ρ = collect(out) - if (out = safe_scanf(buffer, outer_iter_sigma_fmt, (Float64 for i in 1:2)...)) !== nothing + if (out = safe_scanf(buffer, outer_iter_sigma_fmt, (Float64 for i in 1:2)...)) !== + nothing σ = collect(out) else σ = [NaN] end - push!(d, (;ρ, σ, inner_history, x=inner_history.x_proposed[end])) + push!(d, (; ρ, σ, inner_history, x = inner_history.x_proposed[end])) end if countlines(copy(buffer)) != 0 @show countlines(copy(buffer)) @@ -139,4 +145,4 @@ end export nlopt_dataframe -end \ No newline at end of file +end diff --git a/examples/rosenbrock/SparseCCSARosenbrockData.jl b/examples/rosenbrock/SparseCCSARosenbrockData.jl index aad3a36..9390318 100644 --- a/examples/rosenbrock/SparseCCSARosenbrockData.jl +++ b/examples/rosenbrock/SparseCCSARosenbrockData.jl @@ -5,17 +5,17 @@ include("DefineRosenbrock.jl") using .DefineRosenbrock using SparseCCSA -function sparseccsa_dataframe(iters=1) - opt = init(f_and_jac, n, m, Float64, zeros(m+1, n); - lb=fill(-1.0, 2), ub=fill(2.0, 2), - x0 = [0.5, 0.5], max_iters = iters, xtol_rel=1e-8, - dual_ftol_abs=0.0, dual_ftol_rel=1e-15 - ) +function sparseccsa_dataframe(iters = 1) + opt = init(f_and_jac, n, m, Float64, zeros(m + 1, n); + lb = fill(-1.0, 2), ub = fill(2.0, 2), + x0 = [0.5, 0.5], max_iters = iters, xtol_rel = 1e-8, + dual_ftol_abs = 0.0, dual_ftol_rel = 1e-15 + ) - solve!(opt; verbosity=Val(2)) + solve!(opt; verbosity = Val(2)) return opt.stats.history end export sparseccsa_dataframe -end \ No newline at end of file +end diff --git a/examples/rosenbrock/plot.jl b/examples/rosenbrock/plot.jl index 5b073c8..34fc99f 100644 --- a/examples/rosenbrock/plot.jl +++ b/examples/rosenbrock/plot.jl @@ -5,9 +5,9 @@ using .NLoptRosenbrockData using LinearAlgebra begin -df_sp = sparseccsa_dataframe(100) -df_n = nlopt_dataframe(334) -df_n_long = nlopt_dataframe(1000) + df_sp = sparseccsa_dataframe(100) + df_n = nlopt_dataframe(334) + df_n_long = nlopt_dataframe(1000) end # Plot NLopt vs SparseCCSA @@ -15,34 +15,35 @@ end using CairoMakie begin -fig = Figure(resolution = (1200, 600)) -# axislegend(ax; position=:rt) -# ρ -# ax = Axis(fig[1, 
2]; xlabel="Iterations", yscale=log10) -ax = Axis(fig[1, 1]; xlabel="Iterations", ylabel="log(value)", yscale=log10) -lines!(ax, first.(df_sp.ρ); label="SparseCCSA ρ", linestyle=:dash) -lines!(ax, first.(df_n.ρ); label="nlopt ρ", linestyle=:dot) -# σ -lines!(ax, norm.(df_sp.σ); label="SparseCCSA σ", linestyle=:dash) -lines!(ax, norm.(df_n.σ); label="nlopt σ", linestyle=:dot) -axislegend(ax; position=:lt) -# -ax = Axis(fig[1, 2]; xlabel="Iterations", ylabel="value") -lines!(ax, first.(df_sp.x); label="SparseCCSA x[1]", linestyle=:dash) -lines!(ax, first.(df_n.x); label="nlopt x[1]", linestyle=:dot) -lines!(ax, last.(df_sp.x); label="SparseCCSA x[2]", linestyle=:dash) -lines!(ax, last.(df_n.x); label="nlopt x[2]", linestyle=:dot) -axislegend(ax; position=:rb) -# lines!(ax, first.(df_n.x); label="nlopt ρ", linestyle=:dot) -# axislegend(ax; position=:rt) -# -ax = Axis(fig[1, 3]; xlabel="Iterations", ylabel="log(error)", yscale=log10) -finalx = df_n_long.x[end] .+ 1e-8 # fudge factor for log -lines!(ax, abs.(last.(df_sp.x .- Ref(finalx))); label="SparseCCSA x[2]", linestyle=:dash) -lines!(ax, abs.(last.(df_n.x .- Ref(finalx))); label="nlopt x[2]", linestyle=:dot) -lines!(ax, abs.(first.(df_sp.x .- Ref(finalx))); label="SparseCCSA x[1]", linestyle=:dash) -lines!(ax, abs.(first.(df_n.x .- Ref(finalx))); label="nlopt x[1]", linestyle=:dot) -axislegend(ax; position=:rt) -fig - + fig = Figure(resolution = (1200, 600)) + # axislegend(ax; position=:rt) + # ρ + # ax = Axis(fig[1, 2]; xlabel="Iterations", yscale=log10) + ax = Axis(fig[1, 1]; xlabel = "Iterations", ylabel = "log(value)", yscale = log10) + lines!(ax, first.(df_sp.ρ); label = "SparseCCSA ρ", linestyle = :dash) + lines!(ax, first.(df_n.ρ); label = "nlopt ρ", linestyle = :dot) + # σ + lines!(ax, norm.(df_sp.σ); label = "SparseCCSA σ", linestyle = :dash) + lines!(ax, norm.(df_n.σ); label = "nlopt σ", linestyle = :dot) + axislegend(ax; position = :lt) + # + ax = Axis(fig[1, 2]; xlabel = "Iterations", ylabel = "value") + lines!(ax, first.(df_sp.x); label = "SparseCCSA x[1]", linestyle = :dash) + lines!(ax, first.(df_n.x); label = "nlopt x[1]", linestyle = :dot) + lines!(ax, last.(df_sp.x); label = "SparseCCSA x[2]", linestyle = :dash) + lines!(ax, last.(df_n.x); label = "nlopt x[2]", linestyle = :dot) + axislegend(ax; position = :rb) + # lines!(ax, first.(df_n.x); label="nlopt ρ", linestyle=:dot) + # axislegend(ax; position=:rt) + # + ax = Axis(fig[1, 3]; xlabel = "Iterations", ylabel = "log(error)", yscale = log10) + finalx = df_n_long.x[end] .+ 1e-8 # fudge factor for log + lines!(ax, abs.(last.(df_sp.x .- Ref(finalx))); + label = "SparseCCSA x[2]", linestyle = :dash) + lines!(ax, abs.(last.(df_n.x .- Ref(finalx))); label = "nlopt x[2]", linestyle = :dot) + lines!(ax, abs.(first.(df_sp.x .- Ref(finalx))); + label = "SparseCCSA x[1]", linestyle = :dash) + lines!(ax, abs.(first.(df_n.x .- Ref(finalx))); label = "nlopt x[1]", linestyle = :dot) + axislegend(ax; position = :rt) + fig end diff --git a/examples/rosenbrock/profile.jl b/examples/rosenbrock/profile.jl index 95ad068..0031768 100644 --- a/examples/rosenbrock/profile.jl +++ b/examples/rosenbrock/profile.jl @@ -2,21 +2,18 @@ includet("DefineRosenbrock.jl") using .DefineRosenbrock using SparseCCSA -opt = init(f_and_jac, n, m, Float64, zeros(m+1, n); - lb=fill(-1.0, 2), ub=fill(2.0, 2), - x0 = [0.5, 0.5], max_iters = 100000000, - dual_ftol_abs=1e-15, dual_ftol_rel=1e-15 -) +opt = init(f_and_jac, n, m, Float64, zeros(m + 1, n); + lb = fill(-1.0, 2), ub = fill(2.0, 2), + x0 = [0.5, 0.5], 
max_iters = 100000000, + dual_ftol_abs = 1e-15, dual_ftol_rel = 1e-15 +) using BenchmarkTools import Profile -@btime f_and_jac($(zeros(2)), $(zeros(m+1, n)), $(zeros(n))) # check rosenbrock f is efficient +@btime f_and_jac($(zeros(2)), $(zeros(m + 1, n)), $(zeros(n))) # check rosenbrock f is efficient @btime step!(opt) # benchmark optimization step -@profview for i in 1:100000 step!(opt) end # profile optimization step +@profview for i in 1:100000 + step!(opt) +end # profile optimization step @report_opt step!(opt) - - - - - diff --git a/examples/rosenbrock/set_binary.jl b/examples/rosenbrock/set_binary.jl index b1427bb..fc5a700 100644 --- a/examples/rosenbrock/set_binary.jl +++ b/examples/rosenbrock/set_binary.jl @@ -11,6 +11,3 @@ set_preferences!( using NLopt_jll NLopt_jll.get_libnlopt_path() - - - diff --git a/src/SparseCCSA.jl b/src/SparseCCSA.jl index b1748f9..25ce8fc 100644 --- a/src/SparseCCSA.jl +++ b/src/SparseCCSA.jl @@ -4,6 +4,7 @@ using LinearAlgebra using UnPack using Printf using DataFrames # TODO: lighter weight alternative? +using Optimization, Optimization.SciMLBase import Base.@kwdef diff --git a/src/dual.jl b/src/dual.jl index 0b846ca..f4c3187 100644 --- a/src/dual.jl +++ b/src/dual.jl @@ -36,12 +36,13 @@ function (evaluator::DualEvaluator{T})(neg_gλ, neg_grad_gλ, λ) where {T} # although we ultimately don't care about the first entry. grad_gλ_all .= 0 mul!(grad_gλ_all, jac_fx, Δx) - grad_gλ_all .+= fx .+ sum(Iterators.map(abs2 ∘ /, Δx, σ); init=zero(T)) ./ 2 .* ρ + grad_gλ_all .+= fx .+ sum(Iterators.map(abs2 ∘ /, Δx, σ); init = zero(T)) ./ 2 .* ρ copyto!(neg_grad_gλ, (@view grad_gλ_all[2:end])) neg_grad_gλ .*= -1 end - neg_gλ[1] = -(dot(λ_all, fx) + sum(Iterators.map((ai, bi, Δxi) -> ai * Δxi^2 + bi * Δxi, a, b, Δx); init=zero(T))) + neg_gλ[1] = -(dot(λ_all, fx) + sum( + Iterators.map((ai, bi, Δxi) -> ai * Δxi^2 + bi * Δxi, a, b, Δx); init = zero(T))) return nothing end diff --git a/src/optimize.jl b/src/optimize.jl index 0123de4..62bfb21 100644 --- a/src/optimize.jl +++ b/src/optimize.jl @@ -1,25 +1,26 @@ -function reinit!(optimizer::CCSAOptimizer{T}; x0=nothing, lb=nothing, ub=nothing) where {T} +function reinit!( + optimizer::CCSAOptimizer{T}; x0 = nothing, lb = nothing, ub = nothing) where {T} @unpack cache, dual_optimizer = optimizer # Reset optimizer stats reset!(optimizer.stats) # Initialize lb and ub - (lb !== nothing) && (cache.lb .= lb) - (ub !== nothing) && (cache.ub .= ub) + (lb !== nothing) && (cache.lb .= lb) + (ub !== nothing) && (cache.ub .= ub) # Reinitialize ρ and σ cache.ρ .= one(T) map!(cache.σ, cache.lb, cache.ub) do lb, ub (isinf(lb) || isinf(ub)) ? 
1.0 : (ub - lb) / 2.0
     end
     # Reinitialize starting point (default: keep what we are already at)
-    (x0 !== nothing) && (cache.x .= x0)
-    cache.x_prev .= cache.x
-    cache.x_prevprev .= cache.x
+    (x0 !== nothing) && (cache.x .= x0)
+    cache.x_prev .= cache.x
+    cache.x_prevprev .= cache.x
     # Reinitialize function evaluation and Jacobian
     optimizer.f_and_jac(cache.fx, cache.jac_fx, cache.x)
     # Recursively reinitialize dual optimizer
     if dual_optimizer !== nothing
-        reinit!(dual_optimizer; x0=zero(T), lb=zero(T), ub=typemax(T))
+        reinit!(dual_optimizer; x0 = zero(T), lb = zero(T), ub = typemax(T))
     end
 end
 
@@ -27,7 +28,7 @@ function propose_Δx!(Δx, optimizer::CCSAOptimizer{T}; verbosity) where {T}
     if optimizer.dual_optimizer !== nothing
         dual_optimizer = optimizer.dual_optimizer
         reinit!(dual_optimizer)
-        dual_sol = solve!(dual_optimizer; verbosity=Val(_unwrap_val(verbosity)-1))
+        dual_sol = solve!(dual_optimizer; verbosity = Val(_unwrap_val(verbosity) - 1))
 
         # We can form the dual evaluator with DualEvaluator(; cache = optimizer.cache),
         # but since we have already formed it for the dual optimizer we just retrieve it here.
@@ -40,11 +41,11 @@
         Δx .= optimizer.cache.Δx
 
         # Return dual solution object (used for logging)
-        return dual_sol
+        return dual_sol
    else
        # the "dual dual" problem has 0 variables and 0 constraints, but running it allows us to retrieve the proposed Δx [length m].
        dual_dual_evaluator = DualEvaluator(; cache = optimizer.cache)
-        dual_dual_evaluator(nothing, nothing, nothing)
+        dual_dual_evaluator(nothing, nothing, nothing)
        Δx .= optimizer.cache.Δx

        return nothing
@@ -56,10 +57,12 @@
 Return a CCSAOptimizer that can be step!'d. Free to allocate here.
 """
 # TODO: defaults for kwargs below
-function init(f_and_jac, n, m, T, jac_prototype; lb=nothing, ub=nothing, x0=nothing,
-    max_iters=nothing, max_inner_iters=nothing, max_dual_iters=nothing, max_dual_inner_iters=nothing,
-    ftol_abs=nothing, ftol_rel=nothing, dual_ftol_abs=nothing, dual_ftol_rel=nothing,
-    xtol_abs=nothing, xtol_rel=nothing, dual_xtol_abs=nothing, dual_xtol_rel=nothing)
+# p carries problem parameters through from the Optimization.jl interface; the core
+# algorithm does not use it, so it defaults to nothing and existing call sites that
+# omit it keep working.
+function init(
+        f_and_jac, n, m, T, jac_prototype, p = nothing; lb = nothing, ub = nothing,
+        x0 = nothing,
+        max_iters = nothing, max_inner_iters = nothing, max_dual_iters = nothing,
+        max_dual_inner_iters = nothing,
+        ftol_abs = nothing, ftol_rel = nothing, dual_ftol_abs = nothing, dual_ftol_rel = nothing,
+        xtol_abs = nothing, xtol_rel = nothing, dual_xtol_abs = nothing, dual_xtol_rel = nothing)
     # Allocate primal cache, with n variables and m constraints
     cache = allocate_cache(; n, m, T, jac_prototype)
 
@@ -68,15 +71,16 @@ function init(f_and_jac, n, m, T, jac_prototype; lb=nothing, ub=nothing, x0=noth
     dual_cache = allocate_cache_for_dual(; m, T)
 
     # Setup optimizers
-    dual_optimizer = CCSAOptimizer(; f_and_jac = DualEvaluator(; cache), cache = dual_cache,
-        dual_optimizer = nothing,
-        settings = CCSASettings(; max_iters = max_dual_iters,
-            max_inner_iters = max_dual_inner_iters,
-            ftol_abs = dual_ftol_abs, ftol_rel = dual_ftol_rel,
-            xtol_abs = dual_xtol_abs, xtol_rel = dual_xtol_rel))
+    dual_optimizer = CCSAOptimizer(;
+        f_and_jac = DualEvaluator(; cache), cache = dual_cache,
+        dual_optimizer = nothing,
+        settings = CCSASettings(; max_iters = max_dual_iters,
+            max_inner_iters = max_dual_inner_iters,
+            ftol_abs = dual_ftol_abs, ftol_rel = dual_ftol_rel,
+            xtol_abs = dual_xtol_abs, xtol_rel = dual_xtol_rel))
     optimizer = CCSAOptimizer(; f_and_jac, cache, dual_optimizer,
-        settings = CCSASettings(; 
max_iters, max_inner_iters, - ftol_abs, ftol_rel, xtol_abs, xtol_rel)) + settings = CCSASettings(; max_iters, max_inner_iters, + ftol_abs, ftol_rel, xtol_abs, xtol_rel)) # Initialize optimizer _x0 = zeros(T, n) @@ -85,7 +89,7 @@ function init(f_and_jac, n, m, T, jac_prototype; lb=nothing, ub=nothing, x0=noth (_x0 !== nothing) && (_x0 .= x0) (lb !== nothing) && (_lb .= lb) (ub !== nothing) && (_ub .= ub) - reinit!(optimizer; x0=_x0, lb=_lb, ub=_ub) + reinit!(optimizer; x0 = _x0, lb = _lb, ub = _ub) return optimizer end @@ -102,7 +106,7 @@ What are the invariants / contracts? - optimizer.cache.{fx,jac_fx} come from applying optimizer.f_and_jac at optimizer.cache.x - optimizer.dual_optimizer contains a ref to optimizer.cache, so updating latter implicitly updates the former. """ -function step!(optimizer::CCSAOptimizer{T}; verbosity=Val(0)) where {T} +function step!(optimizer::CCSAOptimizer{T}; verbosity = Val(0)) where {T} @unpack f_and_jac, cache, stats, settings = optimizer retcode = get_retcode(optimizer) @@ -119,11 +123,11 @@ function step!(optimizer::CCSAOptimizer{T}; verbosity=Val(0)) where {T} # Solve the dual problem, searching for a conservative solution. stats.inner_iters_cur_done = 0 inner_history = _unwrap_val(verbosity) > 0 ? DataFrame() : nothing - while true + while true dual_sol = propose_Δx!(cache.Δx_proposed, optimizer; verbosity) # Compute conservative approximation g at proposed point. - w = sum(Iterators.map(abs2 ∘ /, cache.Δx_proposed, cache.σ); init=zero(T)) / 2 + w = sum(Iterators.map(abs2 ∘ /, cache.Δx_proposed, cache.σ); init = zero(T)) / 2 cache.gx_proposed .= cache.fx .+ cache.ρ .* w mul!(cache.gx_proposed, cache.jac_fx, cache.Δx_proposed, true, true) @@ -132,31 +136,33 @@ function step!(optimizer::CCSAOptimizer{T}; verbosity=Val(0)) where {T} f_and_jac(cache.fx_proposed, cache.jac_fx_proposed, cache.x_proposed) # Increase ρ for non-conservative convex approximations. - conservative = true + conservative = true for i in eachindex(cache.ρ) approx_error = cache.gx_proposed[i] - cache.fx_proposed[i] conservative_i = (approx_error >= -1e-10) conservative &= conservative_i - if !conservative_i + if !conservative_i cache.ρ[i] = min(10.0 * cache.ρ[i], 1.1 * (cache.ρ[i] - approx_error / w)) end end if _unwrap_val(verbosity) > 0 dual_info = if dual_sol !== nothing - (;dual_iters=dual_sol.stats.outer_iters_done, dual_obj=-dual_sol.fx[1], - dual_opt=dual_sol.x[1], - dual_history=dual_sol.stats.history) + (; dual_iters = dual_sol.stats.outer_iters_done, + dual_obj = -dual_sol.fx[1], + dual_opt = dual_sol.x[1], + dual_history = dual_sol.stats.history) else nothing end - push!(inner_history, (;ρ=copy(cache.ρ), - x_proposed=copy(cache.x_proposed), - Δx_proposed=copy(cache.Δx_proposed), - conservative=cache.gx_proposed .>= (cache.fx_proposed .- 1e-10), - fx_proposed=copy(cache.fx_proposed), - gx_proposed=copy(cache.gx_proposed), - dual_info)) + push!(inner_history, + (; ρ = copy(cache.ρ), + x_proposed = copy(cache.x_proposed), + Δx_proposed = copy(cache.Δx_proposed), + conservative = cache.gx_proposed .>= (cache.fx_proposed .- 1e-10), + fx_proposed = copy(cache.fx_proposed), + gx_proposed = copy(cache.gx_proposed), + dual_info)) end # We are guaranteed to have a better optimum once we find a conservative approximation. @@ -165,7 +171,8 @@ function step!(optimizer::CCSAOptimizer{T}; verbosity=Val(0)) where {T} # so long as we are still feasible. (Done mostly for consistency with nlopt.) 
feasible = all(<=(0), @view cache.fx_proposed[2:end]) better_opt = cache.fx_proposed[1] < cache.fx[1] - inner_done = conservative || (stats.inner_iters_cur_done == settings.max_inner_iters) + inner_done = conservative || + (stats.inner_iters_cur_done == settings.max_inner_iters) # Make sure to always update if inner_done, even if (inner_done && !feasible) somehow holds due to # floating point shenanigans. if (feasible && better_opt) || inner_done @@ -174,7 +181,7 @@ function step!(optimizer::CCSAOptimizer{T}; verbosity=Val(0)) where {T} cache.fx .= cache.fx_proposed cache.jac_fx .= cache.jac_fx_proposed end - + stats.inner_iters_cur_done += 1 stats.inner_iters_done += 1 @@ -203,7 +210,10 @@ function step!(optimizer::CCSAOptimizer{T}; verbosity=Val(0)) where {T} @. cache.ρ = max(cache.ρ / 10, 1e-3) if _unwrap_val(verbosity) > 0 - push!(stats.history, (;ρ=copy(cache.ρ), σ=copy(cache.σ), x=copy(cache.x), fx=copy(cache.fx), inner_iters_done=stats.inner_iters_done, inner_history)) + push!(stats.history, + (; ρ = copy(cache.ρ), σ = copy(cache.σ), + x = copy(cache.x), fx = copy(cache.fx), + inner_iters_done = stats.inner_iters_done, inner_history)) end return retcode @@ -218,10 +228,10 @@ function get_retcode(optimizer::CCSAOptimizer) if (stats.outer_iters_done > 1) # Objective tolerance - Δfx = abs(cache.fx[1] - cache.fx_prev[1]) + Δfx = abs(cache.fx[1] - cache.fx_prev[1]) if (settings.ftol_abs !== nothing) && (Δfx < settings.ftol_abs) return :FTOL_ABS - elseif (settings.ftol_rel !== nothing) && + elseif (settings.ftol_rel !== nothing) && (abs(Δfx) / min(abs(cache.fx[1]), abs(cache.fx_prev[1])) < settings.ftol_rel) return :FTOL_REL end @@ -229,8 +239,8 @@ function get_retcode(optimizer::CCSAOptimizer) Δx_norm = norm(Iterators.map(-, cache.x, cache.x_prev)) if (settings.xtol_abs !== nothing) && (Δx_norm < settings.xtol_abs) return :XTOL_ABS - elseif (settings.xtol_rel !== nothing) && - (Δx_norm / min(norm(cache.x), norm(cache.x_prev)) < settings.xtol_rel) + elseif (settings.xtol_rel !== nothing) && + (Δx_norm / min(norm(cache.x), norm(cache.x_prev)) < settings.xtol_rel) return :XTOL_REL end end @@ -238,10 +248,91 @@ function get_retcode(optimizer::CCSAOptimizer) return :CONTINUE end -function solve!(optimizer::CCSAOptimizer; verbosity=Val(0)) +function solve!(optimizer::CCSAOptimizer; verbosity = Val(0)) retcode = :CONTINUE while retcode == :CONTINUE retcode = step!(optimizer; verbosity) end - return (; x=optimizer.cache.x, fx=optimizer.cache.fx, retcode, stats=optimizer.stats) + return (; + x = optimizer.cache.x, fx = optimizer.cache.fx, retcode, stats = optimizer.stats) +end + +struct CCSA end + +SciMLBase.supports_opt_cache_interface(::CCSA) = true +SciMLBase.requiresgradient(::CCSA) = true +SciMLBase.allowsfg(::CCSA) = true +SciMLBase.allowsconstraints(::CCSA) = true +SciMLBase.requiresconsjac(::CCSA) = true +SciMLBase.allowsbounds(::CCSA) = true + +function Optimization.__map_optimizer_args( + cache::Optimization.OptimizationCache, opt::CCSA; + callback = nothing, + maxiters::Union{Number, Nothing} = nothing, + maxtime::Union{Number, Nothing} = nothing, + abstol::Union{Number, Nothing} = nothing, + reltol::Union{Number, Nothing} = nothing, + verbose::Bool = false, + kwargs...) + mapped_args = (; kwargs...) 
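+    # Translate Optimization.jl's common keyword arguments (abstol/reltol/maxiters)
+    # into the names that SparseCCSA's init understands; any solver-specific kwargs
+    # in `kwargs` pass through unchanged.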
+    if !isnothing(abstol)
+        mapped_args = (; mapped_args..., xtol_abs = abstol)
+    end
+    if !isnothing(maxtime)
+        @warn "common maxtime is currently not used by $(opt)"
+    end
+
+    if !isnothing(maxiters)
+        mapped_args = (; mapped_args..., max_iters = maxiters, max_dual_iters = maxiters)
+    end
+
+    if !isnothing(reltol)
+        mapped_args = (; mapped_args..., xtol_rel = reltol)
+    end
+
+    return mapped_args
+end
+
+function SciMLBase.__solve(
+        cache::OptimizationCache{
+        F,
+        RC,
+        LB,
+        UB,
+        LC,
+        UC,
+        S,
+        O,
+        D,
+        P
+}) where {
+        F,
+        RC,
+        LB,
+        UB, LC, UC,
+        S,
+        O <: CCSA,
+        D,
+        P
+}
+    # Combined evaluation: row 1 is the objective and its gradient, rows 2:end are
+    # the constraints and their Jacobian, matching the f_and_jac contract of init.
+    function f_and_jac(fx_proposed, jac_fx_proposed, x_proposed, p = cache.p)
+        fx_proposed[1] = cache.f.fg(@view(jac_fx_proposed[1, :]), x_proposed, p)
+        cache.f.cons(@view(fx_proposed[2:end]), x_proposed)
+        cache.f.cons_j(@view(jac_fx_proposed[2:end, :]), x_proposed)
+        return nothing
+    end
+
+    solver_kwargs = Optimization.__map_optimizer_args(
+        cache, cache.opt; cache.solver_args...)
+
+    # Stack the objective row on top of the constraint Jacobian; fall back to a
+    # dense prototype when no cons_jac_prototype is supplied, so that `opt` is
+    # always defined below.
+    jac_prototype = if cache.f.cons_jac_prototype !== nothing
+        vcat(zeros((1, length(cache.u0))), cache.f.cons_jac_prototype)
+    else
+        zeros(eltype(cache.u0), length(cache.lcons) + 1, length(cache.u0))
+    end
+    opt = init(f_and_jac, length(cache.u0), length(cache.lcons), eltype(cache.u0),
+        jac_prototype, cache.p; lb = cache.lb, ub = cache.ub, x0 = cache.u0,
+        solver_kwargs...)
+
+    sol = solve!(opt; verbosity = Val(2))
+    opt_ret = Optimization.deduce_retcode(sol.retcode)
+    return SciMLBase.build_solution(
+        cache, opt, sol.x, sol.fx, retcode = opt_ret, original = sol)
 end
diff --git a/src/structs.jl b/src/structs.jl
index d95f23d..99ea182 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -33,13 +33,13 @@ end
 function allocate_cache(; n, m, T, jac_prototype)
     return CCSACache(; x = zeros(T, n), fx = zeros(T, m + 1), jac_fx = copy(jac_prototype),
-        ρ = zeros(T, m + 1),
-        σ = zeros(T, n), lb = zeros(T, n), ub = zeros(T, n),
-        Δx_proposed = zeros(T, n), x_proposed = zeros(T, n), gx_proposed = zeros(T, m + 1),
-        fx_proposed = zeros(T, m + 1), jac_fx_proposed = copy(jac_prototype),
-        x_prev = zeros(T, n), x_prevprev = zeros(T, n), fx_prev = zeros(T, m + 1),
-        a = zeros(T, n), b = zeros(T, n), Δx = zeros(T, n),
-        λ_all = zeros(T, m + 1), grad_gλ_all = zeros(T, m + 1))
+        ρ = zeros(T, m + 1),
+        σ = zeros(T, n), lb = zeros(T, n), ub = zeros(T, n),
+        Δx_proposed = zeros(T, n), x_proposed = zeros(T, n), gx_proposed = zeros(T, m + 1),
+        fx_proposed = zeros(T, m + 1), jac_fx_proposed = copy(jac_prototype),
+        x_prev = zeros(T, n), x_prevprev = zeros(T, n), fx_prev = zeros(T, m + 1),
+        a = zeros(T, n), b = zeros(T, n), Δx = zeros(T, n),
+        λ_all = zeros(T, m + 1), grad_gλ_all = zeros(T, m + 1))
 end
 
 """
@@ -47,7 +47,7 @@ Instantiates the cache structure for a dual problem with m constraints.
 """
 function allocate_cache_for_dual(; m, T)
     return allocate_cache(; n = m, m = 0, T,
-        jac_prototype = zeros(T, 1, m))
+        jac_prototype = zeros(T, 1, m))
 end
 
 """
@@ -101,4 +101,4 @@ Solution structure, not yet fleshed out / used.
@kwdef struct Solution{T} x::Vector{T} RET::Symbol -end \ No newline at end of file +end diff --git a/test/old_tests.jl b/test/old_tests.jl index 27ae2a5..62253df 100644 --- a/test/old_tests.jl +++ b/test/old_tests.jl @@ -26,7 +26,7 @@ @test opt₂.ub == fill(typemax(T), nvar) @test opt₂.xtol_rel == T(1e-5) opt₃ = CCSAState(nvar, ncon, f_and_jac, x₀, ρ = ρ, σ = σ, lb = lower_bound, - ub = upper_bound) + ub = upper_bound) @test opt₃.lb == lower_bound @test opt₃.ub == upper_bound @test opt₃.xtol_rel == T(1e-5) @@ -124,7 +124,7 @@ end x = [-10.0, -10.0] xtol_rel = 1e-6 st = CCSAState(n, m, f_and_grad, x, ρ = ρ, σ = σ, lb = lb, ub = ub, - xtol_rel = xtol_rel) + xtol_rel = xtol_rel) optimize(st) return st.x end diff --git a/test/runtests.jl b/test/runtests.jl index ab2074a..387ac2f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,12 @@ import Random Random.seed!(1234) -@safetestset "Dual toy problem" begin include("test_dual.jl") end -@safetestset "Rosenbrock" begin include("test_rosenbrock.jl") end -@safetestset "LASSO" begin include("test_lasso.jl") end \ No newline at end of file +@safetestset "Dual toy problem" begin + include("test_dual.jl") +end +@safetestset "Rosenbrock" begin + include("test_rosenbrock.jl") +end +@safetestset "LASSO" begin + include("test_lasso.jl") +end diff --git a/test/test_dual.jl b/test/test_dual.jl index 6bc02f5..899a6ee 100644 --- a/test/test_dual.jl +++ b/test/test_dual.jl @@ -29,7 +29,8 @@ using ForwardDiff Δx_solution([0.0, 0.0]) # Form dual evaluator - cache = SparseCCSA.allocate_cache(; n = 2, m = 2, T = Float64, jac_prototype = [2 1; 1 0; 0 1]) + cache = SparseCCSA.allocate_cache(; + n = 2, m = 2, T = Float64, jac_prototype = [2 1; 1 0; 0 1]) cache.x .= 1.0 cache.ρ .= 1.0 cache.σ .= 1.0 diff --git a/test/test_lasso.jl b/test/test_lasso.jl index 59a99f0..70ddf98 100644 --- a/test/test_lasso.jl +++ b/test/test_lasso.jl @@ -19,12 +19,12 @@ noise_level = 0.01 @testset "Convergence" begin for i in 1:5 # Initialize problem - (;u, G, y) = setup_lasso(n, p, S, noise_level) + (; u, G, y) = setup_lasso(n, p, S, noise_level) # Solve problem with FISTA uest, info = fista(G, y, α, β, 10000, 1e-16, L1(p)) # Solve problem with CCSA - h, opt = sparseccsa_lasso_data(G, y, α, β; xtol_rel=1e-9, dual_ftol_rel=1e-8) + h, opt = sparseccsa_lasso_data(G, y, α, β; xtol_rel = 1e-9, dual_ftol_rel = 1e-8) uestsp = h.x[end][1:p] - @test norm(uestsp - uest) / norm(uest) < 1e-6 + @test norm(uestsp - uest) / norm(uest) < 1e-6 end -end \ No newline at end of file +end diff --git a/test/test_optimization.jl b/test/test_optimization.jl new file mode 100644 index 0000000..7991e18 --- /dev/null +++ b/test/test_optimization.jl @@ -0,0 +1,11 @@ +using Optimization, SparseCCSA, ForwardDiff + +x0 = zeros(2) +rosenbrock(x, p = nothing) = (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2 + +cons = (res, x, p) -> (res[1] = x[1]^2 + x[2]^2 - 1.0; return nothing) +optf = OptimizationFunction(rosenbrock, AutoSparseForwardDiff(), cons = cons) + +prob = OptimizationProblem( + optf, x0, lb = [-1.0, -1.0], ub = [1.0, 1.0], lcons = [0.0], ucons = [0.0]) +opt = solve(prob, SparseCCSA.CCSA()) diff --git a/test/test_rosenbrock.jl b/test/test_rosenbrock.jl index 56f00dc..376bf09 100644 --- a/test/test_rosenbrock.jl +++ b/test/test_rosenbrock.jl @@ -5,15 +5,15 @@ using JLD2 using DataFrames df_sp = sparseccsa_dataframe(400) -df_n = load("../examples/rosenbrock/nlopt_dataframe.jld2", "df_n") +df_n = load("../examples/rosenbrock/nlopt_dataframe.jld2", "df_n") # Check that first 20 iterations match @testset 
"Consistency with nlopt" begin - @test all(isapprox(df_n.x[i], df_sp.x[i]; rtol=1e-3) for i in 1:20) - @test all(isapprox(df_n.ρ[i], df_sp.ρ[i]; rtol=1e-3) for i in 1:20) - @test all(isapprox(df_n.σ[i], df_sp.σ[i]; rtol=1e-3) for i in 2:20) # don't include i = 1 since no data for nlopt + @test all(isapprox(df_n.x[i], df_sp.x[i]; rtol = 1e-3) for i in 1:20) + @test all(isapprox(df_n.ρ[i], df_sp.ρ[i]; rtol = 1e-3) for i in 1:20) + @test all(isapprox(df_n.σ[i], df_sp.σ[i]; rtol = 1e-3) for i in 2:20) # don't include i = 1 since no data for nlopt end @testset "Convergence" begin - @test all(isapprox(df_n.x[end], df_sp.x[end]; rtol=1e-5)) + @test all(isapprox(df_n.x[end], df_sp.x[end]; rtol = 1e-5)) end