
Commit 6dd82d6

joaquimg and blegat authored
Integrate with POI to improve UX (#262)
* [WIP] Integrate with POI to improve UX
* add missing import
* temp change to proj toml
* format
* simplify method setting to use model constructor
* add possible fix to scalarize bridge error
* add pkg to project
* format
* improvements
* remove jump wrapper
* clean tests
* fix readme
* use intermediary API
* format
* Apply suggestions from code review
  Co-authored-by: Benoît Legat <benoit.legat@gmail.com>
* add suggestion
* use Parameter set
* todo was fixed
* format
* update docs for newer Flux
* format
* kwargs
* remove diff model
* suggestions
* format
* fix examples

---------

Co-authored-by: Benoît Legat <benoit.legat@gmail.com>
1 parent d09d1b1 commit 6dd82d6

14 files changed: +1513 −69 lines
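In user-facing terms, the headline change is that `diff_optimizer` can now be built on top of ParametricOptInterface (POI), so sensitivities can be requested directly for JuMP `Parameter`s. A condensed sketch of that workflow (it mirrors the full example added to the README below; the numbers are illustrative only):

```julia
using JuMP, DiffOpt, HiGHS

model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer; with_parametric_opt_interface = true))
set_silent(model)
@variable(model, x)
@variable(model, p in Parameter(4.0))
@constraint(model, cons, x >= 3p)
@objective(model, Min, 2x)
optimize!(model)

# forward sensitivity of the solution x with respect to the parameter p
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(p), Parameter(1.0))
DiffOpt.forward_differentiate!(model)
MOI.get(model, DiffOpt.ForwardVariablePrimal(), x)  # ≈ 3.0, since x(p) = 3p
```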

Project.toml

Lines changed: 3 additions & 1 deletion
@@ -12,6 +12,7 @@ LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 MathOptSetDistances = "3b969827-a86c-476c-9527-bb6f1a8fbad5"
+ParametricOptInterface = "0ce4ce61-57bf-432b-a095-efac525d185e"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

 [compat]
@@ -21,5 +22,6 @@ IterativeSolvers = "0.9"
 JuMP = "1"
 LazyArrays = "0.21, 0.22, 1"
 MathOptInterface = "1.18"
-MathOptSetDistances = "0.2.7"
+MathOptSetDistances = "0.2.9"
+ParametricOptInterface = "0.9.0"
 julia = "1.6"
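For reference, a sketch of adding the new direct dependency to a local environment with a bound matching the compat entry above (the package name is from the diff; the exact pin is up to the user):

```julia
using Pkg
Pkg.add(name = "ParametricOptInterface", version = "0.9")
```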

README.md

Lines changed: 70 additions & 1 deletion
@@ -31,7 +31,76 @@ examples, tutorials, and an API reference.

 ## Use with JuMP

-Use DiffOpt with JuMP by following this brief example:
+### DiffOpt-JuMP API with `Parameters`
+
+```julia
+using JuMP, DiffOpt, HiGHS
+
+model = Model(
+    () -> DiffOpt.diff_optimizer(
+        HiGHS.Optimizer;
+        with_parametric_opt_interface = true,
+    ),
+)
+set_silent(model)
+
+p_val = 4.0
+pc_val = 2.0
+@variable(model, x)
+@variable(model, p in Parameter(p_val))
+@variable(model, pc in Parameter(pc_val))
+@constraint(model, cons, pc * x >= 3 * p)
+@objective(model, Min, 2x)
+optimize!(model)
+@show value(x) == 3 * p_val / pc_val
+
+# the function is
+# x(p, pc) = 3p / pc
+# hence,
+# dx/dp = 3 / pc
+# dx/dpc = -3p / pc^2
+
+# First, try forward mode AD
+
+# differentiate w.r.t. p
+direction_p = 3.0
+MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(p), Parameter(direction_p))
+DiffOpt.forward_differentiate!(model)
+@show MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) == direction_p * 3 / pc_val
+
+# update p and pc
+p_val = 2.0
+pc_val = 6.0
+set_parameter_value(p, p_val)
+set_parameter_value(pc, pc_val)
+# re-optimize
+optimize!(model)
+# check solution
+@show value(x) ≈ 3 * p_val / pc_val
+
+# stop differentiating with respect to p
+DiffOpt.empty_input_sensitivities!(model)
+# differentiate w.r.t. pc
+direction_pc = 10.0
+MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(pc), Parameter(direction_pc))
+DiffOpt.forward_differentiate!(model)
+@show abs(MOI.get(model, DiffOpt.ForwardVariablePrimal(), x) -
+    -direction_pc * 3 * p_val / pc_val^2) < 1e-5
+
+# always a good practice to clear previously set sensitivities
+DiffOpt.empty_input_sensitivities!(model)
+# Now, reverse mode AD
+direction_x = 10.0
+MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, direction_x)
+DiffOpt.reverse_differentiate!(model)
+@show MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(p)) == MOI.Parameter(direction_x * 3 / pc_val)
+@show abs(MOI.get(model, DiffOpt.ReverseConstraintSet(), ParameterRef(pc)).value -
+    -direction_x * 3 * p_val / pc_val^2) < 1e-5
+```
+
+### Low level DiffOpt-JuMP API:
+
+A brief example:

 ```julia
 using JuMP, DiffOpt, HiGHS
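The low-level example is cut off by the diff view above. For orientation only, here is a hedged sketch of how that API is typically driven; the constraint and numbers are illustrative and not necessarily those in the README:

```julia
using JuMP, DiffOpt, HiGHS

model = Model(() -> DiffOpt.diff_optimizer(HiGHS.Optimizer))
set_silent(model)
@variable(model, x)
@constraint(model, cons, x >= 3)   # illustrative constraint
@objective(model, Min, 2x)
optimize!(model)

# seed the reverse pass with dl/dx = 1.0 and backpropagate through the solver
MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, 1.0)
DiffOpt.reverse_differentiate!(model)
# sensitivity expression with respect to the constraint function
dcons = MOI.get(model, DiffOpt.ReverseConstraintFunction(), cons)
```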

docs/src/examples/custom-relu.jl

Lines changed: 11 additions & 18 deletions
@@ -32,7 +32,7 @@ function matrix_relu(
     @variable(model, x[1:layer_size, 1:batch_size] >= 0)
     @objective(model, Min, x[:]'x[:] - 2y[:]'x[:])
     optimize!(model)
-    return value.(x)
+    return Float32.(value.(x))
 end

 # Define the reverse differentiation rule, for the function we defined above.
@@ -42,9 +42,9 @@ function ChainRulesCore.rrule(::typeof(matrix_relu), y::Matrix{T}) where {T}
     function pullback_matrix_relu(dl_dx)
         ## some value from the backpropagation (e.g., loss) is denoted by `l`
         ## so `dl_dy` is the derivative of `l` wrt `y`
-        x = model[:x] # load decision variable `x` into scope
-        dl_dy = zeros(T, size(dl_dx))
-        dl_dq = zeros(T, size(dl_dx))
+        x = model[:x]::Matrix{JuMP.VariableRef} # load decision variable `x` into scope
+        dl_dy = zeros(T, size(x))
+        dl_dq = zeros(T, size(x))
         ## set sensitivities
         MOI.set.(model, DiffOpt.ReverseVariablePrimal(), x[:], dl_dx[:])
         ## compute grad
@@ -76,13 +76,13 @@ m = Flux.Chain(

 N = 1000 # batch size
 ## Preprocessing train data
-imgs = MLDatasets.MNIST.traintensor(1:N)
-labels = MLDatasets.MNIST.trainlabels(1:N)
+imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:N]
+labels = MLDatasets.MNIST(; split = :train).targets[1:N]
 train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), N)) # stack images
 train_Y = Flux.onehotbatch(labels, 0:9);
 ## Preprocessing test data
-test_imgs = MLDatasets.MNIST.testtensor(1:N)
-test_labels = MLDatasets.MNIST.testlabels(1:N)
+test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:N]
+test_labels = MLDatasets.MNIST(; split = :test).targets[1:N];
 test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), N))
 test_Y = Flux.onehotbatch(test_labels, 0:9);
@@ -97,19 +97,12 @@ dataset = repeated((train_X, train_Y), epochs);
 # ## Network training

 # training loss function, Flux optimizer
-custom_loss(x, y) = Flux.crossentropy(m(x), y)
-opt = Flux.Adam()
-evalcb = () -> @show(custom_loss(train_X, train_Y))
+custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
+opt = Flux.setup(Flux.Adam(), m)

 # Train to optimize network parameters

-@time Flux.train!(
-    custom_loss,
-    Flux.params(m),
-    dataset,
-    opt,
-    cb = Flux.throttle(evalcb, 5),
-);
+@time Flux.train!(custom_loss, m, dataset, opt);

 # Although our custom implementation takes time, it is able to reach similar
 # accuracy as the usual ReLU function implementation.
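Not part of the diff, but a quick way to see how the pieces interact: Flux differentiates through ChainRulesCore rules, so a `Flux.gradient` call through `matrix_relu` exercises `pullback_matrix_relu` (and DiffOpt's reverse pass inside it). The sizes below are assumed, not taken from the example:

```julia
import Flux

y = randn(Float32, 10, 4)  # hypothetical (layer_size, batch_size) input
g = Flux.gradient(y -> sum(matrix_relu(y)), y)[1]
size(g) == size(y)         # one sensitivity per input entry
```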

docs/src/examples/polyhedral_project.jl

Lines changed: 11 additions & 18 deletions
@@ -54,7 +54,7 @@ function (polytope::Polytope{N})(
 )
     @objective(model, Min, dot(x - y, x - y))
     optimize!(model)
-    return JuMP.value.(x)
+    return Float32.(JuMP.value.(x))
 end

 # The `@functor` macro from Flux implements auxiliary functions for collecting the parameters of
@@ -75,12 +75,12 @@ function ChainRulesCore.rrule(
     model = direct_model(DiffOpt.diff_optimizer(Ipopt.Optimizer))
     xv = polytope(y; model = model)
     function pullback_matrix_projection(dl_dx)
-        layer_size, batch_size = size(dl_dx)
         dl_dx = ChainRulesCore.unthunk(dl_dx)
         ## `dl_dy` is the derivative of `l` wrt `y`
-        x = model[:x]
+        x = model[:x]::Matrix{JuMP.VariableRef}
+        layer_size, batch_size = size(x)
         ## grad wrt input parameters
-        dl_dy = zeros(size(dl_dx))
+        dl_dy = zeros(size(x))
         ## grad wrt layer parameters
         dl_dw = zero.(polytope.w)
         dl_db = zero(polytope.b)
@@ -122,13 +122,13 @@ m = Flux.Chain(

 M = 500 # batch size
 ## Preprocessing train data
-imgs = MLDatasets.MNIST.traintensor(1:M)
-labels = MLDatasets.MNIST.trainlabels(1:M);
+imgs = MLDatasets.MNIST(; split = :train).features[:, :, 1:M]
+labels = MLDatasets.MNIST(; split = :train).targets[1:M]
 train_X = float.(reshape(imgs, size(imgs, 1) * size(imgs, 2), M)) # stack images
 train_Y = Flux.onehotbatch(labels, 0:9);
 ## Preprocessing test data
-test_imgs = MLDatasets.MNIST.testtensor(1:M)
-test_labels = MLDatasets.MNIST.testlabels(1:M)
+test_imgs = MLDatasets.MNIST(; split = :test).features[:, :, 1:M]
+test_labels = MLDatasets.MNIST(; split = :test).targets[1:M]
 test_X = float.(reshape(test_imgs, size(test_imgs, 1) * size(test_imgs, 2), M))
 test_Y = Flux.onehotbatch(test_labels, 0:9);
@@ -142,19 +142,12 @@ dataset = repeated((train_X, train_Y), epochs);
 # ## Network training

 # training loss function, Flux optimizer
-custom_loss(x, y) = Flux.crossentropy(m(x), y)
-opt = Flux.ADAM()
-evalcb = () -> @show(custom_loss(train_X, train_Y))
+custom_loss(m, x, y) = Flux.crossentropy(m(x), y)
+opt = Flux.setup(Flux.Adam(), m)

 # Train to optimize network parameters

-@time Flux.train!(
-    custom_loss,
-    Flux.params(m),
-    dataset,
-    opt,
-    cb = Flux.throttle(evalcb, 5),
-);
+@time Flux.train!(custom_loss, m, dataset, opt);

 # Although our custom implementation takes time, it is able to reach similar
 # accuracy as the usual ReLU function implementation.
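Both example files receive the same two updates: the newer MLDatasets accessor API and Flux's explicit-style training loop. A self-contained sketch of the latter pattern, with a placeholder model and dataset rather than the MNIST pipeline used above:

```julia
import Flux

# placeholder model and one-batch dataset, only to show the call shape
m = Flux.Chain(Flux.Dense(4 => 8, Flux.relu), Flux.Dense(8 => 2))
data = [(randn(Float32, 4, 16), Flux.onehotbatch(rand(0:1, 16), 0:1))]

loss(m, x, y) = Flux.logitcrossentropy(m(x), y)  # loss now takes the model first
opt_state = Flux.setup(Flux.Adam(), m)           # optimiser state tied to the model
Flux.train!(loss, m, data, opt_state)            # replaces the Flux.params-based call
```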

docs/src/manual.md

Lines changed: 8 additions & 13 deletions
@@ -4,9 +4,9 @@
 As of now, this package only works for optimization models that can be written either in convex conic form or convex quadratic form.


-## Supported objectives & constraints - scheme 1
+## Supported objectives & constraints - `QuadraticProgram` backend

-For `QPTH`/`OPTNET` style backend, the package supports following `Function-in-Set` constraints:
+For `QuadraticProgram` backend, the package supports following `Function-in-Set` constraints:

 | MOI Function | MOI Set |
 |:-------|:---------------|
@@ -26,9 +26,9 @@ and the following objective types:
 | `ScalarQuadraticFunction` |


-## Supported objectives & constraints - scheme 2
+## Supported objectives & constraints - `ConicProgram` backend

-For `DiffCP`/`CVXPY` style backend, the package supports following `Function-in-Set` constraints:
+For the `ConicProgram` backend, the package supports following `Function-in-Set` constraints:

 | MOI Function | MOI Set |
 |:-------|:---------------|
@@ -50,19 +50,16 @@ and the following objective types:
 | `VariableIndex` |
 | `ScalarAffineFunction` |

+Other conic sets such as `RotatedSecondOrderCone` and `PositiveSemidefiniteConeSquare` are supported through bridges.

-## Creating a differentiable optimizer
+
+## Creating a differentiable MOI optimizer

 You can create a differentiable optimizer over an existing MOI solver by using the `diff_optimizer` utility.
 ```@docs
 diff_optimizer
 ```

-## Adding new sets and constraints
-
-The DiffOpt `Optimizer` behaves similarly to other MOI Optimizers
-and implements the `MOI.AbstractOptimizer` API.
-
 ## Projections on cone sets

 DiffOpt requires taking projections and finding projection gradients of vectors while computing the jacobians. For this purpose, we use [MathOptSetDistances.jl](https://github.com/matbesancon/MathOptSetDistances.jl), which is a dedicated package for computing set distances, projections and projection gradients.
@@ -104,6 +101,4 @@ In the light of above, DiffOpt differentiates program variables ``x``, ``s``, ``
 - OptNet: Differentiable Optimization as a Layer in Neural Networks

 ### Backward Pass vector
-One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), *OptNet: Differentiable Optimization as a Layer in Neural Networks*. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a *backward pass vector*, often used in backprop in machine learning/automatic differentiation. This is what happens in scheme 1 of `DiffOpt` backend.
-
-But, for the conic system (scheme 2), we provide perturbations in conic data (`dA`, `db`, `dc`) to compute pertubations (`dx`, `dy`, `dz`) in input variables. Unlike the quadratic case, these perturbations are actual derivatives, not the product with a backward pass vector. This is an important distinction between the two schemes of differential optimization.
+One possible point of confusion in finding Jacobians is the role of the backward pass vector - above eqn (7), *OptNet: Differentiable Optimization as a Layer in Neural Networks*. While differentiating convex programs, it is often the case that we don't want to find the actual derivatives, rather we might be interested in computing the product of Jacobians with a *backward pass vector*, often used in backpropagation in machine learning/automatic differentiation. This is what happens in `DiffOpt` backends.
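The manual refers to `diff_optimizer` only through a docstring reference. For orientation, a minimal MOI-level sketch of building and differentiating such an optimizer; the problem data is illustrative and not taken from the manual:

```julia
import MathOptInterface as MOI
import DiffOpt
import HiGHS

model = DiffOpt.diff_optimizer(HiGHS.Optimizer)
MOI.set(model, MOI.Silent(), true)
x = MOI.add_variable(model)
c = MOI.add_constraint(
    model,
    MOI.ScalarAffineFunction([MOI.ScalarAffineTerm(1.0, x)], 0.0),
    MOI.GreaterThan(3.0),
)
MOI.set(model, MOI.ObjectiveSense(), MOI.MIN_SENSE)
obj = MOI.ScalarAffineFunction([MOI.ScalarAffineTerm(2.0, x)], 0.0)
MOI.set(model, MOI.ObjectiveFunction{typeof(obj)}(), obj)
MOI.optimize!(model)

# reverse pass: seed dl/dx, backpropagate, then read the constraint sensitivity
MOI.set(model, DiffOpt.ReverseVariablePrimal(), x, 1.0)
DiffOpt.reverse_differentiate!(model)
MOI.get(model, DiffOpt.ReverseConstraintFunction(), c)
```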

src/DiffOpt.jl

Lines changed: 5 additions & 0 deletions
@@ -12,12 +12,14 @@ import LazyArrays
 import LinearAlgebra
 import MathOptInterface as MOI
 import MathOptSetDistances as MOSD
+import ParametricOptInterface as POI
 import SparseArrays

 include("utils.jl")
 include("product_of_sets.jl")
 include("diff_opt.jl")
 include("moi_wrapper.jl")
+include("parameters.jl")
 include("jump_moi_overloads.jl")

 include("copy_dual.jl")
@@ -40,4 +42,7 @@ end

 export diff_optimizer

+# TODO
+# add precompilation statements
+
 end # module

src/bridges.jl

Lines changed: 19 additions & 0 deletions
@@ -43,6 +43,25 @@ function MOI.get(
         MOI.get(model, attr, bridge.vector_constraint),
     )[1]
 end
+
+function MOI.set(
+    model::MOI.ModelLike,
+    attr::ForwardConstraintFunction,
+    bridge::MOI.Bridges.Constraint.ScalarizeBridge,
+    value,
+)
+    MOI.set.(model, attr, bridge.scalar_constraints, value)
+    return
+end
+
+function MOI.get(
+    model::MOI.ModelLike,
+    attr::ReverseConstraintFunction,
+    bridge::MOI.Bridges.Constraint.ScalarizeBridge,
+)
+    return _vectorize(MOI.get.(model, attr, bridge.scalar_constraints))
+end
+
 function MOI.get(
     model::MOI.ModelLike,
     attr::DiffOpt.ReverseConstraintFunction,

src/diff_opt.jl

Lines changed: 11 additions & 0 deletions
@@ -58,6 +58,17 @@ The output solution differentials can be queried with the attribute
 """
 function forward_differentiate! end

+"""
+    empty_input_sensitivities!(model::MOI.ModelLike)
+
+Empty the input sensitivities of the model.
+Sets to zero all the sensitivities set by the user with method such as:
+- `MOI.set(model, DiffOpt.ReverseVariablePrimal(), variable_index, value)`
+- `MOI.set(model, DiffOpt.ForwardObjectiveFunction(), expression)`
+- `MOI.set(model, DiffOpt.ForwardConstraintFunction(), index, expression)`
+"""
+function empty_input_sensitivities! end
+
 """
     ForwardObjectiveFunction <: MOI.AbstractModelAttribute
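A small usage sketch for the new function, reusing names from the README example added above (`model`, `pc`, and `x` are assumed to be in scope from that example):

```julia
# drop any previously seeded forward/reverse directions ...
DiffOpt.empty_input_sensitivities!(model)
# ... before seeding and differentiating with respect to a different parameter
MOI.set(model, DiffOpt.ForwardConstraintSet(), ParameterRef(pc), Parameter(1.0))
DiffOpt.forward_differentiate!(model)
MOI.get(model, DiffOpt.ForwardVariablePrimal(), x)
```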
