 using DecisionFocusedLearningAlgorithms
 using DecisionFocusedLearningBenchmarks
+
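+# Flux provides the Adam optimiser used below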
+using Flux
 using MLUtils
-using Statistics
 using Plots

-# ! metric(prediction, data_sample)
-
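+# Benchmark setup: statistical model, maximizer, and a dataset of 100 samples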
 b = ArgmaxBenchmark()
 initial_model = generate_statistical_model(b)
 maximizer = generate_maximizer(b)
 dataset = generate_dataset(b, 100)
-train_dataset, val_dataset, _ = splitobs(dataset; at=(0.3, 0.3, 0.4))
-res, model = fyl_train_model(
-    initial_model, maximizer, train_dataset, val_dataset; epochs=100
-)
-
-res = fyl_train_model(StochasticVehicleSchedulingBenchmark(); epochs=100)
-plot(res.validation_loss; label="Validation Loss")
-plot!(res.training_loss; label="Training Loss")
-
-baty_train_model(DynamicVehicleSchedulingBenchmark(; two_dimensional_features=false))
-DAgger_train_model(DynamicVehicleSchedulingBenchmark(; two_dimensional_features=false))
-
-struct KleopatraPolicy{M}
-    model::M
-end
-
-function (m::KleopatraPolicy)(env)
-    x, instance = observe(env)
-    θ = m.model(x)
-    return maximizer(θ; instance)
-end
-
-b = DynamicVehicleSchedulingBenchmark(; two_dimensional_features=false)
-dataset = generate_dataset(b, 100)
-train_instances, validation_instances, test_instances = splitobs(
-    dataset; at=(0.3, 0.3, 0.4)
-)
-train_environments = generate_environments(b, train_instances; seed=0)
-validation_environments = generate_environments(b, validation_instances)
-test_environments = generate_environments(b, test_instances)
-
-train_dataset = vcat(map(train_environments) do env
-    v, y = generate_anticipative_solution(b, env; reset_env=true)
-    return y
-end...)
-
-val_dataset = vcat(map(validation_environments) do env
-    v, y = generate_anticipative_solution(b, env; reset_env=true)
-    return y
-end...)
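+# 30/30/40 split into train, validation, and test sets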
+train_dataset, val_dataset, test_dataset = splitobs(dataset; at=(0.3, 0.3, 0.4))

-model = generate_statistical_model(b; seed=0)
-maximizer = generate_maximizer(b)
-anticipative_policy = (env; reset_env) -> generate_anticipative_solution(b, env; reset_env)
-
-fyl_model = deepcopy(model)
-fyl_policy = Policy("fyl", "", KleopatraPolicy(fyl_model))
-
-callbacks = [
-    Metric(:obj, (data, ctx) -> mean(evaluate_policy!(fyl_policy, test_environments, 1)[1]))
-]
-
-fyl_history = fyl_train_model!(
-    fyl_model, maximizer, train_dataset, val_dataset; epochs=100, callbacks
-)
-
-dagger_model = deepcopy(model)
-dagger_policy = Policy("dagger", "", KleopatraPolicy(dagger_model))
-
-callbacks = [
-    Metric(
-        :obj, (data, ctx) -> mean(evaluate_policy!(dagger_policy, test_environments, 1)[1])
-    ),
-]
-
-dagger_history = DAgger_train_model!(
-    dagger_model,
-    maximizer,
-    train_environments,
-    validation_environments,
-    anticipative_policy;
-    iterations=10,
-    fyl_epochs=10,
-    callbacks=callbacks,
-)
-
-# Extract metric values for plotting
-fyl_epochs, fyl_obj_values = get(fyl_history, :val_obj)
-dagger_epochs, dagger_obj_values = get(dagger_history, :val_obj)
-
-plot(
-    [fyl_epochs, dagger_epochs],
-    [fyl_obj_values, dagger_obj_values];
-    labels=["FYL" "DAgger"],
-    xlabel="Epoch",
-    ylabel="Test Average Reward (1 scenario)",
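+# Configure the perturbed imitation learning algorithm. The keyword names suggest
+# nb_samples Monte Carlo perturbation samples of scale ε, with threaded sampling;
+# see the DecisionFocusedLearningAlgorithms docs for the exact semantics.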
+algorithm = PerturbedImitationAlgorithm(;
+    nb_samples=20, ε=0.05, threaded=true, training_optimizer=Adam()
 )

-using Statistics
-v_fyl, _ = evaluate_policy!(fyl_policy, test_environments, 100)
-v_dagger, _ = evaluate_policy!(dagger_policy, test_environments, 100)
-mean(v_fyl)
-mean(v_dagger)
-
-anticipative_policy(test_environments[1]; reset_env=true)
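+# Train a copy of the initial model, then plot the recorded training loss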
+model = deepcopy(initial_model)
+history = train!(algorithm, model, maximizer, train_dataset, val_dataset; epochs=50)
+x, y = get(history, :training_loss)
+plot(x, y; xlabel="Epoch", ylabel="Training Loss", title="Training Loss over Epochs")
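+# Note: a validation-loss curve can likely be retrieved the same way (the exact
+# key, e.g. :validation_loss, depends on what train! records in the history)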