From 793548480c3db1e0570327a01c0f372b4a3e1d38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20J=C3=A4rvinen?=
Date: Tue, 25 Jul 2023 15:41:45 -0700
Subject: [PATCH 1/6] basic MLJ extension

---
 Project.toml                              |  5 ++++
 ext/ACEfit_MLJLinearModels_ext.jl         | 25 +++++++++++++++++
 ext/ACEfit_MLJScikitLearnInterface_ext.jl | 17 ++++++++++++
 test/Project.toml                         |  2 ++
 test/runtests.jl                          |  2 ++
 test/test_mlj.jl                          | 34 +++++++++++++++++++++++
 6 files changed, 85 insertions(+)
 create mode 100644 ext/ACEfit_MLJLinearModels_ext.jl
 create mode 100644 ext/ACEfit_MLJScikitLearnInterface_ext.jl
 create mode 100644 test/test_mlj.jl

diff --git a/Project.toml b/Project.toml
index b3a9815..753d70e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -15,10 +15,15 @@ SharedArrays = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 
 [weakdeps]
+MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
+MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
+MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab"
 PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
 
 [extensions]
 ACEfit_PythonCall_ext = "PythonCall"
+ACEfit_MLJLinearModels_ext = [ "MLJ", "MLJLinearModels" ]
+ACEfit_MLJScikitLearnInterface_ext = ["MLJScikitLearnInterface", "PythonCall", "MLJ"]
 
 [compat]
 julia = "1.9"
diff --git a/ext/ACEfit_MLJLinearModels_ext.jl b/ext/ACEfit_MLJLinearModels_ext.jl
new file mode 100644
index 0000000..96c8399
--- /dev/null
+++ b/ext/ACEfit_MLJLinearModels_ext.jl
@@ -0,0 +1,25 @@
+module ACEfit_MLJLinearModels_ext
+
+using MLJ
+using ACEfit
+using MLJLinearModels
+
+function ACEfit.solve(solver::Union{
+                          MLJLinearModels.ElasticNetRegressor,
+                          MLJLinearModels.HuberRegressor,
+                          MLJLinearModels.LADRegressor,
+                          MLJLinearModels.LassoRegressor,
+                          MLJLinearModels.LinearRegressor,
+                          MLJLinearModels.QuantileRegressor,
+                          MLJLinearModels.RidgeRegressor,
+                          MLJLinearModels.RobustRegressor,
+                      },
+                      A, y)
+    Atable = MLJ.table(A)
+    mach = machine(solver, Atable, y)
+    MLJ.fit!(mach)
+    params = fitted_params(mach)
+    return Dict{String, Any}("C" => params.coef )
+end
+
+end
\ No newline at end of file
diff --git a/ext/ACEfit_MLJScikitLearnInterface_ext.jl b/ext/ACEfit_MLJScikitLearnInterface_ext.jl
new file mode 100644
index 0000000..a5682d2
--- /dev/null
+++ b/ext/ACEfit_MLJScikitLearnInterface_ext.jl
@@ -0,0 +1,17 @@
+module ACEfit_MLJScikitLearnInterface_ext
+
+using ACEfit
+using MLJ
+using MLJScikitLearnInterface
+using PythonCall
+
+function ACEfit.solve(solver, A, y)
+    Atable = MLJ.table(A)
+    mach = machine(solver, Atable, y)
+    MLJ.fit!(mach)
+    params = fitted_params(mach)
+    c = params.coef
+    return Dict{String, Any}("C" => pyconvert(Array, c) )
+end
+
+end
\ No newline at end of file
diff --git a/test/Project.toml b/test/Project.toml
index a84fdbf..cd149ec 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,4 +1,6 @@
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
+MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
 PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/runtests.jl b/test/runtests.jl
index e87eb9f..2039593 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -7,4 +7,6 @@ using Test
 
     @testset "Bayesian Linear" begin include("test_bayesianlinear.jl") end
     @testset "Linear Solvers" begin include("test_linearsolvers.jl") end
+
+    @testset "MLJ Solvers" begin include("test_mlj.jl") end
 end
diff --git a/test/test_mlj.jl b/test/test_mlj.jl
new file mode 100644
index 0000000..1f73f98
--- /dev/null
+++ b/test/test_mlj.jl
@@ -0,0 +1,34 @@
+using ACEfit
+using LinearAlgebra
+using MLJ
+using MLJScikitLearnInterface
+
+@info("Test MLJ interface on overdetermined system")
+Nobs = 10_000
+Nfeat = 100
+A = randn(Nobs, Nfeat) / sqrt(Nobs)
+y = randn(Nobs)
+P = Diagonal(1.0 .+ rand(Nfeat))
+
+
+@info(" ... MLJLinearModels LinearRegressor")
+LinearRegressor = @load LinearRegressor pkg=MLJLinearModels
+solver = LinearRegressor()
+solver = ACEfit.LSQR(damp = 0, atol = 1e-6)
+results = ACEfit.solve(solver, A, y)
+C = results["C"]
+@show norm(A * C - y)
+@show norm(C)
+
+
+@info(" ... MLJ SKLearn ARD")
+ARDRegressor = @load ARDRegressor pkg=MLJScikitLearnInterface
+solver = ARDRegressor(
+    n_iter = 300,
+    tol = 1e-3,
+    threshold_lambda = 10000
+)
+results = ACEfit.solve(solver, A, y)
+C = results["C"]
+@show norm(A * C - y)
+@show norm(C)
\ No newline at end of file

From 2a02ea3d68cbee5fe3cf29db1af2c56dc3bd01b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20J=C3=A4rvinen?=
Date: Tue, 25 Jul 2023 16:05:06 -0700
Subject: [PATCH 2/6] Update Project.toml

---
 Project.toml      | 3 +++
 test/Project.toml | 1 +
 2 files changed, 4 insertions(+)

diff --git a/Project.toml b/Project.toml
index 753d70e..9ee5052 100644
--- a/Project.toml
+++ b/Project.toml
@@ -28,6 +28,9 @@ ACEfit_MLJScikitLearnInterface_ext = ["MLJScikitLearnInterface", "PythonCall", "MLJ"]
 [compat]
 julia = "1.9"
 IterativeSolvers = "0.9.2"
+MLJ = "0.19"
+MLJLinearModels = "0.9"
+MLJScikitLearnInterface = "0.5"
 LowRankApprox = "0.5.3"
 Optim = "1.7"
 ParallelDataTransfer = "0.5.0"
diff --git a/test/Project.toml b/test/Project.toml
index cd149ec..2284da9 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -2,5 +2,6 @@
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJLinearModels = "6ee0df7b-362f-4a72-a706-9e79364fb692"
+MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab"
 PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

From b9aeb771603d226d9c65029596ea128c6b2aa71b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20J=C3=A4rvinen?=
Date: Tue, 25 Jul 2023 16:05:56 -0700
Subject: [PATCH 3/6] Add instructions on how to use MLJ

---
 docs/src/index.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/docs/src/index.md b/docs/src/index.md
index bc52b2e..3fb9969 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -15,6 +15,35 @@
 using ACEfit
 using PythonCall
 ```
+## MLJ solvers
+
+To use [MLJ](https://github.com/alan-turing-institute/MLJ.jl) solvers, you need to load MLJ in addition to ACEfit:
+
+```julia
+using ACEfit
+using MLJ
+```
+
+After that, load an appropriate MLJ solver. Take a look at the available MLJ [solvers](https://alan-turing-institute.github.io/MLJ.jl/dev/model_browser/). Note that extensions are currently available only for [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl) and [MLJLinearModels.jl](https://github.com/JuliaAI/MLJLinearModels.jl). To use other MLJ solvers, please file an issue.
+
+You need to load the solver and then create a solver structure:
+
+```julia
+# Load ARD solver
+ARDRegressor = @load ARDRegressor pkg=MLJScikitLearnInterface
+
+# Create the solver itself and give it parameters
+solver = ARDRegressor(
+    n_iter = 300,
+    tol = 1e-3,
+    threshold_lambda = 10000
+)
+```
+
+After this, you can use the MLJ solver like any other solver.
+
+## Index
+
 
 ```@index
 ```

From 15b739df0875ad2942f7ac5db3987d92b47359de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20J=C3=A4rvinen?=
Date: Tue, 25 Jul 2023 17:41:03 -0700
Subject: [PATCH 4/6] add documentation strings

---
 ext/ACEfit_MLJLinearModels_ext.jl         | 28 ++++++++++++++++++++++
 ext/ACEfit_MLJScikitLearnInterface_ext.jl | 29 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/ext/ACEfit_MLJLinearModels_ext.jl b/ext/ACEfit_MLJLinearModels_ext.jl
index 96c8399..bb476fa 100644
--- a/ext/ACEfit_MLJLinearModels_ext.jl
+++ b/ext/ACEfit_MLJLinearModels_ext.jl
@@ -4,6 +4,34 @@ using MLJ
 using ACEfit
 using MLJLinearModels
 
+"""
+    ACEfit.solve(solver, A, y)
+
+Overloads `ACEfit.solve` to use MLJLinearModels solvers
+when `solver` is an [MLJLinearModels](https://github.com/JuliaAI/MLJLinearModels.jl) solver.
+
+# Example
+```julia
+using MLJ
+using ACEfit
+
+# Load Lasso solver
+LassoRegressor = @load LassoRegressor pkg=MLJLinearModels
+
+# Create the solver itself and give it parameters
+solver = LassoRegressor(
+    lambda = 0.2,
+    fit_intercept = false
+    # insert more fit params
+)
+
+# fit ACE model
+linear_fit(training_data, basis, solver)
+
+# or lower level
+ACEfit.solve(solver, A, y)
+```
+"""
 function ACEfit.solve(solver::Union{
                           MLJLinearModels.ElasticNetRegressor,
                           MLJLinearModels.HuberRegressor,
diff --git a/ext/ACEfit_MLJScikitLearnInterface_ext.jl b/ext/ACEfit_MLJScikitLearnInterface_ext.jl
index a5682d2..34ae06a 100644
--- a/ext/ACEfit_MLJScikitLearnInterface_ext.jl
+++ b/ext/ACEfit_MLJScikitLearnInterface_ext.jl
@@ -5,6 +5,35 @@ using MLJ
 using MLJScikitLearnInterface
 using PythonCall
 
+
+"""
+    ACEfit.solve(solver, A, y)
+
+Overloads `ACEfit.solve` to use scikit-learn solvers from MLJ.
+
+# Example
+```julia
+using MLJ
+using ACEfit
+
+# Load ARD solver
+ARDRegressor = @load ARDRegressor pkg=MLJScikitLearnInterface
+
+# Create the solver itself and give it parameters
+solver = ARDRegressor(
+    n_iter = 300,
+    tol = 1e-3,
+    threshold_lambda = 10000
+    # more params
+)
+
+# fit ACE model
+linear_fit(training_data, basis, solver)
+
+# or lower level
+ACEfit.solve(solver, A, y)
+```
+"""
 function ACEfit.solve(solver, A, y)
     Atable = MLJ.table(A)
     mach = machine(solver, Atable, y)

From 44f3ad1cbd2f8915811be370f90655d3b8da4d63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20J=C3=A4rvinen?=
Date: Tue, 1 Aug 2023 10:23:10 -0700
Subject: [PATCH 5/6] add warnings to old SKLearn solvers about the coming change to MLJ

---
 src/solvers.jl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/solvers.jl b/src/solvers.jl
index 89b4e2f..54ac29c 100644
--- a/src/solvers.jl
+++ b/src/solvers.jl
@@ -126,7 +126,10 @@ struct SKLEARN_BRR
     n_iter::Integer
 end
 
-SKLEARN_BRR(; tol = 1e-3, n_iter = 300) = SKLEARN_BRR(tol, n_iter)
+function SKLEARN_BRR(; tol = 1e-3, n_iter = 300)
+    @warn "SKLearn will transition to MLJ in the future, please upgrade your script to reflect this."
+    SKLEARN_BRR(tol, n_iter)
+end
 
 # solve(solver::SKLEARN_BRR, ...) is implemented in ext/
 
@@ -140,6 +143,7 @@ struct SKLEARN_ARD
 end
 
 function SKLEARN_ARD(; n_iter = 300, tol = 1e-3, threshold_lambda = 10000)
+    @warn "SKLearn will transition to MLJ in the future, please upgrade your script to reflect this."
     SKLEARN_ARD(n_iter, tol, threshold_lambda)
 end
 

From 6f5d58954067557c307e9fb027dd5da19fba23f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Teemu=20J=C3=A4rvinen?=
Date: Fri, 4 Aug 2023 18:11:39 -0700
Subject: [PATCH 6/6] fix tests and parameter extraction

---
 ext/ACEfit_MLJLinearModels_ext.jl |  2 +-
 test/test_mlj.jl                  | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ext/ACEfit_MLJLinearModels_ext.jl b/ext/ACEfit_MLJLinearModels_ext.jl
index bb476fa..067259b 100644
--- a/ext/ACEfit_MLJLinearModels_ext.jl
+++ b/ext/ACEfit_MLJLinearModels_ext.jl
@@ -47,7 +47,7 @@ function ACEfit.solve(solver::Union{
     mach = machine(solver, Atable, y)
     MLJ.fit!(mach)
     params = fitted_params(mach)
-    return Dict{String, Any}("C" => params.coef )
+    return Dict{String, Any}("C" => map( x->x.second, params.coefs) )
 end
 
 end
\ No newline at end of file
diff --git a/test/test_mlj.jl b/test/test_mlj.jl
index 1f73f98..aa9324f 100644
--- a/test/test_mlj.jl
+++ b/test/test_mlj.jl
@@ -14,7 +14,15 @@ P = Diagonal(1.0 .+ rand(Nfeat))
 @info(" ... MLJLinearModels LinearRegressor")
 LinearRegressor = @load LinearRegressor pkg=MLJLinearModels
 solver = LinearRegressor()
-solver = ACEfit.LSQR(damp = 0, atol = 1e-6)
+results = ACEfit.solve(solver, A, y)
+C = results["C"]
+@show norm(A * C - y)
+@show norm(C)
+
+
+@info(" ... MLJLinearModels LassoRegressor")
+LassoRegressor = @load LassoRegressor pkg=MLJLinearModels
+solver = LassoRegressor()
 results = ACEfit.solve(solver, A, y)
 C = results["C"]
 @show norm(A * C - y)
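
For reference, the sketch below shows how the interface added by this series is exercised end to end, mirroring the docstrings and `test/test_mlj.jl` above. It is a minimal sketch, assuming ACEfit with these patches plus MLJ and MLJLinearModels are installed; the synthetic data `A` and `y` and the `lambda` value are illustrative placeholders, not values prescribed by the patches.

```julia
# Sketch of the workflow enabled by this patch series (assumption: ACEfit with
# the MLJ extensions above, MLJ, and MLJLinearModels are all in the environment).
using ACEfit
using LinearAlgebra
using MLJ

# Synthetic placeholder data, analogous to test/test_mlj.jl.
A = randn(1_000, 50) / sqrt(1_000)   # design matrix (observations x features)
y = randn(1_000)                     # target values

# Load and configure an MLJ solver; lambda here is an illustrative value.
LassoRegressor = @load LassoRegressor pkg=MLJLinearModels
solver = LassoRegressor(lambda = 0.1, fit_intercept = false)

# The extension overloads ACEfit.solve for MLJ solvers and returns the
# fitted coefficients under the "C" key.
results = ACEfit.solve(solver, A, y)
C = results["C"]
@show norm(A * C - y)
```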