JuliaSmoothOptimizers · amontoison · Aug 3, 2024
diff --git a/src/RipQP.jl b/src/RipQP.jl
@@ -16,7 +16,13 @@ using HSL,
 
 using Requires
 function __init__()
-  @require CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" include("gpu_utils.jl")
+  @require CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" begin
+    include("gpu_utils.jl")
+    @require CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e" begin
+      include("iterations/solvers/sparse_fact_utils/cudss_utils.jl")
+      include("iterations/solvers/augmented/K2CUDSS.jl")
+    end
+  end
   @require QDLDL = "bfc457fd-c171-5ab7-bd9e-d5dbfc242d63" include(
     "iterations/solvers/sparse_fact_utils/qdldl_utils.jl",
   )

diff --git a/src/iterations/solvers/augmented/K2CUDSS.jl b/src/iterations/solvers/augmented/K2CUDSS.jl
@@ -0,0 +1,76 @@
+# Formulation K2: (if regul==:classic, adds additional regularization parmeters -ρ (top left) and δ (bottom right))
+# [-Q - D     A' ] [x] = rhs
+# [ A         0  ] [y]
+export K2CUDSSParams
+
+"""
+Type to use the K2 formulation with an LDLᵀ factorization of cuDSS.
+The package [`CUDSS.jl`](https://github.com/exanauts/CUDSS.jl) is used by default.
+The outer constructor is
+
+    sp = K2CUDSSParams(uplo, ρ, δ)
+"""
+struct K2CUDSSParams{T<:Real} <: SolverParams
+    uplo :: Symbol # mandatory, tells RipQP which triangle of the augmented system to store
+    ρ    :: T # dual regularization
+    δ    :: T # primal regularization
+end
+
+mutable struct PreallocatedDataK2CUDSS{T<:Real, S} <: RipQP.PreallocatedDataAugmented{T, S}
+    D      :: S # temporary top-left diagonal of the K2 system
+    ρ      :: T # dual regularization
+    δ      :: T # primal regularization
+    K      :: CuSparseMatrixCSR{T,Cint} # K2 matrix
+    K_fact :: CudssSolver{T} # factorized K2
+end
+
+function RipQP.PreallocatedData(sp :: K2CUDSSParams, fd :: RipQP.QM_FloatData{T},
+                                id :: RipQP.QM_IntData, itd :: RipQP.IterData{T},
+                                pt :: RipQP.Point{T},
+                                iconf :: InputConfig{Tconf}) where {T<:Real, Tconf<:Real}
+
+    ρ, δ = T(sp.ρ), T(sp.δ)
+    K = CUDA.spzeros(T, id.ncon+id.nvar, id.ncon + id.nvar)
+    K[1:id.nvar, 1:id.nvar] = .-fd.Q .- ρ .* Diagonal(ones(T, id.nvar))
+    # A = Aᵀ of the input QuadraticModel since we use the upper triangle:
+    K[1:id.nvar, id.nvar+1:end] = fd.A 
+    K[diagind(K)[id.nvar+1:end]] .= δ
+
+    K_fact = ldlt(Symmetric(K, sp.data))
+    K_fact.__factorized = true
+
+    return PreallocatedDataK2CUDSS(CUDA.zeros(T, id.nvar),
+                                    ρ,
+                                    δ,
+                                    K, #K
+                                    K_fact #K_fact
+                                    )
+end
+
+function RipQP.update_pad!(pad :: PreallocatedDataK2basic{T}, dda :: RipQP.DescentDirectionAllocs{T},
+                           pt :: RipQP.Point{T}, itd :: RipQP.IterData{T},
+                           fd :: RipQP.Abstract_QM_FloatData{T}, id :: RipQP.QM_IntData,
+                           res :: RipQP.Residuals{T}, cnts :: RipQP.Counters) where {T<:Real}
+
+    # update the diagonal of K2
+    pad.D .= -pad.ρ
+    pad.D[id.ilow] .-= pt.s_l ./ itd.x_m_lvar
+    pad.D[id.iupp] .-= pt.s_u ./ itd.uvar_m_x
+    pad.D .-= fd.Q[diagind(fd.Q)]
+    pad.K[diagind(pad.K)[1:id.nvar]] = pad.D
+    pad.K[diagind(pad.K)[id.nvar+1:end]] .= pad.δ
+
+    # factorize K2
+    ldlt!(pad.K_fact, Symmetric(pad.K, :U))
+
+end
+
+function RipQP.solver!(dd :: AbstractVector{T}, pad :: PreallocatedDataK2basic{T},
+                       dda :: RipQP.DescentDirectionAllocsPC{T}, pt :: RipQP.Point{T},
+                       itd :: RipQP.IterData{T}, fd :: RipQP.Abstract_QM_FloatData{T},
+                       id :: RipQP.QM_IntData, res :: RipQP.Residuals{T},
+                       cnts :: RipQP.Counters, step :: Symbol) where {T<:Real}
+
+    ldiv!(pad.K_fact, dd)
+    return 0
+end
diff --git a/src/iterations/solvers/sparse_fact_utils/abstract-factorization.jl b/src/iterations/solvers/sparse_fact_utils/abstract-factorization.jl
@@ -1,4 +1,5 @@
-export AbstractFactorization, LDLFact, HSLMA57Fact, HSLMA97Fact, CholmodFact, QDLDLFact, LLDLFact
+export AbstractFactorization, LDLFact, HSLMA57Fact, HSLMA97Fact
+export CholmodFact, QDLDLFact, LLDLFact, CUDSSFact
 
 import LinearAlgebra.ldiv!
 
@@ -118,3 +119,19 @@ end
 LLDLFact(; regul::Symbol = :classic, mem::Int = 0, droptol::Float64 = 0.0) =
   LLDLFact(regul, mem, droptol)
 include("lldlfact_utils.jl")
+
+"""
+    fact_alg = CUDSSFact(; regul = :classic)
+
+Choose [`CUDSS.jl`](https://github.com/exanauts/CUDSS.jl) to perform LDLᵀ and Cholesky decompositions.
+"""
+struct CUDSSFact <: AbstractFactorization
+  regul::Symbol
+  structure::Char
+  view::Char
+  function CUDSSFact(regul::Symbol, structure::String, view::Char)
+    regul == :classic || regul == :none || error("regul should be :classic or :none")
+    return new(regul, structure, view)
+  end
+end
+CUDSSFact(; regul::Symbol = :classic, structure::String = "S", view::Char = 'U') = CUDSSFact(regul, structure, view)
diff --git a/src/iterations/solvers/sparse_fact_utils/cudss_utils.jl b/src/iterations/solvers/sparse_fact_utils/cudss_utils.jl
@@ -0,0 +1,57 @@
+using .CUDSS
+
+get_uplo(fact_alg::CUDSSFact) = fact_alg.view
+
+mutable struct CUDSSFactorization{T} <: FactorizationData{T}
+  F::CudssSolver{T}
+  type::Symbol
+  initialized::Bool
+  factorized::Bool
+end
+
+init_fact(K::Symmetric{T, CuSparseMatrixCSR{T, Cint}}, fact_alg::CUDSSFact, ::Type{T}) where {T} =
+  if fact_alg.structure == "S"
+    F = ldlt(K; view=fact_alg.view)
+    type = :ldlt
+  elseif fact_alg.structure == "SPD"
+    F = cholesky(K; view=fact_alg.view)
+    type = :cholesky
+  else
+    error("The provided structure is not supported by cuDSS.")
+  end
+  CUDSSFactorization(F, type, false, true)
+
+init_fact(K::Symmetric{T, CuSparseMatrixCSR{T, Cint}}, fact_alg::CUDSSFact) where {T} =
+  if fact_alg.structure == "S"
+    F = ldlt(K; view=fact_alg.view)
+    type = :ldlt
+  elseif fact_alg.structure == "SPD"
+    F = cholesky(K; view=fact_alg.view)
+    type = :cholesky
+  else
+    error("The provided structure is not supported by cuDSS.")
+  end
+  CUDSSFactorization(F, type, false, true)
+
+function generic_factorize!(
+  K::Symmetric{T, CuSparseMatrixCSR{T, Cint}},
+  K_fact::CUDSSFactorization{T},
+) where {T}
+  if K_fact.initialized
+    K_fact.facCUDSSFactorizationtorized = false
+    try
+      (type == :ldlt) && ldlt!(K_fact.F, K.data)
+      (type == :cholesky) && cholesky!(K_fact.F, K.data)
+      K_fact.factorized = true
+    catch
+      K_fact.factorized = false
+    end
+  end
+  !K_fact.initialized && (K_fact.initialized = true)
+end
+
+RipQP.factorized(K_fact::CUDSSFactorization) = K_fact.factorized
+
+function ldiv!(K_fact::CUDSSFactorization, dd::CuVector)
+  ldiv!(K_fact.F, dd)
+end