From beaaa36be5195a11618769ae9aafcf683b8bc755 Mon Sep 17 00:00:00 2001 From: Phillip Alday <palday@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:56:12 -0500 Subject: [PATCH] Fix `simulate!` when only the estimable coefficients are provided (#756) * tinker * fix simulate when only the non pivoted coefs are provided * tests * patch bump and NEWS * improve rank deficiency handling in GLMM * more news * readability Co-authored-by: Alex Arslan <ararslan@comcast.net> * add pivot accessor * good point, @ararslan * more fullrankx --------- Co-authored-by: Alex Arslan <ararslan@comcast.net> --- NEWS.md | 7 +++++++ Project.toml | 2 +- src/Xymat.jl | 11 +++++++++++ src/bootstrap.jl | 14 ++++++++------ src/generalizedlinearmixedmodel.jl | 9 +++++---- src/linearmixedmodel.jl | 10 ++++++---- src/predict.jl | 6 +++--- src/simulate.jl | 18 +++++++++--------- test/bootstrap.jl | 27 +++++++++++++++++---------- 9 files changed, 67 insertions(+), 37 deletions(-) diff --git a/NEWS.md b/NEWS.md index 8df424dd8..cc2f91e03 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +MixedModels v4.23.1 Release Notes +============================== +* Fix for `simulate!` when only the estimable coefficients for a rank-deficient model are provided. [#756] +* Improve handling of rank deficiency in GLMM. [#756] +* Fix display of GLMM bootstrap without a dispersion parameter. [#756] + MixedModels v4.23.0 Release Notes ============================== * Support for rank deficiency in the parametric bootstrap. [#755] @@ -509,3 +515,4 @@ Package dependencies [#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744 [#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748 [#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755 +[#756]: https://github.com/JuliaStats/MixedModels.jl/issues/756 diff --git a/Project.toml b/Project.toml index 10dac745d..7020d82ac 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MixedModels" uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" author = ["Phillip Alday <me@phillipalday.com>", "Douglas Bates <dmbates@gmail.com>", "Jose Bayoan Santiago Calderon <jbs3hp@virginia.edu>"] -version = "4.23.0" +version = "4.23.1" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" diff --git a/src/Xymat.jl b/src/Xymat.jl index b85d81d8d..79ddffbc4 100644 --- a/src/Xymat.jl +++ b/src/Xymat.jl @@ -61,11 +61,22 @@ Base.copyto!(A::FeTerm{T}, src::AbstractVecOrMat{T}) where {T} = copyto!(A.x, sr Base.eltype(::FeTerm{T}) where {T} = T +""" + pivot(m::MixedModel) + pivot(A::FeTerm) + +Return the pivot associated with the FeTerm. +""" +@inline pivot(m::MixedModel) = pivot(m.feterm) +@inline pivot(A::FeTerm) = A.piv + function fullrankx(A::FeTerm) x, rnk = A.x, A.rank return rnk == size(x, 2) ? x : view(x, :, 1:rnk) # this handles the zero-columns case end +fullrankx(m::MixedModel) = fullrankx(m.feterm) + LinearAlgebra.rank(A::FeTerm) = A.rank """ diff --git a/src/bootstrap.jl b/src/bootstrap.jl index f5af4730b..269c5567f 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -626,11 +626,12 @@ function σρ!(v::AbstractVector, d::Diagonal, σ) return append!(v, σ .* d.diag) end -function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T} - """ - σρ!(v, t, σ) - push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` - """ +""" + σρ!(v, t, σ) + +push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` +""" +function σρ!(v::AbstractVector{<:Union{T,Missing}}, t::LowerTriangular, σ) where {T} dat = t.data for i in axes(dat, 1) ssqr = zero(T) @@ -659,11 +660,12 @@ end function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T} (; fits, λ) = bsamp + Tfull = ismissing(first(bsamp.fits).σ) ? Union{T,Missing} : T λcp = copy.(λ) syms = _syms(bsamp) m = length(syms) n = length(fits) - v = sizehint!(T[], m * n) + v = sizehint!(Tfull[], m * n) for f in fits (; β, θ, σ) = f push!(v, f.objective) diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl index 7973fecaf..e33429b81 100644 --- a/src/generalizedlinearmixedmodel.jl +++ b/src/generalizedlinearmixedmodel.jl @@ -54,7 +54,7 @@ struct GeneralizedLinearMixedModel{T<:AbstractFloat,D<:Distribution} <: MixedMod end function StatsAPI.coef(m::GeneralizedLinearMixedModel{T}) where {T} - piv = m.LMM.feterm.piv + piv = pivot(m) return invpermute!(copyto!(fill(T(-0.0), length(piv)), m.β), piv) end @@ -426,6 +426,7 @@ function GeneralizedLinearMixedModel( LMM.optsum, ) end + X = fullrankx(LMM.feterm) # if the response is constant, there's no point (and this may even fail) # we allow this instead of simply failing so that a constant response can # be used as the starting point to simulation where the response will be @@ -439,8 +440,8 @@ function GeneralizedLinearMixedModel( # XXX unfortunately, this means we have double-rank deficiency detection # TODO: construct GLM by hand so that we skip collinearity checks # TODO: extend this so that we never fit a GLM when initializing from LMM - dofit = size(LMM.X, 2) != 0 # GLM.jl kwarg - gl = glm(LMM.X, y, d, l; + dofit = size(X, 2) != 0 # GLM.jl kwarg + gl = glm(X, y, d, l; wts=convert(Vector{T}, wts), dofit, offset=convert(Vector{T}, offset)) @@ -793,7 +794,7 @@ function updateη!(m::GeneralizedLinearMixedModel{T}) where {T} b = m.b u = m.u reterms = m.LMM.reterms - mul!(η, modelmatrix(m), m.β) + mul!(η, fullrankx(m), m.β) for i in eachindex(b) mul!(η, reterms[i], vec(mul!(b[i], reterms[i].λ, u[i])), one(T), one(T)) end diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index 558a434e3..cc95b1a1a 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -267,11 +267,11 @@ function StatsAPI.fit( end function StatsAPI.coef(m::LinearMixedModel{T}) where {T} - return coef!(Vector{T}(undef, length(m.feterm.piv)), m) + return coef!(Vector{T}(undef, length(pivot(m))), m) end function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T} - piv = m.feterm.piv + piv = pivot(m) return invpermute!(fixef!(v, m), piv) end @@ -904,6 +904,8 @@ Overwrite `v` with the conditional modes of the random effects for `m`. If `uscale` is `true` the random effects are on the spherical (i.e. `u`) scale, otherwise on the original scale + +`β` is the truncated, pivoted coefficient vector. """ function ranef!( v::Vector, m::LinearMixedModel{T}, β::AbstractArray{T}, uscale::Bool @@ -1241,12 +1243,12 @@ function stderror!(v::AbstractVector{Tv}, m::LinearMixedModel{T}) where {Tv,T} scr[i] = true v[i] = s * norm(ldiv!(L, scr)) end - invpermute!(v, m.feterm.piv) + invpermute!(v, pivot(m)) return v end function StatsAPI.stderror(m::LinearMixedModel{T}) where {T} - return stderror!(similar(m.feterm.piv, T), m) + return stderror!(similar(pivot(m), T), m) end """ diff --git a/src/predict.jl b/src/predict.jl index d540af353..5a2246ac6 100644 --- a/src/predict.jl +++ b/src/predict.jl @@ -70,7 +70,7 @@ Similarly, offsets are also not supported for `GeneralizedLinearMixedModel`. function StatsAPI.predict( m::LinearMixedModel, newdata::Tables.ColumnTable; new_re_levels=:missing ) - return _predict(m, newdata, coef(m)[m.feterm.piv]; new_re_levels) + return _predict(m, newdata, coef(m)[pivot(m)]; new_re_levels) end function StatsAPI.predict( @@ -81,7 +81,7 @@ function StatsAPI.predict( ) type in (:linpred, :response) || throw(ArgumentError("Invalid value for type: $(type)")) # want pivoted but not truncated - y = _predict(m.LMM, newdata, coef(m)[m.feterm.piv]; new_re_levels) + y = _predict(m.LMM, newdata, coef(m)[pivot(m)]; new_re_levels) return type == :linpred ? y : broadcast!(Base.Fix1(linkinv, Link(m)), y, y) end @@ -126,7 +126,7 @@ function _predict(m::MixedModel{T}, newdata, β; new_re_levels) where {T} ytemp, lmm end - pivotmatch = mnew.feterm.piv[m.feterm.piv] + pivotmatch = pivot(mnew)[pivot(m)] grps = fnames(m) mul!(y, view(mnew.X, :, pivotmatch), β) # mnew.reterms for the correct Z matrices diff --git a/src/simulate.jl b/src/simulate.jl index 58a93c757..1b4159e1e 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -149,7 +149,7 @@ end function simulate!( rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ ) where {T} - length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank || + length(β) == length(pivot(m)) || length(β) == m.feterm.rank || throw(ArgumentError("You must specify all (non-singular) βs")) β = convert(Vector{T}, β) @@ -157,9 +157,10 @@ function simulate!( θ = convert(Vector{T}, θ) isempty(θ) || setθ!(m, θ) - if length(β) ≠ length(m.feterm.piv) - padding = length(model.feterm.piv) - m.feterm.rank + if length(β) ≠ length(pivot(m)) + padding = length(pivot(m)) - rank(m) append!(β, fill(-0.0, padding)) + invpermute!(β, pivot(m)) end # initialize y to standard normal @@ -228,7 +229,7 @@ function _simulate!( θ, resp=nothing, ) where {T} - length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank || + length(β) == length(pivot(m)) || length(β) == m.feterm.rank || throw(ArgumentError("You must specify all (non-singular) βs")) dispersion_parameter(m) || @@ -247,11 +248,10 @@ function _simulate!( d = m.resp.d - if length(β) ≠ length(m.feterm.piv) - padding = length(model.feterm.piv) - m.feterm.rank - append!(β, fill(-0.0, padding)) + if length(β) == length(pivot(m)) + # unlike LMM, GLMM stores the truncated, pivoted vector directly + β = view(β, view(pivot(m), 1:(rank(m)))) end - fast = (length(m.θ) == length(m.optsum.final)) setpar! = fast ? setθ! : setβθ! params = fast ? θ : vcat(β, θ) @@ -271,7 +271,7 @@ function _simulate!( # add fixed-effects contribution # note that unit scaling may not be correct for # families with a dispersion parameter - mul!(η, lm.X, β, one(T), one(T)) + mul!(η, fullrankx(lm), β, one(T), one(T)) μ = resp === nothing ? linkinv.(Link(m), η) : GLM.updateμ!(resp, η).mu diff --git a/test/bootstrap.jl b/test/bootstrap.jl index 6683d7a9b..576aad918 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -5,6 +5,7 @@ using Random using Statistics using StableRNGs using Statistics +using Suppressor using Tables using Test @@ -224,16 +225,22 @@ end @testset "Rank deficient" begin rng = MersenneTwister(0); x = rand(rng, 100); - data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5)) - model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false) - boot = quickboot(model, 10) - - dropped_idx = model.feterm.piv[end] - dropped_coef = coefnames(model)[dropped_idx] - @test all(boot.β) do nt - # if we're the dropped coef, then we must be -0.0 - # need isequal because of -0.0 - return nt.coefname != dropped_coef || isequal(nt.β, -0.0) + data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0,1], 100), z = repeat('A':'T', 5)) + @testset "$family" for family in [Normal(), Bernoulli()] + model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data, family; progress=false) + boot = quickboot(model, 10) + + dropped_idx = model.feterm.piv[end] + dropped_coef = coefnames(model)[dropped_idx] + @test all(boot.β) do nt + # if we're the dropped coef, then we must be -0.0 + # need isequal because of -0.0 + return nt.coefname != dropped_coef || isequal(nt.β, -0.0) + end + + yc = simulate(StableRNG(1), model; β=coef(model)) + yf = simulate(StableRNG(1), model; β=fixef(model)) + @test all(x -> isapprox(x...), zip(yc, yf)) end end end