Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix simulate! when only the estimable coefficients are provided #756

Merged
merged 18 commits into from
Mar 25, 2024
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
MixedModels v4.23.1 Release Notes
==============================
* Fix for `simulate!` when only the estimable coefficients for a rank-deficient model are provided. [#756]
* Improve handling of rank deficiency in GLMM. [#756]
* Fix display of GLMM bootstrap without a dispersion parameter.

MixedModels v4.23.0 Release Notes
==============================
* Support for rank deficiency in the parametric bootstrap. [#755]
Expand Down Expand Up @@ -509,3 +515,4 @@ Package dependencies
[#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744
[#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748
[#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755
[#756]: https://github.com/JuliaStats/MixedModels.jl/issues/756
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MixedModels"
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
author = ["Phillip Alday <[email protected]>", "Douglas Bates <[email protected]>", "Jose Bayoan Santiago Calderon <[email protected]>"]
version = "4.23.0"
version = "4.23.1"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
14 changes: 8 additions & 6 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -626,11 +626,12 @@ function σρ!(v::AbstractVector, d::Diagonal, σ)
return append!(v, σ .* d.diag)
end

function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T}
"""
σρ!(v, t, σ)
push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
"""
σρ!(v, t, σ)

push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v`
"""
function σρ!(v::AbstractVector{<:Union{T,Missing}}, t::LowerTriangular, σ) where {T}
dat = t.data
for i in axes(dat, 1)
ssqr = zero(T)
Expand Down Expand Up @@ -659,11 +660,12 @@ end

function pbstrtbl(bsamp::MixedModelFitCollection{T}) where {T}
(; fits, λ) = bsamp
Tfull = ismissing(first(bsamp.fits).σ) ? Union{T,Missing} : T
λcp = copy.(λ)
syms = _syms(bsamp)
m = length(syms)
n = length(fits)
v = sizehint!(T[], m * n)
v = sizehint!(Tfull[], m * n)
for f in fits
(; β, θ, σ) = f
push!(v, f.objective)
Expand Down
13 changes: 7 additions & 6 deletions src/generalizedlinearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ function GeneralizedLinearMixedModel(
LMM.optsum,
)
end
X = view(LMM.X, :, 1:LMM.feterm.rank)
palday marked this conversation as resolved.
Show resolved Hide resolved
# if the response is constant, there's no point (and this may even fail)
# we allow this instead of simply failing so that a constant response can
# be used as the starting point to simulation where the response will be
Expand All @@ -439,11 +440,11 @@ function GeneralizedLinearMixedModel(
# XXX unfortunately, this means we have double-rank deficiency detection
# TODO: construct GLM by hand so that we skip collinearity checks
# TODO: extend this so that we never fit a GLM when initializing from LMM
dofit = size(LMM.X, 2) != 0 # GLM.jl kwarg
gl = glm(LMM.X, y, d, l;
wts=convert(Vector{T}, wts),
dofit,
offset=convert(Vector{T}, offset))
dofit = size(X, 2) != 0 # GLM.jl kwarg
gl = glm(X, y, d, l;
wts=convert(Vector{T}, wts),
dofit,
offset=convert(Vector{T}, offset))
palday marked this conversation as resolved.
Show resolved Hide resolved
β = dofit ? coef(gl) : T[]
u = [fill(zero(eltype(y)), vsize(t), nlevs(t)) for t in LMM.reterms]
# vv is a template vector used to initialize fields for AGQ
Expand Down Expand Up @@ -793,7 +794,7 @@ function updateη!(m::GeneralizedLinearMixedModel{T}) where {T}
b = m.b
u = m.u
reterms = m.LMM.reterms
mul!(η, modelmatrix(m), m.β)
mul!(η, view(modelmatrix(m), :, 1:m.LMM.feterm.rank), m.β)
palday marked this conversation as resolved.
Show resolved Hide resolved
for i in eachindex(b)
mul!(η, reterms[i], vec(mul!(b[i], reterms[i].λ, u[i])), one(T), one(T))
end
Expand Down
2 changes: 2 additions & 0 deletions src/linearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,8 @@ Overwrite `v` with the conditional modes of the random effects for `m`.

If `uscale` is `true` the random effects are on the spherical (i.e. `u`) scale, otherwise
on the original scale

`β` is the truncated, pivoted coefficient vector.
"""
function ranef!(
v::Vector, m::LinearMixedModel{T}, β::AbstractArray{T}, uscale::Bool
Expand Down
13 changes: 6 additions & 7 deletions src/simulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ function simulate!(
isempty(θ) || setθ!(m, θ)

if length(β) ≠ length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
append!(β, fill(-0.0, padding))
β = invpermute!(copyto!(fill(-0.0, length(m.feterm.piv)), β),
palday marked this conversation as resolved.
Show resolved Hide resolved
m.feterm.piv)
end

# initialize y to standard normal
Expand Down Expand Up @@ -247,11 +247,10 @@ function _simulate!(

d = m.resp.d

if length(β) ≠ length(m.feterm.piv)
padding = length(model.feterm.piv) - m.feterm.rank
append!(β, fill(-0.0, padding))
if length(β) == length(m.feterm.piv)
# unlike LMM, GLMM stores the truncated, pivoted vector directly
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tangential to this PR but it could be convenient to have some kind of pivot accessor to retrieve this array.

β = β[view(m.feterm.piv, 1:m.feterm.rank)]
palday marked this conversation as resolved.
Show resolved Hide resolved
end

fast = (length(m.θ) == length(m.optsum.final))
setpar! = fast ? setθ! : setβθ!
params = fast ? θ : vcat(β, θ)
Expand All @@ -271,7 +270,7 @@ function _simulate!(
# add fixed-effects contribution
# note that unit scaling may not be correct for
# families with a dispersion parameter
mul!(η, lm.X, β, one(T), one(T))
mul!(η, view(lm.X, :, 1:lm.feterm.rank), β, one(T), one(T))
palday marked this conversation as resolved.
Show resolved Hide resolved

μ = resp === nothing ? linkinv.(Link(m), η) : GLM.updateμ!(resp, η).mu

Expand Down
27 changes: 17 additions & 10 deletions test/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ using Random
using Statistics
using StableRNGs
using Statistics
using Suppressor
using Tables
using Test

Expand Down Expand Up @@ -224,16 +225,22 @@ end
@testset "Rank deficient" begin
rng = MersenneTwister(0);
x = rand(rng, 100);
data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5))
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0,1], 100), z = repeat('A':'T', 5))
@testset "$family" for family in [Normal(), Bernoulli()]
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data, family; progress=false)
boot = quickboot(model, 10)

dropped_idx = model.feterm.piv[end]
dropped_coef = coefnames(model)[dropped_idx]
@test all(boot.β) do nt
# if we're the dropped coef, then we must be -0.0
# need isequal because of -0.0
return nt.coefname != dropped_coef || isequal(nt.β, -0.0)
palday marked this conversation as resolved.
Show resolved Hide resolved
end

yc = simulate(StableRNG(1), model; β=coef(model))
yf = simulate(StableRNG(1), model; β=fixef(model))
@test all(x -> isapprox(x...), zip(yc, yf))
end
end
end
Expand Down
Loading