diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 883418267..563a6b81e 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -30,8 +30,7 @@ jobs: version: ${{ matrix.julia-version }} - uses: julia-actions/cache@v1 with: - cache-compiled: "true" - - uses: julia-actions/julia-buildpkg@v1 + cache-compiled: true - uses: julia-actions/julia-runtest@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 119a53c1a..a4fdd21fb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ docs/src/assets/*.svg test/scratch.jl tune.json Manifest.toml +Manifest-v1.*.toml settings.json docs/jmd LocalPreferences.toml diff --git a/NEWS.md b/NEWS.md index e880cbc99..8df424dd8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +MixedModels v4.23.0 Release Notes +============================== +* Support for rank deficiency in the parametric bootstrap. [#755] + MixedModels v4.22.5 Release Notes ============================== * Use `muladd` where possible to enable fused multiply-add (FMA) on architectures with hardware support. FMA will generally improve computational speed and gives more accurate rounding. [#740] @@ -504,3 +508,4 @@ Package dependencies [#740]: https://github.com/JuliaStats/MixedModels.jl/issues/740 [#744]: https://github.com/JuliaStats/MixedModels.jl/issues/744 [#748]: https://github.com/JuliaStats/MixedModels.jl/issues/748 +[#755]: https://github.com/JuliaStats/MixedModels.jl/issues/755 diff --git a/Project.toml b/Project.toml index 30112b4d9..10dac745d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MixedModels" uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "] -version = "4.22.5" +version = "4.23.0" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" diff --git a/src/bootstrap.jl b/src/bootstrap.jl index b009199ef..f5af4730b 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -80,7 +80,7 @@ been constructed with the same structure as the source of the saved replicates. The two-argument method constructs a [`MixedModelBootstrap`](@ref) with the same eltype as `m`. If an eltype is specified as the third argument, then a `MixedModelBootstrap` is returned. -If a subtype of `MixedModelFitCollection` is specified as the third argument, then that +If a subtype of `MixedModelFitCollection` is specified as the third argument, then that is the return type. See also [`savereplicates`](@ref), [`restoreoptsum!`](@ref). @@ -91,7 +91,7 @@ end # why this weird second method? it allows us to define custom types and write methods # to load into those types directly. For example, we could define a `PowerAnalysis <: MixedModelFitCollection` -# in MixedModelsSim and then overload this method to get a convenient object. +# in MixedModelsSim and then overload this method to get a convenient object. # Also, this allows us to write `restorereplicateS(f, m, ::Type{<:MixedModelNonparametricBootstrap})` for # entities in MixedModels bootstrap function restorereplicates( @@ -120,14 +120,14 @@ end """ savereplicates(f, b::MixedModelFitCollection) -Save the replicates associated with a [`MixedModelFitCollection`](@ref), -e.g. [`MixedModelBootstrap`](@ref) as an Arrow file. +Save the replicates associated with a [`MixedModelFitCollection`](@ref), +e.g. [`MixedModelBootstrap`](@ref) as an Arrow file. See also [`restorereplicates`](@ref), [`saveoptsum`](@ref) !!! note **Only** the replicates are saved, not the entire contents of the `MixedModelFitCollection`. - `restorereplicates` requires a model compatible with the bootstrap to restore the full object. + `restorereplicates` requires a model compatible with the bootstrap to restore the full object. """ savereplicates(f, b::MixedModelFitCollection) = Arrow.write(f, b.fits) @@ -200,6 +200,14 @@ fit during the bootstrapping process. For example, `optsum_overrides=(;ftol_rel= reduces the convergence criterion, which can greatly speed up the bootstrap fits. Taking advantage of this speed up to increase `n` can often lead to better estimates of coverage intervals. + + +!!! note + All coefficients are bootstrapped. In the rank deficient case, the inestimatable coefficients are + treated as -0.0 in the simulations underlying the bootstrap, which will generally result + in their estimate from the simulated data also being being inestimable and thus set to -0.0. + **However this behavior may change in future releases to explicitly drop the + extraneous columns before simulation and thus not include their estimates in the bootstrap result.** """ function parametricbootstrap( rng::AbstractRNG, @@ -234,7 +242,7 @@ function parametricbootstrap( # this seemed to slow things down?! # _copy_away_from_lowerbd!(m.optsum.initial, morig.optsum.final, m.lowerbd; incr=0.05) - β_names = Tuple(Symbol.(fixefnames(morig))) + β_names = Tuple(Symbol.(coefnames(morig))) use_threads && Base.depwarn( "use_threads is deprecated and will be removed in a future release", @@ -247,7 +255,7 @@ function parametricbootstrap( ( objective=ftype.(m.objective), σ=ismissing(m.σ) ? missing : ftype(m.σ), - β=NamedTuple{β_names}(fixef!(βsc, m)), + β=NamedTuple{β_names}(coef!(βsc, m)), se=SVector{p,ftype}(stderror!(βsc, m)), θ=SVector{k,ftype}(getθ!(θsc, m)), ) @@ -331,8 +339,8 @@ end Compute bootstrap confidence intervals for coefficients and variance components, with confidence level level (by default 95%). -The keyword argument `method` determines whether the `:shortest`, i.e. highest density, interval is used -or the `:equaltail`, i.e. quantile-based, interval is used. For historical reasons, the default is `:shortest`, +The keyword argument `method` determines whether the `:shortest`, i.e. highest density, interval is used +or the `:equaltail`, i.e. quantile-based, interval is used. For historical reasons, the default is `:shortest`, but `:equaltail` gives the interval that is most comparable to the profile and Wald confidence intervals. !!! note @@ -621,7 +629,7 @@ end function σρ!(v::AbstractVector{T}, t::LowerTriangular{T}, σ::T) where {T} """ σρ!(v, t, σ) - push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` + push! `σ` times the row lengths (σs) and the inner products of normalized rows (ρs) of `t` onto `v` """ dat = t.data for i in axes(dat, 1) diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl index 6486fd4a0..7973fecaf 100644 --- a/src/generalizedlinearmixedmodel.jl +++ b/src/generalizedlinearmixedmodel.jl @@ -113,8 +113,8 @@ StatsAPI.deviance(m::GeneralizedLinearMixedModel) = deviance(m, m.optsum.nAGQ) fixef(m::GeneralizedLinearMixedModel) = m.β -function fixef!(v::AbstractVector{T}, m::GeneralizedLinearMixedModel{T}) where {T} - return copyto!(fill!(v, -zero(T)), m.β) +function fixef!(v::AbstractVector{Tv}, m::GeneralizedLinearMixedModel{T}) where {Tv,T} + return copyto!(fill!(v, -zero(Tv)), m.β) end objective(m::GeneralizedLinearMixedModel) = deviance(m) diff --git a/src/linearmixedmodel.jl b/src/linearmixedmodel.jl index 3827f41ec..558a434e3 100644 --- a/src/linearmixedmodel.jl +++ b/src/linearmixedmodel.jl @@ -267,8 +267,12 @@ function StatsAPI.fit( end function StatsAPI.coef(m::LinearMixedModel{T}) where {T} + return coef!(Vector{T}(undef, length(m.feterm.piv)), m) +end + +function coef!(v::AbstractVector{Tv}, m::MixedModel{T}) where {Tv,T} piv = m.feterm.piv - return invpermute!(fixef!(similar(piv, T), m), piv) + return invpermute!(fixef!(v, m), piv) end βs(m::LinearMixedModel) = NamedTuple{(Symbol.(coefnames(m))...,)}(coef(m)) diff --git a/src/simulate.jl b/src/simulate.jl index 1906ab943..58a93c757 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -110,9 +110,6 @@ scale. The `wts` argument is currently ignored except for `GeneralizedLinearMixedModel` models with a `Binomial` distribution. -!!! warning - Models are assumed to be full rank. - !!! note Note that `simulate!` methods with a `y::AbstractVector` as the first argument (besides the RNG) and `simulate` methods return the simulated response. This is @@ -152,8 +149,7 @@ end function simulate!( rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ ) where {T} - length(β) == length(fixef(m)) || - length(β) == length(coef(m)) || + length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank || throw(ArgumentError("You must specify all (non-singular) βs")) β = convert(Vector{T}, β) @@ -161,11 +157,9 @@ function simulate!( θ = convert(Vector{T}, θ) isempty(θ) || setθ!(m, θ) - if length(β) ≠ length(coef(m)) - padding = length(coef(m)) - length(β) - for ii in 1:padding - push!(β, -0.0) - end + if length(β) ≠ length(m.feterm.piv) + padding = length(model.feterm.piv) - m.feterm.rank + append!(β, fill(-0.0, padding)) end # initialize y to standard normal @@ -234,8 +228,7 @@ function _simulate!( θ, resp=nothing, ) where {T} - length(β) == length(fixef(m)) || - length(β) == length(coef(m)) || + length(β) == length(m.feterm.piv) || length(β) == m.feterm.rank || throw(ArgumentError("You must specify all (non-singular) βs")) dispersion_parameter(m) || @@ -254,11 +247,9 @@ function _simulate!( d = m.resp.d - if length(β) ≠ length(coef(m)) - padding = length(coef(m)) - length(β) - for ii in 1:padding - push!(β, -0.0) - end + if length(β) ≠ length(m.feterm.piv) + padding = length(model.feterm.piv) - m.feterm.rank + append!(β, fill(-0.0, padding)) end fast = (length(m.θ) == length(m.optsum.final)) diff --git a/test/bootstrap.jl b/test/bootstrap.jl index f3a0da076..6683d7a9b 100644 --- a/test/bootstrap.jl +++ b/test/bootstrap.jl @@ -189,7 +189,7 @@ end @test pb1 == restorereplicates(seekstart(io), MixedModels.unfit!(deepcopy(m1))) @test pb1 ≈ restorereplicates(seekstart(io), m1, Float16) rtol=1 -end + end @testset "Bernoulli simulate! and GLMM bootstrap" begin contra = dataset(:contra) @@ -220,6 +220,22 @@ end σ=2, progress=false) @test sum(issingular(bs)) == 0 end + + @testset "Rank deficient" begin + rng = MersenneTwister(0); + x = rand(rng, 100); + data = (x = x, x2 = 1.5 .* x, y = rand(rng, 100), z = repeat('A':'T', 5)) + model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data; progress=false) + boot = quickboot(model, 10) + + dropped_idx = model.feterm.piv[end] + dropped_coef = coefnames(model)[dropped_idx] + @test all(boot.β) do nt + # if we're the dropped coef, then we must be -0.0 + # need isequal because of -0.0 + return nt.coefname != dropped_coef || isequal(nt.β, -0.0) + end + end end @testset "show and summary" begin @@ -236,7 +252,7 @@ end @test nrow(df) == 151 @test propertynames(df) == collect(propertynames(pr.tbl)) - @testset "CI method comparison" begin + @testset "CI method comparison" begin level = 0.68 ci_boot_equaltail = confint(pb; level, method=:equaltail) ci_boot_shortest = confint(pb; level, method=:shortest)