diff --git a/Project.toml b/Project.toml index 49fed8a..96ed0c6 100644 --- a/Project.toml +++ b/Project.toml @@ -8,7 +8,9 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" [compat] +Aqua = "0.8" Distributions = "0.25" +HypothesisTests = "0.11" SpecialFunctions = "2" Test = "1.6" TestItemRunner = "0.2" @@ -16,8 +18,9 @@ julia = "1.6.7" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a" [targets] -test = ["Aqua", "Test", "TestItemRunner"] +test = ["Aqua", "Test", "TestItemRunner", "HypothesisTests"] diff --git a/docs/make.jl b/docs/make.jl index fe28ffa..e0f0cc5 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -23,9 +23,11 @@ makedocs(; "index.md", "utils.md", "Implemented Distributions" => [ + "distros/PCHD.md", "distros/GenGamma.md", "distros/PGW.md", "distros/EW.md", + "distros/LogLogistic.md", ], "references.md" ], diff --git a/docs/src/distros/LogLogistic.md b/docs/src/distros/LogLogistic.md new file mode 100644 index 0000000..7d9791b --- /dev/null +++ b/docs/src/distros/LogLogistic.md @@ -0,0 +1,36 @@ +# LogLogistic + +## Definition + +The [LogLogistic](https://en.wikipedia.org/wiki/Log-logistic_distribution) distribution is the probability distribution of a random variable whose logarithm has a logistic distribution. It is similar in shape to the log-normal distribution but has heavier tails. + +It is used in survival analysis as a parametric model for events whose rate increases initially and decreases later, as, for example, mortality rate from cancer following diagnosis or treatment. It has also been used in hydrology to model stream flow and precipitation, in economics as a simple model of the distribution of wealth or income, and in networking to model the transmission times of data considering both the network and the software. + +## Examples + +Let us sample a dataset from an Exponentiated Weibull distribution: + +```@example 1 +using SurvivalDistributions, Distributions, Random, Plots, StatsBase +Random.seed!(123) +D = LogLogistic(1,2) +sim = rand(D,1000); +``` + +First, let's have a look at the hazard function: +```@example 1 +plot(t -> hazard(D,t), ylabel = "Hazard", xlims = (0,10)) +``` + +Then, we can verify the coherence of our code by comparing the obtained sample and the true pdf: +```@example 1 +histogram(sim, normalize=:pdf, bins = range(0, 5, length=30)) +plot!(t -> pdf(D,t), ylabel = "Density", xlims = (0,5)) +``` + +We could also compare the empirical and theroetical cdfs: +```@example 1 +ecdfsim = ecdf(sim) +plot(x -> ecdfsim(x), 0, 5, label = "ECDF", linecolor = "gray", linewidth=3) +plot!(t -> cdf(D,t), xlabel = "x", ylabel = "CDF vs. ECDF", xlims = (0,5)) +``` \ No newline at end of file diff --git a/docs/src/distros/PCHD.md b/docs/src/distros/PCHD.md new file mode 100644 index 0000000..638e3f4 --- /dev/null +++ b/docs/src/distros/PCHD.md @@ -0,0 +1,39 @@ +# Piecewise constant hazard distributions + +## Definition + +The `PiecewiseConstantHazardDistribution` is one of the most simple and yet most usefull distribution provided in this package. These distributions are defined by their hazard functions, which are assumed to be piecewise constant (hence their names). + +While dealing with census data and rate tables, having a survival model defined by a piecewise constant hazard is very common. In particular, random lifes extracted from `RateTable`s from [`RateTables.jl`](https://github.com/JuliaSurv/RateTables.jl) follows this pattern. + + +## Examples + +```@example 1 +using SurvivalDistributions, Distributions, Random, Plots, StatsBase +Random.seed!(123) +∂t = rand(20) +λ = rand(20) +D = PiecewiseConstantHazardDistribution(∂t,λ) +sim = rand(D,1000); +``` + +First, let's have a look at the hazard function: +```@example 1 +plot(t -> hazard(D,t), ylabel = "Hazard", xlims = (0,10)) +``` + +As excepted, it is quite random. + +Then, we can verify the coherence of our code by comparing the obtained sample and the true pdf: +```@example 1 +histogram(sim, normalize=:pdf, bins = range(0, 5, length=30)) +plot!(t -> pdf(D,t), ylabel = "Density", xlims = (0,5)) +``` + +The comparison is not too bad ! We could also compare the empirical and theroetical cdfs: +```@example 1 +ecdfsim = ecdf(sim) +plot(x -> ecdfsim(x), 0, 5, label = "ECDF", linecolor = "gray", linewidth=3) +plot!(t -> cdf(D,t), xlabel = "x", ylabel = "CDF vs. ECDF", xlims = (0,5)) +``` \ No newline at end of file diff --git a/src/SurvivalDistributions.jl b/src/SurvivalDistributions.jl index 7cd19b4..dbc9899 100644 --- a/src/SurvivalDistributions.jl +++ b/src/SurvivalDistributions.jl @@ -4,19 +4,23 @@ module SurvivalDistributions using SpecialFunctions: loggamma - using Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand, ContinuousUnivariateDistribution, UnivariateDistribution, @distr_support, AbstractRNG, Weibull, Gamma, Logistic - import Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand + using Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand, ContinuousUnivariateDistribution, UnivariateDistribution, @distr_support, AbstractRNG, Weibull, Gamma, Logistic, expectation + import Distributions: logcdf, logccdf, ccdf, cdf, pdf, logpdf, quantile, rand, expectation # Export a few utilities : include("utilities.jl") export censored_loglikelihood, hazard, loghazard, cumhazard # Export other distributions: + include("distros/AbstractHazardDistribution.jl") + include("distros/PiecewiseConstantHazardDistribution.jl") include("distros/ExpoDist.jl") + include("distros/ExponentiatedWeibull.jl") include("distros/GeneralizedGamma.jl") include("distros/PowerGeneralizedWeibull.jl") include("distros/LogLogistic.jl") - export ExpoDist, ExponentiatedWeibull, GeneralizedGamma, PowerGeneralizedWeibull, LogLogistic + + export ExpoDist, ExponentiatedWeibull, GeneralizedGamma, PowerGeneralizedWeibull, LogLogistic, PiecewiseConstantHazardDistribution end diff --git a/src/distros/AbstractHazardDistribution.jl b/src/distros/AbstractHazardDistribution.jl new file mode 100644 index 0000000..2764593 --- /dev/null +++ b/src/distros/AbstractHazardDistribution.jl @@ -0,0 +1,16 @@ +abstract type AbstractHazardDistribution <: ContinuousUnivariateDistribution end +@distr_support AbstractHazardDistribution 0.0 Inf +loghazard(X::AbstractHazardDistribution, t::Real) = log(hazard(X,t)) +cumhazard(X::AbstractHazardDistribution, t::Real) = quadgk(u -> hazard(X,u), 0, t)[1] +logccdf( X::AbstractHazardDistribution, t::Real) = -cumhazard(X,t) +ccdf( X::AbstractHazardDistribution, t::Real) = exp(-cumhazard(X,t)) +cdf( X::AbstractHazardDistribution, t::Real) = -expm1(-cumhazard(X,t)) +logcdf( X::AbstractHazardDistribution, t::Real) = log1mexp(-cumhazard(X,t)) +pdf( X::AbstractHazardDistribution, t::Real) = hazard(X,t)*ccdf(X,t) +logpdf( X::AbstractHazardDistribution, t::Real) = loghazard(X,t) - cumhazard(X,t) +function quantile( X::AbstractHazardDistribution, t::Real) + u = log(1-t) + return find_zero(x -> u + cumhazard(X,x), (0.0, Inf)) +end + + diff --git a/src/distros/ExpoDist.jl b/src/distros/ExpoDist.jl index 691bf01..74f551d 100644 --- a/src/distros/ExpoDist.jl +++ b/src/distros/ExpoDist.jl @@ -1,7 +1,7 @@ """ -ExpoDist(γ, X) + ExpoDist(γ, X) - A power distribution with power γ and base distribution X<:ContinuousUnivariateDistribution is defined as the distribution that has cumulative distribution function ``F^γ`` where F was the distribution function of X. +A power distribution with power γ and base distribution X<:ContinuousUnivariateDistribution is defined as the distribution that has cumulative distribution function ``F^γ`` where F was the distribution function of X. """ struct ExpoDist{D} <: ContinuousUnivariateDistribution γ::Float64 diff --git a/src/distros/ExponentiatedWeibull.jl b/src/distros/ExponentiatedWeibull.jl index c63221e..36c0f2c 100644 --- a/src/distros/ExponentiatedWeibull.jl +++ b/src/distros/ExponentiatedWeibull.jl @@ -1,18 +1,13 @@ """ - ExponentiatedWeibull(sigma,nu,gamma) + ExponentiatedWeibull(α,θ,γ) -The *ExponentiatedWeibull distribution* with scale `sigma`, shape `nu` and second shape `gamma` has probability density function +The [Exponentiated Weibull distribution](https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution) is obtain by exponentiating the cdf of the [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution). This simple transformation adds a second shape parameter that, interestingly, induces a lot of flexibility on the hazard function. The hazard function of the Exponentiated Weibull distribution can capture the basic shapes: constant, increasing, decreasing, bathtub, and unimodal, making it appealing for survival models. -```math -f(x; parameters) = ... -``` - -More details and examples of usage could be provided in this docstring. - -Maybe this distribution could simply be constructed from a transformation of the original Weibull ? +A random variable X follows an `ExponentiatedWeibull(α,θ,γ)` distribution when it has cumulative distribution function ``F_X = F_W^{γ}`` where ``F_W`` is the cumulative distribution function of a `Weibull(α,θ)`. References: -* [Link to my reference so that people understand what it is](https://myref.com) +* [Exponentiated Weibull distribution](https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution) +* [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution) """ const ExponentiatedWeibull{T} = ExpoDist{Weibull{T}} -ExponentiatedWeibull(sigma,nu,gamma) = ExpoDist(gamma, Weibull(nu,sigma)) \ No newline at end of file +ExponentiatedWeibull(α,θ,γ) = ExpoDist(γ, Weibull(α,θ)) \ No newline at end of file diff --git a/src/distros/GeneralizedGamma.jl b/src/distros/GeneralizedGamma.jl index 314aa0a..ad2ec9f 100644 --- a/src/distros/GeneralizedGamma.jl +++ b/src/distros/GeneralizedGamma.jl @@ -1,18 +1,11 @@ """ - GeneralizedGamma(sigma,nu,gamma) + GeneralizedGamma(σ,nu,gamma) -The *GeneralizedGamma distribution* with scale `sigma`, shape `nu` and second shape `gamma` has probability density function - -```math -f(x; parameters) = ... -``` - -More details and examples of usage could be provided in this docstring. - -Maybe this distribution could simply be constructed from a transformation of the original Weibull ? +The [Generalised Gamma](https://en.wikipedia.org/wiki/Generalized_gamma_distribution) (GG) distribution is a three-parameter distribution with support on ``{\\mathbb R}_+``. The corresponding hazard function can accommodate bathtub, unimodal and monotone (increasing and decreasing) hazard shapes. The GG distribution has become popular in survival analysis due to its flexibility. References: -* [Link to my reference so that people understand what it is](https://myref.com) +* [Generalised Gamma](https://en.wikipedia.org/wiki/Generalized_gamma_distribution) +* [stacy:1962](@cite) Stacy, E.W. A generalization of the gamma distribution, *The Annals of Mathematical Statistics*, 1962 """ struct GeneralizedGamma{T<:Real} <: ContinuousUnivariateDistribution sigma::T diff --git a/src/distros/LogLogistic.jl b/src/distros/LogLogistic.jl index 82f378b..5b71979 100644 --- a/src/distros/LogLogistic.jl +++ b/src/distros/LogLogistic.jl @@ -1,34 +1,38 @@ """ - LogLogistic(mu,sigma) + LogLogistic(μ,σ) -To be described... +According to [its wikipedia page](https://en.wikipedia.org/wiki/Log-logistic_distribution), the the log-logistic distribution (known as the Fisk distribution in economics) is a continuous probability distribution for a non-negative random variable. It is used in survival analysis as a parametric model for events whose rate increases initially and decreases later, as, for example, mortality rate from cancer following diagnosis or treatment. It has also been used in hydrology to model stream flow and precipitation, in economics as a simple model of the distribution of wealth or income, and in networking to model the transmission times of data considering both the network and the software. +The log-logistic distribution is the probability distribution of a random variable whose logarithm has a logistic distribution. It is similar in shape to the log-normal distribution but has heavier tails. Unlike the log-normal, its cumulative distribution function can be written in closed form. + +It is characterized by its density function as ```math -f(x; parameters) = ... +f(x) = \\frac{(\\frac{β}{α})(\\frac{x}{α})^{β-1} }{(1 + (\\frac{x}{α})^{β})^2}, ``` +where α = e^μ and β = 1/σ. """ struct LogLogistic{T<:Real} <: ContinuousUnivariateDistribution - mu::T - sigma::T - function LogLogistic(mu,sigma) - T = promote_type(Float64, eltype.((mu,sigma))...) - return new{T}(T(mu), T(sigma)) + X::Logistic{T} + function LogLogistic(μ,σ) + X = Logistic(μ, σ) + return new{eltype(X)}(X) end end LogLogistic() = LogLogistic(1,1) -params(d::LogLogistic) = (d.mu,d.sigma) +params(d::LogLogistic) = (d.X.μ,d.X.θ) @distr_support LogLogistic 0.0 Inf function loghazard(d::LogLogistic, t::Real) - lt = log.(t) - lpdf0 = logpdf.(Logistic(d.mu, d.sigma), lt) .- lt - ls0 = logccdf.(Logistic(d.mu, d.sigma), lt) - return lpdf0 .- ls0 + lt = log(t) + lpdf0 = logpdf(Logistic(d.X.μ, d.X.θ), lt) + ls0 = logccdf(Logistic(d.X.μ, d.X.θ), lt) + return lpdf0 - ls0 - lt end function cumhazard(d::LogLogistic,t::Real) lt = log.(t) - return -logccdf.(Logistic(d.mu, d.sigma), lt) + return -logccdf.(Logistic(d.X.μ, d.X.θ), lt) end logpdf(d::LogLogistic, t::Real) = loghazard(d,t) - cumhazard(d,t) -cdf(d::LogLogistic, t::Real) = -expm1(-cumhazard(d,t)) \ No newline at end of file +cdf(d::LogLogistic, t::Real) = -expm1(-cumhazard(d,t)) +rand(rng::AbstractRNG, d::LogLogistic) = exp(rand(rng,d.X)) diff --git a/src/distros/PiecewiseConstantHazardDistribution.jl b/src/distros/PiecewiseConstantHazardDistribution.jl new file mode 100644 index 0000000..ea9cd1a --- /dev/null +++ b/src/distros/PiecewiseConstantHazardDistribution.jl @@ -0,0 +1,60 @@ +struct PiecewiseConstantHazardDistribution <: AbstractHazardDistribution + ∂t::Vector{Float64} + λ::Vector{Float64} +end +# the three folowing functions are actually enough i think to be able to sample eficiently for piecewise constant hazard distributions. +function hazard(D::PiecewiseConstantHazardDistribution, t::Real) + u = 0.0 + for i in 1:length(D.∂t) + u += D.∂t[i] + if t < u + return D.λ[i] + end + end + return D.λ[end] +end +function cumhazard(D::PiecewiseConstantHazardDistribution, t::Real) + Λ = 0.0 + u = 0.0 + for j in eachindex(D.∂t) + u += D.∂t[j] + if t > u + Λ += D.λ[j]*D.∂t[j] + else + Λ += D.λ[j]*(t-(u-D.∂t[j])) + return Λ + end + end + # We consider that the last box is in fact infinitely wide (exponential tail) + return Λ + (t-u)*L.λ[end] +end +function quantile(D::PiecewiseConstantHazardDistribution, p::Real) + Λ_target = -log(1-p) + Λ = 0.0 + u = 0.0 + for j in eachindex(D.∂t) + Λ += D.λ[j]*D.∂t[j] + u += D.∂t[j] + if Λ_target < Λ + u -= (Λ - Λ_target) / D.λ[j] + return u + end + end + return u +end +function expectation(L::PiecewiseConstantHazardDistribution) + S = 1.0 + E = 0.0 + for j in eachindex(D.∂t) + if D.λ[j] > 0 + S_inc = exp(-D.λ[j]*D.∂t[j]) + E += S * (1 - S_inc) / D.λ[j] + S *= S_inc + else + E += S * D.∂t[j] + end + end + # This reminder assumes a exponential life time afer the maximuum age. + R = ifelse(D.λ[end] == 0.0, 0.0, S / D.λ[end]) + return E + R +end diff --git a/src/distros/PowerGeneralizedWeibull.jl b/src/distros/PowerGeneralizedWeibull.jl index 808c27f..dc58323 100644 --- a/src/distros/PowerGeneralizedWeibull.jl +++ b/src/distros/PowerGeneralizedWeibull.jl @@ -1,18 +1,17 @@ """ - PowerGeneralizedWeibull(sigma,nu,gamma) + PowerGeneralizedWeibull(σ,ν,γ) -The *PowerGeneralizedWeibull distribution* with scale `sigma`, shape `nu` and second shape `gamma` has probability density function +The Power Generalised Weibull (PGW) distribution is a three-parameter distribution with support on ``{\\mathbb R}_+``. The corresponding hazard function can accommodate bathtub, unimodal and monotone (increasing and decreasing) hazard shapes. The PGW distribution has become popular in survival analysis given the tractability of its hazard and survival functions. + +The `PowerGeneralizedWeibull(σ,ν,γ)` distribution, with scale `σ`, shape `ν` (nu) and second shape `γ` has probability density function ```math -f(x; parameters) = ... +f(t;σ,ν,γ) = \\dfrac{ν}{γ σ^ν}t^{ν-1} \\left[ 1 + \\left(\\dfrac{t}{σ}\\right)^ν\\right]^{\\left(\\frac{1}{γ}-1\\right)} \\exp\\left\\{ 1- \\left[ 1 + \\left(\\dfrac{t}{σ}\\right)^ν\\right]^{\\frac{1}{γ}} +\\right\\}. ``` -More details and examples of usage could be provided in this docstring. - -Maybe this distribution could simply be constructed from a transformation of the original Weibull ? - References: -* [Link to my reference so that people understand what it is](https://myref.com) +* [nikulin:2009](@cite) Nikulin, M. and Haghighi, F. On the power generalized Weibull family: model for cancer censored data. Metron -- International Journal of Statistics, 2009 """ struct PowerGeneralizedWeibull{T<:Real} <: ContinuousUnivariateDistribution sigma::T diff --git a/test/runtests.jl b/test/runtests.jl index b9e874d..0dc31b6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,50 @@ using TestItemRunner @run_package_tests + +@testitem "trivial test" begin + @test true +end + +@testitem "Code quality (Aqua.jl)" begin + using Aqua + Aqua.test_all( + SurvivalDistributions; + ambiguities=false, + ) +end + +@testitem "Check Hazard Values for Exponential distribution" begin + using Distributions + λ, t = 10rand(), 3rand() + X = Exponential(1 / λ) + @test hazard(X, t) ≈ λ + @test cumhazard(X, t) ≈ t * λ +end + +@testitem "Check Hazard Values for LogNormal distribution" begin + using Distributions + + function hLogNormal(t, mu, sigma, logh::Bool=false) + lpdf0 = logpdf.(LogNormal(mu, sigma), t) + ls0 = logccdf.(LogNormal(mu, sigma), t) + val = lpdf0 .- ls0 + if logh + return val + else + return exp.(val) + end + end + + μ, σ, t = rand(), rand(), rand() + X = LogNormal(μ,σ) + + @test hLogNormal(t,μ,σ) ≈ hazard(X,t) +end + +@testitem "Check pwchd" begin + using Distributions, HypothesisTests + X = PiecewiseConstantHazardDistribution(rand(10),rand(10)) + x = rand(X,100000) + ApproximateOneSampleKSTest(x, X) +end \ No newline at end of file diff --git a/test/sampletest.jl b/test/sampletest.jl deleted file mode 100644 index 05bb729..0000000 --- a/test/sampletest.jl +++ /dev/null @@ -1,3 +0,0 @@ -@testitem "trivial test" begin - @test true -end \ No newline at end of file