diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4336c99..c4f501c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,29 @@
 # Unreleased
+# 0.9.0
+
+**Major, breaking API changes.**
+
+1. Wrapper types `Value` and `ValueGradient` are removed. Interface functions return real numbers (`logdensity`), or a real number and a vector (`logdensity_and_gradient`). This simplifies the calling convention.
+
+2. Capabilities of log density objects are described by traits; see `capabilities`.
+
+3. `dimension` is now distinct from `TransformVariables.dimension`, as it is not really the same thing.
+
+4. Condition-based wrappers are removed, as they were interfering with AD (mostly Zygote).
+
+5. Documentation significantly improved.
+
+6. Code organized into a single file (except for conditional loads), tests greatly simplified.
+
+# 0.8.3
+
+- update to work with new Zygote interface
+
+# 0.8.2
+
+- no code change, just a version bump to fix registry problems
+
 # 0.8.1
 
 - minor package and version fixes
diff --git a/Project.toml b/Project.toml
index cd7d4bb..39ac938 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "LogDensityProblems"
 uuid = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
 authors = ["Tamas K. Papp "]
-version = "0.8.3"
+version = "0.9.0"
 
 [deps]
 ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
diff --git a/docs/Project.toml b/docs/Project.toml
index 7b09c00..64fad68 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -4,4 +4,4 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 
 [compat]
-Documenter = "~0.21"
+Documenter = "~0.23"
diff --git a/docs/make.jl b/docs/make.jl
index 7894c46..c8cf2f8 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -7,8 +7,7 @@ makedocs(
     clean = true,
     authors = "Tamás K. Papp",
     checkdocs = :export,
-    pages = Any["Home" => "index.md",
-                "Internals" => "internals.md"]
+    pages = Any["Documentation" => "index.md"]
 )
 
 deploydocs(
diff --git a/docs/src/index.md b/docs/src/index.md
index 4038be3..e82c53d 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,42 +1,183 @@
 # Introduction
 
-This package provides the following functionality:
+This package serves two purposes:
 
-1. It defines the [`logdensity`](@ref) method with corresponding interface, which can be used by other packages that operate on (log) densities and need to evaluate the log densities or the gradients (eg [MCMC](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo), [MAP](https://en.wikipedia.org/wiki/Maximum_a_posteriori_estimation), [ML](https://en.wikipedia.org/wiki/Maximum_likelihood_estimation) or similar methods).
+1. Introduce a common API for packages that operate on *log densities*, which for these purposes are black box ``\mathbb{R}^n \to \mathbb{R}`` mappings. Using the interface introduced in this package, you can query ``n``, evaluate the log density and optionally its gradient, and determine if a particular object supports these methods using traits. **This usage is relevant primarily for package developers** who write generic algorithms that use (log) densities that correspond to posteriors and likelihoods, eg [MCMC](https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo), [MAP](https://en.wikipedia.org/wiki/Maximum_a_posteriori_estimation), [ML](https://en.wikipedia.org/wiki/Maximum_likelihood_estimation). An example is [DynamicHMC.jl](https://github.com/tpapp/DynamicHMC.jl). This is documented in [the API section](@ref log-density-api).
-2. It defines the [`ADgradient`](@ref) which makes objects that support `logdensity` to calculate log density *values* calculate log density *gradients* using various automatic differentiation packages.
+2. Make it easier for **users who want to perform inference** using the above methods (and packages) to
+   - *define their own log densities*, either taking a vector of real numbers as input, or extracting and
+     transforming parameters using the [TransformVariables.jl](https://github.com/tpapp/TransformVariables.jl)
+     package,
+   - *obtain gradients* of these log densities using one of the supported *automatic differentiation* packages
+     of Julia.
+This is documented in the next section, with a worked example.
 
-3. It defines the wrapper [`TransformedLogDensity`](@ref) using the [TransformVariables.jl](https://github.com/tpapp/TransformVariables.jl) package, allowing callables that take a set of parameters transformed from a flat vector of real numbers to support the `logdensity` interface.
+# Working with log density problems
 
-4. Various utility functions for debugging and testing log densities.
+Consider an inference problem where IID draws are obtained from a normal distribution,
+```math
+x_i \sim N(\mu, \sigma)
+```
+for ``i = 1, \dots, N``. It can be shown that the *log likelihood* conditional on ``\mu`` and ``\sigma`` is
+```math
+\ell = -N\log \sigma - \sum_{i = 1}^N \frac{(x_i-\mu)^2}{2\sigma^2} =
+-N\left( \log \sigma + \frac{S + (\bar{x} - \mu)^2}{2\sigma^2} \right)
+```
+where we have dropped constant terms, and defined the sufficient statistics
+```math
+\bar{x} = \frac{1}{N} \sum_{i = 1}^N x_i
+```
+and
+```math
+S = \frac{1}{N} \sum_{i = 1}^N (x_i - \bar{x})^2
+```
+
+Finally, we use priors
+```math
+\mu \sim N(0, 5), \sigma \sim N(0, 2)
+```
+which yield the log prior
+```math
+-\sigma^2/8 - \mu^2/50
+```
+which is added to the log likelihood to obtain the log posterior.
+
+It is useful to define a *callable* that implements this: its constructor takes some data vector `x` as input and calculates the summary statistics, and when called with a `NamedTuple` containing the parameters, it evaluates the log posterior.
+
+```@example 1
+using Random; Random.seed!(1) # hide
+using Statistics, Parameters # imported for our implementation
+
+struct NormalPosterior{T} # contains the summary statistics
+    N::Int
+    x̄::T
+    S::T
+end
+
+# calculate summary statistics from a data vector
+function NormalPosterior(x::AbstractVector)
+    NormalPosterior(length(x), mean(x), var(x; corrected = false))
+end
+
+# define a callable that unpacks parameters, and evaluates the log posterior
+function (problem::NormalPosterior)(θ)
+    @unpack μ, σ = θ
+    @unpack N, x̄, S = problem
+    loglikelihood = -N * (log(σ) + (S + abs2(μ - x̄)) / (2 * abs2(σ)))
+    logprior = - abs2(σ)/8 - abs2(μ)/50
+    loglikelihood + logprior
+end
+
+problem = NormalPosterior(randn(100))
+nothing # hide
+```
+
+Let's try out the posterior calculation:
+
+```@repl 1
+problem((μ = 0.0, σ = 1.0))
+```
+
+!!! note
+    Just evaluating your log density function like above is a great way to test and benchmark your implementation. See the “Performance Tips” section of the Julia manual for optimization advice.
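+
+For instance, a quick benchmark of a single evaluation could look like the following sketch (assuming the BenchmarkTools package is available in your environment):
+
+```julia
+using BenchmarkTools
+θ = (μ = 0.0, σ = 1.0)
+@btime $problem($θ) # interpolate with $ to avoid benchmarking global variable access
+```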
+## Using the TransformVariables package
+
+In our example, we require ``\sigma > 0``; otherwise the problem is meaningless. However, many MCMC samplers prefer to operate on *unconstrained* spaces ``\mathbb{R}^n``. The TransformVariables package was written to transform unconstrained to constrained spaces, and help with the log Jacobian correction (more on that later). That package has detailed documentation; here we just define a transformation from a length 2 vector to a `NamedTuple` with fields `μ` (unconstrained) and `σ > 0`.
+
+```@repl 1
+using LogDensityProblems, TransformVariables
+ℓ = TransformedLogDensity(as((μ = asℝ, σ = asℝ₊)), problem)
+```
+
+Then we can query the dimension of this problem, and evaluate the log density:
+```@repl 1
+LogDensityProblems.dimension(ℓ)
+LogDensityProblems.logdensity(ℓ, zeros(2))
+```
 
-## Inference
+!!! note
+    Before running time-consuming algorithms like MCMC, it is advisable to test and benchmark your log density evaluations separately. The same applies to [`LogDensityProblems.logdensity_and_gradient`](@ref).
 
 ```@docs
-logdensity
-dimension
-LogDensityProblems.Value
-LogDensityProblems.ValueGradient
-LogDensityProblems.ValueGradientBuffer
+TransformedLogDensity
+```
+
+## Manual unpacking and transformation
+
+If you prefer to implement the transformation yourself, you just have to define the following three methods for your problem: declare that it can evaluate log densities (but not their gradient, hence the `0` order), allow the dimension of the problem to be queried, and finally code the density calculation with the transformation. (Note that using [`TransformedLogDensity`](@ref) takes care of all of these for you, as shown above.)
+
+```@example 1
+function LogDensityProblems.capabilities(::Type{<:NormalPosterior})
+    LogDensityProblems.LogDensityOrder{0}()
+end
+
+LogDensityProblems.dimension(::NormalPosterior) = 2
+
+function LogDensityProblems.logdensity(problem::NormalPosterior, x)
+    μ, logσ = x
+    σ = exp(logσ)
+    problem((μ = μ, σ = σ)) + logσ
+end
+nothing # hide
+```
+
+```@repl 1
+LogDensityProblems.logdensity(problem, zeros(2))
 ```
 
-## Gradient via automatic differentiation
+Here we use the exponential function to transform from ``\mathbb{R}`` to the positive reals, but this requires that we correct the log density with the *logarithm* of the Jacobian, which here happens to be ``\log(\sigma)``.
 
+## Automatic differentiation
+
+Using either definition, you can now wrap the problem in another object that is also capable of evaluating the *gradient*, using automatic differentiation. The wrapper for this is
 ```@docs
 ADgradient
 ```
+Note that support for Zygote is experimental. At the moment, I would recommend that you use `Flux`.
 
-## Transformed problem definition
+Now observe that we can obtain gradients, too:
+```@repl 1
+import ForwardDiff
+∇ℓ = ADgradient(:ForwardDiff, ℓ)
+LogDensityProblems.capabilities(∇ℓ)
+LogDensityProblems.logdensity_and_gradient(∇ℓ, zeros(2))
+```
 
-```@docs
-TransformedLogDensity
+## Manually calculated derivatives
+
+If you prefer not to use automatic differentiation, you can supply your own derivatives following this template:
+```julia
+function LogDensityProblems.capabilities(::Type{<:NormalPosterior})
+    LogDensityProblems.LogDensityOrder{1}() # can do gradient
+end
+
+LogDensityProblems.dimension(::NormalPosterior) = 2 # for this problem
+
+function LogDensityProblems.logdensity_and_gradient(problem::NormalPosterior, x)
+    logdens = ...
+    grad = ...
+    logdens, grad
+end
```
 
-## Benchmarking, diagnostics, and utilities
+# Various utilities
+
+You may find these utilities useful for debugging and optimization.
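+
+For example, a quick stress test could look like the following sketch (illustrative only; it uses the `∇ℓ` wrapper defined above, and the exact failures depend on the random draws):
+
+```julia
+using LogDensityProblems: stresstest, logdensity_and_gradient
+# draw 500 random arguments from a scaled Cauchy and collect those that error
+failures = stresstest(logdensity_and_gradient, ∇ℓ; N = 500, scale = 2)
+isempty(failures) || @warn "log density evaluation failed" n_failures = length(failures)
+```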
 ```@docs
 LogDensityProblems.stresstest
 LogDensityProblems.benchmark_ForwardDiff_chunks
-LogDensityProblems.@iffinite
-LogDensityRejectErrors
-```
\ No newline at end of file
+```
+
+# [Log densities API](@id log-density-api)
+
+Use the functions below for evaluating log densities and gradients, and for querying their dimension and other information. These symbols are not exported, as they are mostly used by package developers and in any case would need to be `import`ed or qualified to add methods to.
+
+```@docs
+LogDensityProblems.capabilities
+LogDensityProblems.LogDensityOrder
+LogDensityProblems.dimension
+LogDensityProblems.logdensity
+LogDensityProblems.logdensity_and_gradient
+```
diff --git a/docs/src/internals.md b/docs/src/internals.md
deleted file mode 100644
index b9634b5..0000000
--- a/docs/src/internals.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Internals
-
-```@docs
-LogDensityProblems.AbstractLogDensityProblem
-LogDensityProblems.LogDensityWrapper
-LogDensityProblems.ADGradientWrapper
-LogDensityProblems.heuristic_chunks
-LogDensityProblems.RejectLogDensity
-```
diff --git a/src/AD.jl b/src/AD.jl
deleted file mode 100644
index 10b3770..0000000
--- a/src/AD.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-#####
-##### Wrappers for automatic differentiation.
-#####
-
-export ADgradient
-
-"""
-An abstract type that wraps another log density for calculating the gradient via AD.
-
-Automatically defines a `logdensity(Value, ...)` method, subtypes should define a
-`logdensity(ValueGradient, ...)` one.
-"""
-abstract type ADGradientWrapper <: LogDensityWrapper end
-
-function logdensity(::Type{Real}, fℓ::ADGradientWrapper, x::AbstractVector)
-    logdensity(Real, fℓ.ℓ, x)
-end
-
-"""
-$(SIGNATURES)
-
-Return a closure that evaluetes the log density. Call this function to ensure stable tags
-for `ForwardDiff`.
-"""
-_logdensity_closure(ℓ) = x -> logdensity(Real, ℓ, x)
-
-"""
-$(SIGNATURES)
-
-Wrap `P` using automatic differentiation to obtain a gradient.
-
-`kind` is usually a `Val` type, containing a symbol that refers to a package. The symbol can
-also be used directly as eg
-
-```julia
-ADgradient(:ForwardDiff, P)
-```
-
-See `methods(ADgradient)`. Note that some methods are defined conditionally on the relevant
-package being loaded.
-"""
-ADgradient(kind::Symbol, P; kwargs...) =
-    ADgradient(Val{kind}(), P; kwargs...)
-
-function ADgradient(v::Val{kind}, P; kwargs...) where kind
-    @info "Don't know how to AD with $(kind), consider `import $(kind)` if there is such a package."
- throw(MethodError(ADgradient, (v, P))) -end - -#### -#### wrappers - specific -#### - -function __init__() - @require ForwardDiff="f6369f11-7733-5829-9624-2563aa707210" include("AD_ForwardDiff.jl") - @require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" include("AD_Flux.jl") - @require ReverseDiff="37e2e3b7-166d-5795-8a7a-e32c996b4267" include("AD_ReverseDiff.jl") - if VERSION ≥ v"1.1.0" - # workaround for https://github.com/FluxML/Zygote.jl/issues/104 - @require Zygote="e88e6eb3-aa80-5325-afca-941959d7151f" include("AD_Zygote.jl") - end -end diff --git a/src/AD_Flux.jl b/src/AD_Flux.jl index 3be523c..849f49b 100644 --- a/src/AD_Flux.jl +++ b/src/AD_Flux.jl @@ -1,3 +1,7 @@ +##### +##### Gradient AD implementation using Flux +##### + import .Flux struct FluxGradientLogDensity{L} <: ADGradientWrapper @@ -13,14 +17,9 @@ ADgradient(::Val{:Flux}, ℓ) = FluxGradientLogDensity(ℓ) Base.show(io::IO, ∇ℓ::FluxGradientLogDensity) = print(io, "Flux AD wrapper for ", ∇ℓ.ℓ) -function logdensity(::Type{ValueGradient}, ∇ℓ::FluxGradientLogDensity, x::AbstractVector) +function logdensity_and_gradient(∇ℓ::FluxGradientLogDensity, x::AbstractVector) @unpack ℓ = ∇ℓ - y, back = Flux.Tracker.forward(_logdensity_closure(ℓ), x) + y, back = Flux.Tracker.forward(x -> logdensity(ℓ, x), x) yval = Flux.Tracker.data(y) - ValueGradient(yval, isfinite(yval) ? first(Flux.Tracker.data.(back(1))) : similar(x)) -end - -function logdensity(::ValueGradientBuffer, ∇ℓ::FluxGradientLogDensity, x::AbstractVector) - # NOTE this implementation ignores the buffer - logdensity(ValueGradient, ∇ℓ, x) + yval, first(Flux.Tracker.data.(back(1))) end diff --git a/src/AD_ForwardDiff.jl b/src/AD_ForwardDiff.jl index e718be3..fc012d7 100644 --- a/src/AD_ForwardDiff.jl +++ b/src/AD_ForwardDiff.jl @@ -1,7 +1,9 @@ -import .ForwardDiff +##### +##### Gradient AD implementation using ForwardDiff +##### - -# AD using ForwardDiff +import .ForwardDiff +using BenchmarkTools: @belapsed struct ForwardDiffLogDensity{L, C} <: ADGradientWrapper ℓ::L @@ -15,11 +17,16 @@ end _default_chunk(ℓ) = ForwardDiff.Chunk(dimension(ℓ)) -_default_gradientconfig(ℓ, chunk::ForwardDiff.Chunk) = +# defined to make the tag match +_logdensity_closure(ℓ) = x -> logdensity(ℓ, x) + +function _default_gradientconfig(ℓ, chunk::ForwardDiff.Chunk) ForwardDiff.GradientConfig(_logdensity_closure(ℓ), zeros(dimension(ℓ)), chunk) +end -_default_gradientconfig(ℓ, chunk::Integer) = +function _default_gradientconfig(ℓ, chunk::Integer) _default_gradientconfig(ℓ, ForwardDiff.Chunk(chunk)) +end """ $(SIGNATURES) @@ -37,11 +44,11 @@ function ADgradient(::Val{:ForwardDiff}, ℓ; ForwardDiffLogDensity(ℓ, gradientconfig) end -function logdensity(vgb::ValueGradientBuffer, fℓ::ForwardDiffLogDensity, x::AbstractVector) +function logdensity_and_gradient(fℓ::ForwardDiffLogDensity, x::AbstractVector) @unpack ℓ, gradientconfig = fℓ - result = ForwardDiff.gradient!(DiffResults.MutableDiffResult(vgb), - _logdensity_closure(ℓ), x, gradientconfig) - ValueGradient(result) + buffer = _diffresults_buffer(ℓ, x) + result = ForwardDiff.gradient!(buffer, _logdensity_closure(ℓ), x, gradientconfig) + _diffresults_extract(result) end """ @@ -66,21 +73,21 @@ Benchmark a log density problem with various chunk sizes using ForwardDiff. `chunks`, which defaults to all possible chunk sizes, determines the chunks that are tried. The function returns `chunk => time` pairs, where `time` is the benchmarked runtime in -seconds, as determined by `BenchmarkTools.@belapsed`. +seconds, as determined by `BenchmarkTools.@belapsed`. 
The gradient is evaluated at `x`
+(defaults to zeros).
 
 *Runtime may be long* because of tuned benchmarks, so when `markprogress == true` (the
-    default), dots are printed to mark progress.
+default), dots are printed to mark progress.
 
-This function is not exported, but part of the API.
+This function is not exported, but part of the API when `ForwardDiff` is imported.
 """
 function benchmark_ForwardDiff_chunks(ℓ;
                                       chunks = heuristic_chunks(dimension(ℓ), 20),
-                                      resulttype = ValueGradient,
-                                      markprogress = true)
+                                      markprogress = true,
+                                      x = zeros(dimension(ℓ)))
     map(chunks) do chunk
         ∇ℓ = ADgradient(Val(:ForwardDiff), ℓ; chunk = ForwardDiff.Chunk(chunk))
-        x = zeros(dimension(ℓ))
         markprogress && print(".")
-        chunk => @belapsed logdensity($(resulttype), $(∇ℓ), $(x))
+        chunk => @belapsed logdensity_and_gradient($(∇ℓ), $(x))
     end
 end
diff --git a/src/AD_ReverseDiff.jl b/src/AD_ReverseDiff.jl
index 18052c1..a83f1c4 100644
--- a/src/AD_ReverseDiff.jl
+++ b/src/AD_ReverseDiff.jl
@@ -1,19 +1,14 @@
-import .ReverseDiff, .DiffResults
+#####
+##### Gradient AD implementation using ReverseDiff
+#####
+
+import .ReverseDiff
 
 struct ReverseDiffLogDensity{L, C} <: ADGradientWrapper
     ℓ::L
     gradientconfig::C
 end
 
-Base.show(io::IO, ℓ::ReverseDiffLogDensity) = print(io, "ReverseDiff AD wrapper for ", ℓ.ℓ)
-
-function logdensity(vgb::ValueGradientBuffer, fℓ::ReverseDiffLogDensity, x::AbstractVector)
-    @unpack ℓ, gradientconfig = fℓ
-    result = ReverseDiff.gradient!(DiffResults.MutableDiffResult(vgb),
-                                   x -> logdensity(Real, ℓ, x), x, gradientconfig)
-    ValueGradient(result)
-end
-
 """
 $(SIGNATURES)
 
@@ -24,3 +19,12 @@ function ADgradient(::Val{:ReverseDiff}, ℓ)
     cfg = ReverseDiff.GradientConfig(z)
     ReverseDiffLogDensity(ℓ, cfg)
 end
+
+Base.show(io::IO, ℓ::ReverseDiffLogDensity) = print(io, "ReverseDiff AD wrapper for ", ℓ.ℓ)
+
+function logdensity_and_gradient(fℓ::ReverseDiffLogDensity, x::AbstractVector)
+    @unpack ℓ, gradientconfig = fℓ
+    buffer = _diffresults_buffer(ℓ, x)
+    result = ReverseDiff.gradient!(buffer, x -> logdensity(ℓ, x), x, gradientconfig)
+    _diffresults_extract(result)
+end
diff --git a/src/AD_Zygote.jl b/src/AD_Zygote.jl
index d5ed8fd..4798867 100644
--- a/src/AD_Zygote.jl
+++ b/src/AD_Zygote.jl
@@ -10,20 +10,14 @@ $(SIGNATURES)
 Gradient using algorithmic/automatic differentiation via Zygote.
 
 !!! NOTE
-    Experimental, use latest `Zygote#master` and `IRTools#master`.
+    Experimental, bug reports welcome.
 """
 ADgradient(::Val{:Zygote}, ℓ) = ZygoteGradientLogDensity(ℓ)
 
 Base.show(io::IO, ∇ℓ::ZygoteGradientLogDensity) = print(io, "Zygote AD wrapper for ", ∇ℓ.ℓ)
 
-function logdensity(::Type{ValueGradient}, ∇ℓ::ZygoteGradientLogDensity, x::AbstractVector)
+function logdensity_and_gradient(∇ℓ::ZygoteGradientLogDensity, x::AbstractVector)
     @unpack ℓ = ∇ℓ
     y, back = Zygote.forward(_logdensity_closure(ℓ), x)
-    gradient = isfinite(y) ? first(back(Zygote.sensitivity(y))) : zeros(typeof(y), dimension(ℓ))
-    ValueGradient(y, gradient)
-end
-
-function logdensity(::ValueGradientBuffer, ∇ℓ::ZygoteGradientLogDensity, x::AbstractVector)
-    # NOTE this implementation ignores the buffer
-    logdensity(ValueGradient, ∇ℓ, x)
+    y, first(back(Zygote.sensitivity(y)))
 end
diff --git a/src/DiffResults_helpers.jl b/src/DiffResults_helpers.jl
new file mode 100644
index 0000000..9ee4e1a
--- /dev/null
+++ b/src/DiffResults_helpers.jl
@@ -0,0 +1,29 @@
+#####
+##### Helper functions for working with DiffResults
+#####
+##### Only included when required by AD wrappers.
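+#####
+##### Typical round trip (a descriptive sketch of how the helpers below are used by the
+##### AD wrappers): allocate with `_diffresults_buffer`, pass the buffer to
+##### `ForwardDiff.gradient!` or `ReverseDiff.gradient!`, then call
+##### `_diffresults_extract` on the result.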
+
+import .DiffResults
+
+"""
+$(SIGNATURES)
+
+Allocate a DiffResults buffer for a gradient, taking the element type of `x` into account
+(heuristically).
+"""
+function _diffresults_buffer(ℓ, x)
+    T = eltype(x)
+    S = T <: Real ? float(T) : Float64 # heuristic
+    DiffResults.MutableDiffResult(zero(S), (Vector{S}(undef, dimension(ℓ)), ))
+end
+
+"""
+$(SIGNATURES)
+
+Extract a return value for [`logdensity_and_gradient`](@ref) from a DiffResults buffer,
+constructed with [`_diffresults_buffer`](@ref). The gradient is not copied, as the caller
+created the vector.
+"""
+function _diffresults_extract(diffresult::DiffResults.DiffResult)
+    DiffResults.value(diffresult)::Real, DiffResults.gradient(diffresult)
+end
diff --git a/src/LogDensityProblems.jl b/src/LogDensityProblems.jl
index 4f3a587..ba3f355 100644
--- a/src/LogDensityProblems.jl
+++ b/src/LogDensityProblems.jl
@@ -1,21 +1,254 @@
+"""
+A unified interface for log density problems, for
+
+1. defining mappings to a log density (eg for Bayesian inference),
+
+2. optionally obtaining a gradient using automatic differentiation,
+
+3. defining a common interface for working with such log densities and gradients (eg MCMC).
+
+These use cases utilize different parts of this package; make sure you read the
+documentation.
+"""
 module LogDensityProblems
 
+export TransformedLogDensity, ADgradient
+
 using ArgCheck: @argcheck
-using BenchmarkTools: @belapsed
 using DocStringExtensions: SIGNATURES, TYPEDEF
-import DiffResults
 using Parameters: @unpack
-using Random: AbstractRNG, GLOBAL_RNG
+import Random
 using Requires: @require
+import TransformVariables
+
+####
+#### interface for problems
+####
+
+"""
+$(TYPEDEF)
+
+A trait that means that a log density supports evaluating derivatives up to order `K`.
+
+Typical values for `K` are `0` (just the log density) and `1` (log density and gradient).
+"""
+struct LogDensityOrder{K}
+    function LogDensityOrder{K}() where K
+        _K = Int(K)
+        @argcheck _K ≥ 0
+        new{_K}()
+    end
+end
+
+LogDensityOrder(K::Integer) = LogDensityOrder{K}()
+
+Base.isless(::LogDensityOrder{A}, ::LogDensityOrder{B}) where {A, B} = A < B
+
+"""
+$(SIGNATURES)
+
+Test if the type (or a value, for convenience) supports the log density interface.
+
+When `nothing` is returned, it doesn't support this interface. When `LogDensityOrder{K}()`
+is returned (typically with `K == 0` or `K == 1`), derivatives up to order `K` are supported.
+*All other return values are invalid*.
+
+# Interface description
+
+The following methods need to be implemented for the interface:
+
+1. [`dimension`](@ref) returns the *dimension* of the domain,
+
+2. [`logdensity`](@ref) evaluates the log density at a given point,
+
+3. [`logdensity_and_gradient`](@ref) when `K ≥ 1`.
+
+See also [`LogDensityProblems.stresstest`](@ref) for stress testing.
+"""
+capabilities(T::Type) = nothing
+
+capabilities(x) = capabilities(typeof(x)) # convenience function
+
+"""
+    dimension(ℓ)
+
+Dimension of the input vectors `x` for log density `ℓ`. See [`logdensity`](@ref),
+[`logdensity_and_gradient`](@ref).
+
+!!! note
+    This function is *distinct* from `TransformVariables.dimension`.
+"""
+function dimension end
+
+"""
+    logdensity(ℓ, x)
+
+Evaluate the log density `ℓ` at `x`, which has length compatible with its
+[`dimension`](@ref).
+
+Return a real number, which may or may not be finite (can also be `NaN`). Non-finite values
+other than `-Inf` are invalid but do not error; the caller should deal with these
+appropriately.
+"""
+function logdensity end
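+
+# For illustration only (a sketch, not part of the package API): a hypothetical type
+# could implement the first-order interface along these lines:
+#
+#     struct MyDensity end                                  # hypothetical name
+#     capabilities(::Type{MyDensity}) = LogDensityOrder{1}()
+#     dimension(::MyDensity) = 2
+#     logdensity(::MyDensity, x) = -sum(abs2, x) / 2
+#     logdensity_and_gradient(::MyDensity, x) = (-sum(abs2, x) / 2, -x)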
+
+"""
+    logdensity_and_gradient(ℓ, x)
+
+Evaluate the log density `ℓ` and its gradient at `x`, which has length compatible with its
+[`dimension`](@ref).
+
+Return two values:
+
+- the log density as a real number, which is equivalent to `logdensity(ℓ, x)`,
+
+- *if* the log density is finite, the gradient, a vector of real numbers, otherwise this
+  value is arbitrary and should be ignored.
+"""
+function logdensity_and_gradient end
+
+#####
+##### Transformed log density (typically Bayesian inference)
+#####
+
+"""
+    TransformedLogDensity(transformation, log_density_function)
+
+A problem in Bayesian inference. Vectors of length compatible with the dimension (obtained
+from `transformation`) are transformed into a general object `θ` (unrestricted type, but a
+named tuple is recommended for clean code), correcting for the log Jacobian determinant of
+the transformation.
+
+`log_density_function(θ)` is expected to return *real numbers*. For zero densities or
+infeasible `θ`s, `-Inf` or similar should be returned, but for efficiency of inference most
+methods recommend using `transformation` to avoid this. It is recommended that
+`log_density_function` is a callable object that also encapsulates the data for the problem.
+
+Use the property accessors `ℓ.transformation` and `ℓ.log_density_function` to access the
+arguments of `ℓ::TransformedLogDensity`; these are part of the API.
+
+# Usage note
+
+This is the most convenient way to define log densities, as `capabilities`, `logdensity`,
+and `dimension` are automatically defined. To obtain a type that supports derivatives, use
+[`ADgradient`](@ref).
+"""
+struct TransformedLogDensity{T <: TransformVariables.AbstractTransform, L}
+    transformation::T
+    log_density_function::L
+end
+
+function Base.show(io::IO, ℓ::TransformedLogDensity)
+    print(io, "TransformedLogDensity of dimension $(dimension(ℓ))")
+end
+
+capabilities(::Type{<:TransformedLogDensity}) = LogDensityOrder{0}()
+
+dimension(p::TransformedLogDensity) = TransformVariables.dimension(p.transformation)
+
+function logdensity(p::TransformedLogDensity, x::AbstractVector)
+    @unpack transformation, log_density_function = p
+    TransformVariables.transform_logdensity(transformation, log_density_function, x)
+end
+
+#####
+##### AD wrappers --- interface and generic code
+#####
+
+"""
+An abstract type that wraps another log density for calculating the gradient via AD.
+
+Automatically defines the methods `capabilities`, `dimension`, and `logdensity`, forwarding
+to the field `ℓ`; subtypes should define [`logdensity_and_gradient`](@ref).
+
+This is an implementation helper, not part of the API.
+"""
+abstract type ADGradientWrapper end
+
+logdensity(ℓ::ADGradientWrapper, x::AbstractVector) = logdensity(ℓ.ℓ, x)
+
+capabilities(::Type{<:ADGradientWrapper}) = LogDensityOrder{1}()
+
+dimension(ℓ::ADGradientWrapper) = dimension(ℓ.ℓ)
+
+Base.parent(ℓ::ADGradientWrapper) = ℓ.ℓ
+
+"""
+$(SIGNATURES)
+
+Wrap `P` using automatic differentiation to obtain a gradient.
+
+`kind` is usually a `Val` type with a symbol that refers to a package, for example
+```julia
+ADgradient(Val(:ForwardDiff), P)
+```
+
+The symbol can also be used directly as eg
+
+```julia
+ADgradient(:ForwardDiff, P)
+```
+
+and should mostly be equivalent if the compiler manages to fold the constant.
+
+`parent` can be used to retrieve the original argument.
+
+See `methods(ADgradient)`.
+Note that **some methods are defined conditionally on the
+relevant package being loaded.**
+"""
+ADgradient(kind::Symbol, P; kwargs...) = ADgradient(Val{kind}(), P; kwargs...)
+
+function ADgradient(v::Val{kind}, P; kwargs...) where kind
+    @info "Don't know how to AD with $(kind), consider `import $(kind)` if there is such a package."
+    throw(MethodError(ADgradient, (v, P)))
+end
+
+####
+#### AD wrappers - specific
+####
+
+function __init__()
+    @require DiffResults="163ba53b-c6d8-5494-b064-1a9d43ac40c5" include("DiffResults_helpers.jl")
+    @require ForwardDiff="f6369f11-7733-5829-9624-2563aa707210" include("AD_ForwardDiff.jl")
+    @require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" include("AD_Flux.jl")
+    @require ReverseDiff="37e2e3b7-166d-5795-8a7a-e32c996b4267" include("AD_ReverseDiff.jl")
+    if VERSION ≥ v"1.1.0"
+        # workaround for https://github.com/FluxML/Zygote.jl/issues/104
+        @require Zygote="e88e6eb3-aa80-5325-afca-941959d7151f" include("AD_Zygote.jl")
+    end
+end
+
+####
+#### stress testing
+####
+
+"""
+$(SIGNATURES)
+
+Test `ℓ` with random values.
+
+`N` random vectors are drawn from a standard multivariate Cauchy distribution, scaled with
+`scale` (which can be a scalar or a conformable vector).
+
+Each random vector is then used as an argument in `f(ℓ, ...)`. `logdensity` and
+`logdensity_and_gradient` are recommended for `f`.
-using TransformVariables: AbstractTransform, transform_logdensity, TransformVariables,
-    dimension, random_reals, random_arg
+In case the call produces an error, the value is recorded as a failure; failures are returned
-include("result_types.jl")
-include("problem.jl")
-include("transformed.jl")
-include("reject_errors.jl")
-include("AD.jl")
-include("stresstest.jl")
+by the function.
+
+Not exported, but part of the API.
+"""
+function stresstest(f, ℓ; N = 1000, rng::Random.AbstractRNG = Random.GLOBAL_RNG, scale = 1)
+    failures = Vector{Float64}[]
+    d = dimension(ℓ)
+    for _ in 1:N
+        x = TransformVariables.random_reals(d; scale = scale, cauchy = true, rng = rng)
+        try
+            f(ℓ, x)
+        catch e
+            push!(failures, x)
+        end
+    end
+    failures
+end
 
 end # module
diff --git a/src/problem.jl b/src/problem.jl
deleted file mode 100644
index 5a94df7..0000000
--- a/src/problem.jl
+++ /dev/null
@@ -1,65 +0,0 @@
-#####
-##### interface for problems
-#####
-
-export logdensity, dimension
-
-"""
-Abstract type for log density representations, which support the following
-interface for `ℓ::AbstractLogDensityProblem`:
-
-1. [`dimension`](@ref) returns the *dimension* of the domain of `ℓ`,
-
-2. [`logdensity`](@ref) evaluates the log density `ℓ` at a given point.
-
-See also [`LogDensityProblems.stresstest`](@ref) for stress testing.
-"""
-abstract type AbstractLogDensityProblem end
-
-"""
-    logdensity(resulttype, ℓ, x)
-
-Evaluate the [`AbstractLogDensityProblem`](@ref) `ℓ` at `x`, which has length compatible
-with its [`dimension`](@ref).
-
-The argument `resulttype` determines the type of the result:
-
-1. `Real` for an unchecked evaluation of the log density which should return a `::Real`
-number (that could be `NaN`, `Inf`, etc),
-
-1. [`Value`](@ref) for a checked log density, returning a `Value`,
-
-2. [`ValueGradient`](@ref) also calculates the gradient, returning a `ValueGradient`,
-
-3. [`ValueGradientBuffer`](@ref) calculates a `ValueGradient` *potentially* (but always
-consistently for argument types) using the provided buffer for the gradient. In this case,
-the element type of the array may determine the result element type.
- -# Implementation note - -Most types should just define the methods for `Real` and `ValueGradientBuffer` (when -applicable), as `Value` and `ValueGradient` fall back to these, respectively. -""" -logdensity(::Type{Value}, ℓ, x::AbstractVector) = Value(logdensity(Real, ℓ, x)) - -function logdensity(::Type{ValueGradient}, ℓ, x::AbstractVector{T}) where T - S = (T <: Real && isconcretetype(T)) ? T : Float64 - logdensity(ValueGradientBuffer(Vector{S}(undef, dimension(ℓ))), ℓ, x) -end - -#### -#### wrappers — general -#### - -""" -An abstract type that wraps another log density in its field `ℓ`. - -# Notes - -Implementation detail, *not exported*. -""" -abstract type LogDensityWrapper <: AbstractLogDensityProblem end - -Base.parent(w::LogDensityWrapper) = w.ℓ - -TransformVariables.dimension(w::LogDensityWrapper) = dimension(parent(w)) diff --git a/src/reject_errors.jl b/src/reject_errors.jl deleted file mode 100644 index d126901..0000000 --- a/src/reject_errors.jl +++ /dev/null @@ -1,62 +0,0 @@ -##### -##### Catching errors and treating them as a -∞ log density. -##### - -export reject_logdensity, LogDensityRejectErrors - -struct LogDensityRejectErrors{E, L} <: LogDensityWrapper - ℓ::L -end - -""" -$(SIGNATURES) - -Wrap a logdensity `ℓ` so that errors `<: E` are caught and replaced with a ``-∞`` value. - -`E` defaults to `InvalidLogDensityExceptions`. - -# Note - -Use cautiously, as catching errors can mask errors in your code. The recommended use case is -for catching quirks and corner cases of AD. See also [`stresstest`](@ref) as an alternative -to using this wrapper. - -Also, some AD packages don't handle `try` - `catch` blocks, so it is advised to use this as -the outermost wrapper. -""" -LogDensityRejectErrors{E}(ℓ::L) where {E,L} = LogDensityRejectErrors{E,L}(ℓ) - -LogDensityRejectErrors(ℓ) = LogDensityRejectErrors{InvalidLogDensityException}(ℓ) - -minus_inf_like(::Type{Real}, x) = convert(eltype(x), -Inf) - -minus_inf_like(::Type{Value}, x) = Value(minus_inf_like(Real, x)) - -minus_inf_like(::Type{ValueGradient}, x) = ValueGradient(minus_inf_like(Real, x), similar(x)) - -minus_inf_like(vgb::ValueGradientBuffer, x) = ValueGradient(minus_inf_like(Real, x), vgb.buffer) - -function _logdensity_reject_errors(kind, w::LogDensityRejectErrors{E}, x) where {E} - try - logdensity(kind, parent(w), x) - catch e - if e isa E - minus_inf_like(kind, x) - else - rethrow(e) - end - end -end - -logdensity(::Type{Real}, w::LogDensityRejectErrors, x::AbstractVector) = - _logdensity_reject_errors(Real, w, x) - -logdensity(::Type{Value}, w::LogDensityRejectErrors, x::AbstractVector) = - _logdensity_reject_errors(Value, w, x) - -logdensity(::Type{ValueGradient}, w::LogDensityRejectErrors, x::AbstractVector) = - _logdensity_reject_errors(ValueGradient, w, x) - -function logdensity(vgb::ValueGradientBuffer, w::LogDensityRejectErrors, x::AbstractVector) - _logdensity_reject_errors(vgb, w, x) -end diff --git a/src/result_types.jl b/src/result_types.jl deleted file mode 100644 index 1014e9a..0000000 --- a/src/result_types.jl +++ /dev/null @@ -1,145 +0,0 @@ -##### -##### Types for containing the result of log density value and gradient evaluations. -##### -##### Also used to specify the kind of log density evaluation. Types are not exported, but -##### part of the API. -##### - -export InvalidLogDensityException - -""" -$(TYPEDEF) - -Thrown when `Value` or `ValueGradient` is called with invalid arguments. 
-""" -struct InvalidLogDensityException{T} <: Exception - "Location information: 0 is the value, positive integers are the gradient." - index::Int - "The invalid value." - value::T -end - -function Base.showerror(io::IO, ex::InvalidLogDensityException) - @unpack index, value = ex - print(io, "InvalidLogDensityException: ", - index == 0 ? "value" : "gradient[$(index)]", - " is $(value)") -end - -struct Value{T <: Real} - value::T - function Value{T}(value::T) where {T <: Real} - @argcheck (isfinite(value) || value == -Inf) InvalidLogDensityException(0, value) - new{T}(value) - end -end - -""" -$(SIGNATURES) - -Holds the value of a logdensity at a given point. - -Constructor ensures that the value is either finite, or ``-∞``. - -All other values (eg `NaN` or `Inf` for the `value`) lead to an error. - -See also [`logdensity`](@ref). -""" -Value(value::T) where {T <: Real} = Value{T}(value) - -Base.eltype(::Type{Value{T}}) where T = T - -struct ValueGradient{T, V <: AbstractVector{T}} - value::T - gradient::V - function ValueGradient{T,V}(value::T, gradient::V - ) where {T <: Real, V <: AbstractVector{T}} - if value ≠ -Inf - @argcheck isfinite(value) InvalidLogDensityException(0, value) - invalid = findfirst(!isfinite, gradient) - if invalid ≢ nothing - throw(InvalidLogDensityException(invalid, gradient[invalid])) - end - end - new{T,V}(value, gradient) - end -end - -""" -$(SIGNATURES) - -Holds the value and gradient of a logdensity at a given point. - -Constructor ensures that either - -1. both the value and the gradient are finite, - -2. the value is ``-∞`` (then gradient is not checked). - -All other values (eg `NaN` or `Inf` for the `value`) lead to an error. - -See also [`logdensity`](@ref). -""" -ValueGradient(value::T, gradient::V) where {T <: Real, V <: AbstractVector{T}} = - ValueGradient{T,V}(value, gradient) - -function ValueGradient(value::T1, gradient::AbstractVector{T2}) where {T1,T2} - T = promote_type(T1, T2) - ValueGradient(T(value), T ≡ T2 ? gradient : map(T, gradient)) -end - -Base.eltype(::Type{ValueGradient{T,V}}) where {T,V} = T - -Base.isfinite(v::Union{Value, ValueGradient}) = isfinite(v.value) - -Base.isinf(v::Union{Value, ValueGradient}) = isinf(v.value) - -""" -$(TYPEDEF) - -A wrapper for a vector that indicates that the vector *may* be used for the gradient in a -`ValueGradient`. Consequences are undefined if it is modified later, implicitly the caller -guarantees that it will not be used for anything else while the gradient is retrieved. - -See [`logdensity`](@ref). -""" -struct ValueGradientBuffer{T <: Real, V <: AbstractVector{T}} - buffer::V - function ValueGradientBuffer(buffer::AbstractVector{T}) where T - @argcheck T <: Real && isconcretetype(T) - new{T,typeof(buffer)}(buffer) - end -end - -### -### conversion to DiffResult types — these are not part of the API -### - -function DiffResults.MutableDiffResult(vgb::ValueGradientBuffer) - @unpack buffer = vgb - DiffResults.MutableDiffResult(first(buffer), (buffer, )) -end - -function ValueGradient(result::DiffResults.DiffResult) - ValueGradient(DiffResults.value(result), DiffResults.gradient(result)) -end - -### -### utilities -### - -""" -$(SIGNATURES) - -If `expr` evaluates to a non-finite value, `return` with that, otherwise evaluate to that -value. Useful for returning early from non-finite likelihoods. - -Part of the API, but not exported. 
-""" -macro iffinite(expr) - quote - result = $(esc(expr)) - isfinite(result) || return result - result - end -end diff --git a/src/stresstest.jl b/src/stresstest.jl deleted file mode 100644 index e0f5ba7..0000000 --- a/src/stresstest.jl +++ /dev/null @@ -1,29 +0,0 @@ -#### -#### stress testing -#### - -TransformVariables.random_arg(ℓ::AbstractLogDensityProblem; kwargs...) = random_reals(dimension(ℓ); kwargs...) - -""" -$(SIGNATURES) - -Test `ℓ` with random values. - -`N` random vectors are drawn from a standard multivariate Cauchy distribution, scaled with -`scale` (which can be a scalar or a conformable vector). In case the call produces an error, -the value is recorded as a failure, which are returned by the function. - -Not exported, but part of the API. -""" -function stresstest(ℓ; N = 1000, rng::AbstractRNG = GLOBAL_RNG, scale = 1, resulttype = Value) - failures = Vector{Float64}[] - for _ in 1:N - x = random_arg(ℓ; scale = scale, cauchy = true, rng = rng) - try - logdensity(resulttype, ℓ, x) - catch e - push!(failures, x) - end - end - failures -end diff --git a/src/transformed.jl b/src/transformed.jl deleted file mode 100644 index 5b7d85f..0000000 --- a/src/transformed.jl +++ /dev/null @@ -1,43 +0,0 @@ -##### -##### Transformed log density (typically Bayesian inference) -##### - -export TransformedLogDensity - -""" - TransformedLogDensity(transformation, log_density_function) - -A problem in Bayesian inference. Vectors of length `dimension(transformation)` are -transformed into a general object `θ` (unrestricted type, but a named tuple is recommended -for clean code), correcting for the log Jacobian determinant of the transformation. - -It is recommended that `log_density_function` is a callable object that also encapsulates -the data for the problem. - -`log_density_function(θ)` is expected to return *real numbers*. For zero densities or -infeasible `θ`s, `-Inf` or similar should be returned, but for efficiency of inference most -methods recommend using `transformation` to avoid this. - -Use the property accessors `ℓ.transformation` and `ℓ.log_density_function` to access the -arguments of `ℓ::TransformedLogDensity`, these are part of the API. -""" -struct TransformedLogDensity{T <: AbstractTransform, L} <: AbstractLogDensityProblem - transformation::T - log_density_function::L -end - -function Base.show(io::IO, ℓ::TransformedLogDensity) - print(io, "TransformedLogDensity of dimension $(dimension(ℓ.transformation))") -end - -""" -$(SIGNATURES) - -The dimension of the problem, ie the length of the vectors in its domain. 
-"""
-TransformVariables.dimension(p::TransformedLogDensity) = dimension(p.transformation)
-
-function logdensity(::Type{Real}, p::TransformedLogDensity, x::AbstractVector)
-    @unpack transformation, log_density_function = p
-    transform_logdensity(transformation, log_density_function, x)
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index 70e057f..c101622 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,315 +1,136 @@
 using LogDensityProblems, Test, Distributions, TransformVariables
-using LogDensityProblems: Value, ValueGradient, ValueGradientBuffer
+import LogDensityProblems: capabilities, dimension, logdensity
+using LogDensityProblems: logdensity_and_gradient, LogDensityOrder
 
-import ForwardDiff, Flux, ReverseDiff
+import ForwardDiff, Flux, ReverseDiff, TransformVariables, Random
 using Parameters: @unpack
-using TransformVariables
-import Random
 
 ####
 #### test setup and utilities
 ####
 
+###
+### reproducible randomness
+###
+
 Random.seed!(1)
 
+###
+### comparisons (for testing)
+###
+
 """
     a ≅ b
 
-Compare fields and types (strictly), for unit testing. For less strict comparison, use `≈`.
+Compare log density results (values and types), for unit testing.
 """
-≅(::Any, ::Any) = false
-≅(a::Value{T}, b::Value{T}) where {T} = a.value == b.value
-≅(a::ValueGradient{T,V}, b::ValueGradient{T,V}) where {T,V} =
-    a.value == b.value && (a.value == -Inf || a.gradient == b.gradient)
-
-_wide_tol(a, b) = max(√eps(a.value), √eps(b.value))
-
-function Base.isapprox(a::Value, b::Value; atol = _wide_tol(a, b), rtol = atol)
-    isapprox(a.value, b.value; atol = atol, rtol = rtol)
+≅(::Any, ::Any, atol = 0) = false
+
+function ≅(a::Real, b::Real, atol = 0)
+    if isnan(a)
+        isnan(b)
+    elseif isinf(a)
+        a == b
+    else
+        abs(a - b) ≤ atol
+    end
 end
 
-function Base.isapprox(a::ValueGradient, b::ValueGradient; atol = _wide_tol(a, b), rtol = atol)
-    isapprox(a.value, b.value; atol = atol, rtol = rtol) &&
-        (a.value == -Inf || isapprox(a.gradient, b.gradient; atol = √eps(), rtol = atol))
+function ≅(a::Tuple{Real,Any}, b::Tuple{Real,Any}, atol = 0)
+    ≅(first(a), first(b), atol) || return false
+    !isfinite(first(a)) || isapprox(last(a), last(b); atol = atol, rtol = 0)
 end
 
-####
-#### result types
-####
-
-@testset "Value constructor" begin
-    @test eltype(Value(1.0)) ≡ Float64
-    @test_throws InvalidLogDensityException(0, Inf) Value(Inf)
-    @test_throws InvalidLogDensityException Value(NaN) # FIXME more specific test
-    @test isfinite(Value(1.0))
-    @test !isinf(Value(1.0))
-    @test !isfinite(Value(-Inf))
-    @test isinf(Value(-Inf))
-    @test_throws MethodError Value(:something)
+@testset "comparisons for unit testing" begin
+    @test 1 ≅ 1
+    @test !(1 ≅ 2)
+    @test Inf ≅ Inf
+    @test (1, [1, 2]) ≅ (1, [1, 2])
+    @test !((1, [1, 2]) ≅ (1, [1, 3]))
+    @test !((3, [1, 2]) ≅ (1, [1, 2]))
+    @test (-Inf, [1, 2]) ≅ (-Inf, [1, 2])
+    @test (-Inf, [1, 2]) ≅ (-Inf, [1, 3])
+    @test (-Inf, [1, 2]) ≅ (-Inf, nothing)
 end
 
-@testset "ValueGradient constructor" begin
-    @test eltype(ValueGradient(1.0, [2.0])) ≡ Float64
-    @test_throws InvalidLogDensityException(0, Inf) ValueGradient(Inf, [1.0])
-    @test_throws InvalidLogDensityException(1, Inf) ValueGradient(2.0, [Inf])
-    @test !isfinite(ValueGradient(-Inf, [12.0]))
-    @test isinf(ValueGradient(-Inf, [12.0]))
-    @test isfinite(ValueGradient(1.0, [12.0]))
-    @test !isinf(ValueGradient(1.0, [12.0]))
-    @test ValueGradient(1, [2.0]) ≅ ValueGradient(1.0, [2.0]) # conversion
-end
+###
+### simple log density for testing
+###
 
-@testset "error printing" begin
-    @test sprint(showerror, InvalidLogDensityException(0,
NaN)) == - "InvalidLogDensityException: value is NaN" - @test sprint(showerror, InvalidLogDensityException(1, Inf)) == - "InvalidLogDensityException: gradient[1] is Inf" +struct TestLogDensity{F} + ℓ::F end +logdensity(ℓ::TestLogDensity, x) = ℓ.ℓ(x) +dimension(::TestLogDensity) = 3 +test_logdensity1(x) = -2*abs2(x[1]) - 3*abs2(x[2]) - 5*abs2(x[3]) +test_logdensity(x) = any(x .< 0) ? -Inf : test_logdensity1(x) +test_gradient(x) = x .* [-4, -6, -10] +TestLogDensity() = TestLogDensity(test_logdensity) # default: -Inf for negative input +Base.show(io::IO, ::TestLogDensity) = print(io, "TestLogDensity") #### -#### transformed Bayesian problem +#### traits #### -@testset "transformed Bayesian problem" begin - t = as((y = asℝ₊, )) - d = LogNormal(1.0, 2.0) - logposterior = ((x, ), ) -> logpdf(d, x) +@test capabilities("a fish") ≡ nothing - # a Bayesian problem - p = TransformedLogDensity(t, logposterior) - @test dimension(p) == 1 - @test p.transformation ≡ t +@testset "LogDensityOrder" begin + @test LogDensityOrder(1) == LogDensityOrder(1) + @test_throws ArgumentError LogDensityOrder(-1) + @test LogDensityOrder(2) ≥ LogDensityOrder(1) +end - # gradient of a problem - ∇p = ADgradient(:ForwardDiff, p) - @test dimension(∇p) == 1 - @test parent(∇p).transformation ≡ t +#### +#### AD backends +#### +@testset "AD via ForwardDiff" begin + ℓ = TestLogDensity() + ∇ℓ = ADgradient(:ForwardDiff, ℓ) + @test repr(∇ℓ) == "ForwardDiff AD wrapper for " * repr(ℓ) * ", w/ chunk size 3" + @test dimension(∇ℓ) == 3 + @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 - x = random_arg(p) - θ, lj = transform_and_logjac(t, x) - px = logdensity(Value, p, x) - @test logpdf(d, θ.y) + lj ≈ (px::Value).value - @test (logdensity(Value, ∇p, x)::Value).value ≈ px.value - ∇px = logdensity(ValueGradient, ∇p, x) - @test (∇px::ValueGradient).value ≈ px.value - @test ∇px.gradient ≈ [ForwardDiff.derivative(x -> logpdf(d, exp(x)) + x, x[1])] + x = randn(3) + @test logdensity(∇ℓ, x) ≅ test_logdensity(x) + @test logdensity_and_gradient(∇ℓ, x) ≅ (test_logdensity(x), test_gradient(x)) end end -@testset "-∞ log densities" begin - t = as(Array, 2) - validx = x -> all(x .> 0) - p = TransformedLogDensity(t, x -> validx(x) ? sum(abs2, x)/2 : -Inf) - ∇p = ADgradient(:ForwardDiff, p) - - @test dimension(p) == dimension(∇p) == dimension(t) - @test p.transformation ≡ parent(∇p).transformation ≡ t - - for _ in 1:100 - x = random_arg(p) - px = logdensity(Value, ∇p, x) - ∇px = logdensity(ValueGradient, ∇p, x) - @test px isa Value - @test ∇px isa ValueGradient - @test px.value ≈ ∇px.value - if validx(x) - @test isfinite(px) - @test isfinite(∇px) - @test ∇px.value ≈ sum(abs2, x)/2 - @test ∇px.gradient ≈ x - else - @test isinf(px) - @test isinf(∇px) - end - end +@testset "chunk heuristics for ForwardDiff" begin + @test LogDensityProblems.heuristic_chunks(82) == vcat(1:4:81, [82]) end -@testset "benchmark ForwardDiff problems" begin +@testset "benchmark ForwardDiff chunk size" begin ℓ = TransformedLogDensity(as(Array, 20), x -> -sum(abs2, x)) b = LogDensityProblems.benchmark_ForwardDiff_chunks(ℓ) @test b isa Vector{Pair{Int,Float64}} @test length(b) ≤ 20 end -@testset "stresstest" begin - f = x -> all(x .< 0) ? 
NaN : -sum(abs2, x) - ℓ = TransformedLogDensity(as(Array, 2), f) - failures = LogDensityProblems.stresstest(ℓ; N = 1000) - @test 230 ≤ length(failures) ≤ 270 - @test all(x -> all(x .< 0), failures) -end - -@testset "show" begin - t = as(Array, 5) - p = TransformedLogDensity(t, x -> -sum(abs2, x)) - @test repr(p) == "TransformedLogDensity of dimension 5" - @test repr(ADgradient(:ForwardDiff, p; chunk = 2)) == - ("ForwardDiff AD wrapper for " * repr(p) * ", w/ chunk size 2") - @test repr(ADgradient(:ReverseDiff, p)) == ("ReverseDiff AD wrapper for " * repr(p)) - @test repr(ADgradient(:Flux, p)) == ("Flux AD wrapper for " * repr(p)) -end - -@testset "@iffinite" begin - flag = [0] - f(x) = (y = LogDensityProblems.@iffinite x; flag[1] += 1; y) - @test f(NaN) ≡ NaN - @test flag == [0] - @test f(1) ≡ 1 - @test flag == [1] -end - -@testset "reject wrapper" begin - - # function that throws various errors - function f(x) - y = first(x) - if y > 1 - throw(DomainError(x)) - elseif y > 0 - throw(ArgumentError("bad")) - elseif y > -1 - y - else - Inf - end - end - x_dom = [1.5] - x_arg = [0.5] - x_inf = [-1.5] - x_ok = [-0.5] - - # test unwrapped (for consistency) - P = TransformedLogDensity(as(Array, 1), f) - ∇P = ADgradient(:ForwardDiff, P) - vgb = ValueGradientBuffer(randn(1)) - - # -∞, not a valid value - @test logdensity(Real, P, x_inf) == Inf - @test_throws InvalidLogDensityException(0, Inf) logdensity(Value, P, x_inf) - @test_throws InvalidLogDensityException logdensity(ValueGradient, ∇P, x_inf) - @test_throws InvalidLogDensityException logdensity(vgb, ∇P, x_inf) - - # ArgumentError - @test_throws ArgumentError logdensity(Real, P, x_arg) - @test_throws ArgumentError logdensity(Value, P, x_arg) - @test_throws ArgumentError logdensity(ValueGradient, ∇P, x_arg) - @test_throws ArgumentError logdensity(vgb, ∇P, x_arg) - - # DomainError - @test_throws DomainError logdensity(Real, P, x_dom) - @test_throws DomainError logdensity(Value, P, x_dom) - @test_throws DomainError logdensity(ValueGradient, ∇P, x_dom) - @test_throws DomainError logdensity(vgb, ∇P, x_dom) - - # valid values - @test logdensity(Real, P, x_ok) == -0.5 - @test logdensity(Value, P, x_ok) ≅ Value(-0.5) - @test logdensity(ValueGradient, ∇P, x_ok) ≅ ValueGradient(-0.5, [1.0]) - @test logdensity(vgb, ∇P, x_ok) ≅ ValueGradient(-0.5, [1.0]) - - # test wrapped -- we catch domain and invalid log density errors - R = LogDensityRejectErrors{Union{DomainError,InvalidLogDensityException}}(∇P) - - # InvalidLogDensityException and DomainError converted to -∞ - @test logdensity(Real, R, x_inf) == Inf # no error - @test logdensity(Real, R, x_dom) == -Inf # converted - @test logdensity(Value, R, x_inf) ≅ logdensity(Value, R, x_dom) ≅ Value(-Inf) - @test logdensity(ValueGradient, R, x_inf) ≅ logdensity(ValueGradient, R, x_dom) ≅ - ValueGradient(-Inf, x_inf) - @test logdensity(vgb, R, x_inf) ≅ logdensity(vgb, R, x_dom) ≅ ValueGradient(-Inf, x_inf) - - # ArgumentError passes through - @test_throws ArgumentError logdensity(Real, R, x_arg) - @test_throws ArgumentError logdensity(Value, R, x_arg) - @test_throws ArgumentError logdensity(ValueGradient, R, x_arg) - @test_throws ArgumentError logdensity(vgb, R, x_arg) - - # valid values pass through - - @test logdensity(Real, R, x_ok) == -0.5 - @test logdensity(Value, R, x_ok) ≅ Value(-0.5) - @test logdensity(ValueGradient, R, x_ok) ≅ ValueGradient(-0.5, [1.0]) - @test logdensity(vgb, R, x_ok) ≅ ValueGradient(-0.5, [1.0]) - - # test constructor - @test LogDensityRejectErrors{InvalidLogDensityException}(∇P) ≡ - 
LogDensityRejectErrors(∇P) -end - -#### -#### various AD tests -#### - -struct TestLogDensity{F} - ℓ::F -end -TransformVariables.dimension(::TestLogDensity) = 3 -test_logdensity1(x) = -2*abs2(x[1]) - 3*abs2(x[2]) - 5*abs2(x[3]) -test_logdensity(x) = any(x .< 0) ? -Inf : test_logdensity1(x) -test_gradient(x) = x .* [-4, -6, -10] -TestLogDensity() = TestLogDensity(test_logdensity) # default: -Inf for negative input -LogDensityProblems.logdensity(::Type{Real}, ℓ::TestLogDensity, x) = ℓ.ℓ(x) - -@testset "AD via ForwardDiff" begin - ∇ℓ = ADgradient(:ForwardDiff, TestLogDensity()) +@testset "AD via ReverseDiff" begin + ℓ = TestLogDensity() + ∇ℓ = ADgradient(:ReverseDiff, ℓ) + @test repr(∇ℓ) == "ReverseDiff AD wrapper for " * repr(ℓ) @test dimension(∇ℓ) == 3 - buffer = randn(3) - vb = ValueGradientBuffer(buffer) - buffer32 = randn(Float32, 3) # test non-matching buffer type - vb32 = ValueGradientBuffer(buffer32) + @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 x = randn(3) - @test logdensity(Real, ∇ℓ, x) ≈ test_logdensity(x) - @test logdensity(Value, ∇ℓ, x) ≅ Value(test_logdensity(x)) - vg = ValueGradient(test_logdensity(x), test_gradient(x)) - @test logdensity(ValueGradient, ∇ℓ, x) ≅ vg - vg2 = logdensity(vb, ∇ℓ, x) - @test vg2.gradient ≡ buffer - @test vg2 ≅ vg - vg3 = logdensity(vb32, ∇ℓ, x) - @test vg3.gradient ≡ buffer32 - @test vg3 ≈ vg - @test vg3 isa ValueGradient{Float32, Vector{Float32}} + @test logdensity(∇ℓ, x) ≅ test_logdensity(x) + @test logdensity_and_gradient(∇ℓ, x) ≅ (test_logdensity(x), test_gradient(x)) end end @testset "AD via Flux" begin - ∇ℓ = ADgradient(:Flux, TestLogDensity()) - @test dimension(∇ℓ) == 3 - buffer = randn(3) - vb = ValueGradientBuffer(buffer) - for _ in 1:100 - x = randn(3) - @test logdensity(Real, ∇ℓ, x) ≈ test_logdensity(x) - @test logdensity(Value, ∇ℓ, x) ≅ Value(test_logdensity(x)) - vg = ValueGradient(test_logdensity(x), test_gradient(x)) - @test logdensity(ValueGradient, ∇ℓ, x) ≅ vg - # NOTE don't test buffer ≡, as that is not implemented for Flux - @test logdensity(vb, ∇ℓ, x) ≅ vg - end -end - -@testset "AD via ReverseDiff" begin - ∇ℓ = ADgradient(:ReverseDiff, TestLogDensity()) + ℓ = TestLogDensity() + ∇ℓ = ADgradient(:Flux, ℓ) + @test repr(∇ℓ) == "Flux AD wrapper for " * repr(ℓ) @test dimension(∇ℓ) == 3 - buffer = randn(3) - vb = ValueGradientBuffer(buffer) - buffer32 = randn(Float32, 3) # test non-matching buffer type - vb32 = ValueGradientBuffer(buffer32) + @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 x = randn(3) - @test logdensity(Real, ∇ℓ, x) ≈ test_logdensity(x) - @test logdensity(Value, ∇ℓ, x) ≅ Value(test_logdensity(x)) - vg = ValueGradient(test_logdensity(x), test_gradient(x)) - @test logdensity(ValueGradient, ∇ℓ, x) ≅ vg - vg2 = logdensity(vb, ∇ℓ, x) - @test vg2.gradient ≡ buffer - @test vg2 ≅ vg - vg3 = logdensity(vb32, ∇ℓ, x) - @test vg3.gradient ≡ buffer32 - @test vg3 ≈ vg - @test vg3 isa ValueGradient{Float32, Vector{Float32}} + @test logdensity(∇ℓ, x) ≅ test_logdensity(x) + @test logdensity_and_gradient(∇ℓ, x) ≅ (test_logdensity(x), test_gradient(x)) end end @@ -321,18 +142,13 @@ if VERSION ≥ v"1.1.0" # cf https://github.com/FluxML/Zygote.jl/issues/271 ℓ = TestLogDensity(test_logdensity1) ∇ℓ = ADgradient(:Zygote, ℓ) - @test repr(∇ℓ) == ("Zygote AD wrapper for " * repr(ℓ)) + @test repr(∇ℓ) == "Zygote AD wrapper for " * repr(ℓ) @test dimension(∇ℓ) == 3 - buffer = randn(3) - vb = ValueGradientBuffer(buffer) + @test capabilities(∇ℓ) ≡ LogDensityOrder(1) for _ in 1:100 x = randn(3) - @test logdensity(Real, ∇ℓ, x) 
≈ test_logdensity1(x) - @test logdensity(Value, ∇ℓ, x) ≅ Value(test_logdensity1(x)) - vg = ValueGradient(test_logdensity1(x), test_gradient(x)) - @test logdensity(ValueGradient, ∇ℓ, x) ≅ vg - # NOTE don't test buffer ≡, as that is not implemented for Zygote - @test logdensity(vb, ∇ℓ, x) ≅ vg + @test logdensity(∇ℓ, x) ≅ test_logdensity1(x) + @test logdensity_and_gradient(∇ℓ, x) ≅ (test_logdensity1(x), test_gradient(x)) end end end @@ -343,6 +159,67 @@ end @test_logs((:info, msg), @test_throws(MethodError, ADgradient(:Foo, P))) end -@testset "chunk heuristics for ForwardDiff" begin - @test LogDensityProblems.heuristic_chunks(82) == vcat(1:4:81, [82]) +#### +#### transformed Bayesian problem +#### + +@testset "transformed Bayesian problem" begin + t = as((y = asℝ₊, )) + d = LogNormal(1.0, 2.0) + logposterior = ((x, ), ) -> logpdf(d, x) + + # a Bayesian problem + p = TransformedLogDensity(t, logposterior) + @test repr(p) == "TransformedLogDensity of dimension 1" + @test dimension(p) == 1 + @test p.transformation ≡ t + @test capabilities(p) == LogDensityOrder(0) + + # gradient of a problem + ∇p = ADgradient(:ForwardDiff, p) + @test dimension(∇p) == 1 + @test parent(∇p).transformation ≡ t + + for _ in 1:100 + x = random_arg(t) + θ, lj = transform_and_logjac(t, x) + px = logdensity(p, x) + @test logpdf(d, θ.y) + lj ≈ (px::Real) + px2, ∇px = logdensity_and_gradient(∇p, x) + @test px2 == px + @test ∇px ≈ [ForwardDiff.derivative(x -> logpdf(d, exp(x)) + x, x[1])] + end +end + +@testset "-∞ log densities" begin + t = as(Array, 2) + validx = x -> all(x .> 0) + p = TransformedLogDensity(t, x -> validx(x) ? sum(abs2, x)/2 : -Inf) + ∇p = ADgradient(:ForwardDiff, p) + + @test dimension(p) == dimension(∇p) == TransformVariables.dimension(t) + @test p.transformation ≡ parent(∇p).transformation ≡ t + + for _ in 1:100 + x = random_arg(t) + px = logdensity(∇p, x) + px_∇px = logdensity_and_gradient(∇p, x) + @test px isa Real + @test px_∇px isa Tuple{Real,Any} + @test first(px_∇px) ≡ px + if validx(x) + @test px ≈ sum(abs2, x)/2 + @test last(px_∇px) ≈ x + else + @test isinf(px) + end + end +end + +@testset "stresstest" begin + @info "stress testing" + ℓ = TestLogDensity(x -> all(x .< 0) ? error("invalid") : -sum(abs2, x)) + failures = LogDensityProblems.stresstest(logdensity, ℓ; N = 500) + @test 50 ≤ length(failures) ≤ 100 + @test all(x -> all(x .< 0), failures) end
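+
+# A small additional check (an illustrative sketch, not part of the original suite):
+# the ordering of `LogDensityOrder` is strict.
+@testset "LogDensityOrder strict ordering" begin
+    @test LogDensityOrder(0) < LogDensityOrder(1)
+    @test !(LogDensityOrder(1) < LogDensityOrder(1))
+end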