fix: improve wording, add MAP, make chains shorter
cako committed Nov 19, 2024
1 parent 80bf466 commit 956e74e
Showing 1 changed file with 60 additions and 48 deletions.
108 changes: 60 additions & 48 deletions examples/plot_bayeslinearregr.py
@@ -69,26 +69,35 @@
# :math:`\mathbf{y} = \mathbf{A} \mathbf{x}` in a least-squares sense.
# Using PyLops, the ``/`` operator solves this problem iteratively (i.e., with
# :py:func:`scipy.sparse.linalg.lsqr`).
-xnest = LRop / yn
-noise_est = np.sqrt(np.sum((yn - LRop @ xnest) ** 2) / (N - 1))
+# In Bayesian terminology, this estimator is known as the maximum likelihood
+# estimate (MLE).
+x_mle = LRop / yn
+noise_mle = np.sqrt(np.sum((yn - LRop @ x_mle) ** 2) / (N - 1))

+###############################################################################
+# Alternatively, we may regularize the problem. To condition the solution
+# towards smaller-magnitude parameters, we can use a regularized least-squares
+# approach. Since the regularization weight is pretty small, we expect the
+# result to be very similar to the one above.
+sigma_prior = 20
+eps = 1 / np.sqrt(2) / sigma_prior
+x_map, *_ = pylops.optimization.basic.lsqr(LRop, yn, damp=eps)
+noise_map = np.sqrt(np.sum((yn - LRop @ x_map) ** 2) / (N - 1))

###############################################################################
# Let's plot the best-fitting lines for the noise-free and the noisy data
fig, ax = plt.subplots(figsize=(8, 4))
-ax.plot(
-    np.array([t.min(), t.max()]),
-    np.array([t.min(), t.max()]) * x[1] + x[0],
-    "k",
-    lw=4,
-    label=rf"True: $x_0$ = {x[0]:.2f}, $x_1$ = {x[1]:.2f}",
-)
-ax.plot(
-    np.array([t.min(), t.max()]),
-    np.array([t.min(), t.max()]) * xnest[1] + xnest[0],
-    "--g",
-    lw=4,
-    label=rf"MAP Estimated: $x_0$ = {xnest[0]:.2f}, $x_1$ = {xnest[1]:.2f}",
-)
+for est, est_label, c in zip(
+    [x, x_mle, x_map], ["True", "MLE", "MAP"], ["k", "C0", "C1"]
+):
+    ax.plot(
+        np.array([t.min(), t.max()]),
+        np.array([t.min(), t.max()]) * est[1] + est[0],
+        color=c,
+        ls="--" if est_label == "MAP" else "-",
+        lw=4,
+        label=rf"{est_label}: $x_0$ = {est[0]:.2f}, $x_1$ = {est[1]:.2f}",
+    )
ax.scatter(t, y, c="r", s=70)
ax.scatter(t, yn, c="g", s=70)
ax.legend()
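As a quick cross-check of the two point estimates above, here is a minimal sketch that solves both problems in closed form; it assumes ``LRop`` is small enough to densify with ``todense()`` and that ``damp`` follows the :py:func:`scipy.sparse.linalg.lsqr` convention of minimizing :math:`\|\mathbf{A}\mathbf{x} - \mathbf{y}\|^2 + \text{damp}^2 \|\mathbf{x}\|^2` (the ``*_check`` names are illustrative, not from the original example):

# Sketch: dense closed-form solutions for comparison (illustrative names)
A = LRop.todense()
# Unregularized least squares should match the MLE from the ``/`` operator
x_mle_check, *_ = np.linalg.lstsq(A, yn, rcond=None)
# Damped least squares via the augmented system [A; eps * I] x ~ [yn; 0]
A_aug = np.vstack([A, eps * np.eye(A.shape[1])])
y_aug = np.concatenate([yn, np.zeros(A.shape[1])])
x_map_check, *_ = np.linalg.lstsq(A_aug, y_aug, rcond=None)
print(np.allclose(x_mle, x_mle_check, atol=1e-4), np.allclose(x_map, x_map_check, atol=1e-4))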
@@ -105,17 +114,23 @@
#
# To do so, we need to define the priors and the likelihood.
#
-# Priors in Bayesian analysis can be interpreted as the probabilistic
-# equivalent to regularization. Finding the maximum a posteriori (MAP) estimate
-# to a least-squares problem with a Gaussian prior on the parameters is
-# is equivalent to applying a Tikhonov (L2) regularization to these parameters.
-# A Laplace prior is equivalent to a sparse (L1) regularization. In addition,
-# the weight of the regularization is controlled by the "scale" of the
-# distribution of the prior; the standard deviation (in the case of a Gaussian)
-# is inversely proportional strength of the regularization.
+# As hinted above, priors in Bayesian analysis can be interpreted as the
+# probabilistic equivalent of regularization. Finding the maximum a posteriori
+# (MAP) estimate to a least-squares problem with a Gaussian prior on the
+# parameters is equivalent to applying a Tikhonov (L2) regularization to these
+# parameters. A Laplace prior is equivalent to a sparse (L1) regularization.
+# In addition, the weight of the regularization is controlled by the "scale" of
+# the distribution of the prior; the standard deviation (in the case of a
+# Gaussian) is inversely proportional to the strength of the regularization.
+# So if we use the same ``sigma_prior`` above as the standard deviation of our
+# prior distribution, we should recover the same MAP estimate. In practice, in
+# Bayesian analysis we are not only interested in point estimates like the
+# MAP, but rather in the whole posterior distribution. If you want only the
+# MAP, there are better methods to obtain it, such as the one shown above.
#
-# In this problem we will use weak, not very informative priors, by settings
-# their "weights" to be small (high scale parameters):
+# In this problem we will use weak, not very informative priors, letting them
+# accept a wide range of probable values. This is equivalent to setting the
+# "weights" to be small, as shown above:
#
# .. math::
# x_0 \sim N(0, 20)
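To make the Gaussian-prior/Tikhonov correspondence above concrete, here is a short sketch: assuming a Gaussian likelihood with noise standard deviation :math:`\sigma` and the prior :math:`\mathbf{x} \sim N(0, \sigma_{prior}^2 \mathbf{I})`, maximizing the posterior is equivalent to minimizing its negative log,

.. math::
   \frac{\|\mathbf{y} - \mathbf{A}\mathbf{x}\|^2}{2\sigma^2}
   + \frac{\|\mathbf{x}\|^2}{2\sigma_{prior}^2}
   \propto \|\mathbf{y} - \mathbf{A}\mathbf{x}\|^2
   + \left(\frac{\sigma}{\sigma_{prior}}\right)^2 \|\mathbf{x}\|^2,

i.e., damped least squares with a damping factor inversely proportional to :math:`\sigma_{prior}`; the exact constant in ``eps`` above depends on the chosen noise-scaling convention.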
@@ -142,14 +157,14 @@

# Define priors
sp = pm.HalfCauchy("σ", beta=10)
-xp = pm.Normal("x", 0, sigma=20, shape=dims)
+xp = pm.Normal("x", 0, sigma=sigma_prior, shape=dims)
mu = pm.Deterministic("mu", pytensor_lrop(xp))

# Define likelihood
likelihood = pm.Normal("y", mu=mu, sigma=sp, observed=y_data)

# Inference!
-idata = pm.sample(2000, tune=1000, chains=2)
+idata = pm.sample(500, tune=200, chains=2)

###############################################################################
# The plot below is known as the "trace" plot. The left column displays the
@@ -166,17 +181,17 @@

axes = az.plot_trace(idata, figsize=(10, 7), var_names=["~mu"])
axes[0, 0].axvline(x[0], label="True Intercept", lw=2, color="k")
-axes[0, 0].axvline(xnest[0], label="Intercept MAP", lw=2, color="C0", ls="--")
+axes[0, 0].axvline(x_map[0], label="Intercept MAP", lw=2, color="C0", ls="--")
axes[0, 0].axvline(x[1], label="True Slope", lw=2, color="darkgray")
-axes[0, 0].axvline(xnest[1], label="Slope MAP", lw=2, color="C1", ls="--")
+axes[0, 0].axvline(x_map[1], label="Slope MAP", lw=2, color="C1", ls="--")
axes[0, 1].axhline(x[0], label="True Intercept", lw=2, color="k")
-axes[0, 1].axhline(xnest[0], label="Intercept MAP", lw=2, color="C0", ls="--")
+axes[0, 1].axhline(x_map[0], label="Intercept MAP", lw=2, color="C0", ls="--")
axes[0, 1].axhline(x[1], label="True Slope", lw=2, color="darkgray")
-axes[0, 1].axhline(xnest[1], label="Slope MAP", lw=2, color="C1", ls="--")
+axes[0, 1].axhline(x_map[1], label="Slope MAP", lw=2, color="C1", ls="--")
axes[1, 0].axvline(sigma, label="True Sigma", lw=2, color="k")
-axes[1, 0].axvline(noise_est, label="Sigma MAP", lw=2, color="C0", ls="--")
+axes[1, 0].axvline(noise_map, label="Sigma MAP", lw=2, color="C0", ls="--")
axes[1, 1].axhline(sigma, label="True Sigma", lw=2, color="k")
-axes[1, 1].axhline(noise_est, label="Sigma MAP", lw=2, color="C0", ls="--")
+axes[1, 1].axhline(noise_map, label="Sigma MAP", lw=2, color="C0", ls="--")
for ax in axes.ravel():
    ax.legend()
ax.get_figure().tight_layout()
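Since this commit shortens the chains (500 draws after 200 tuning steps), a numerical convergence check is a useful complement to the trace plot; a minimal sketch using ArviZ, assuming the ``idata`` sampled above (``summary`` is an illustrative name):

# R-hat near 1 and a healthy effective sample size suggest the shorter
# chains still mix well; "mu" is excluded to keep the table small.
summary = az.summary(idata, var_names=["~mu"], kind="diagnostics")
print(summary[["ess_bulk", "ess_tail", "r_hat"]])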
@@ -203,20 +218,17 @@
    hdi_prob=0.95,
    ax=ax,
)
-ax.plot(
-    np.array([t.min(), t.max()]),
-    np.array([t.min(), t.max()]) * x[1] + x[0],
-    "k",
-    lw=4,
-    label=rf"True: $x_0$ = {x[0]:.2f}, $x_1$ = {x[1]:.2f}",
-)
-ax.plot(
-    np.array([t.min(), t.max()]),
-    np.array([t.min(), t.max()]) * xnest[1] + xnest[0],
-    "--g",
-    lw=4,
-    label=rf"MAP Estimated: $x_0$ = {xnest[0]:.2f}, $x_1$ = {xnest[1]:.2f}",
-)
+for est, est_label, c in zip(
+    [x, x_mle, x_map], ["True", "MLE", "MAP"], ["k", "C0", "C1"]
+):
+    ax.plot(
+        np.array([t.min(), t.max()]),
+        np.array([t.min(), t.max()]) * est[1] + est[0],
+        color=c,
+        ls="--" if est_label == "MAP" else "-",
+        lw=4,
+        label=rf"{est_label}: $x_0$ = {est[0]:.2f}, $x_1$ = {est[1]:.2f}",
+    )
ax.scatter(t, y, c="r", s=70)
ax.scatter(t, yn, c="g", s=70)
ax.legend()
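The posterior mean is another common point summary and is easy to extract for comparison with the MLE and MAP lines; a small sketch, assuming the ``"x"`` variable defined in the PyMC model above (``x_post`` is an illustrative name):

# Average the posterior samples of intercept and slope over chains and draws.
x_post = idata.posterior["x"].mean(dim=("chain", "draw")).values
print(f"Posterior mean: x0 = {x_post[0]:.2f}, x1 = {x_post[1]:.2f}")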