From 35079bb5da6885997b89e275e8fd256ced4cbb25 Mon Sep 17 00:00:00 2001 From: Ricardo Vieira Date: Fri, 13 Dec 2024 11:22:57 +0100 Subject: [PATCH] Bump PyMC dependency --- environment.yml | 2 +- pymc_marketing/clv/distributions.py | 119 +++++---------------------- pymc_marketing/mlflow.py | 18 +--- pyproject.toml | 2 +- tests/clv/models/test_gamma_gamma.py | 2 +- 5 files changed, 26 insertions(+), 117 deletions(-) diff --git a/environment.yml b/environment.yml index 8dcdfd43b..10a5b2615 100644 --- a/environment.yml +++ b/environment.yml @@ -15,7 +15,7 @@ dependencies: - pydantic - preliz # NOTE: Keep minimum pymc version in sync with ci.yml `OLDEST_PYMC_VERSION` -- pymc>=5.12.0,<5.16.0 +- pymc>=5.20.0 - scikit-learn>=1.1.1 - seaborn>=0.12.2 - xarray diff --git a/pymc_marketing/clv/distributions.py b/pymc_marketing/clv/distributions.py index 0f23ad4a4..656c58f4a 100644 --- a/pymc_marketing/clv/distributions.py +++ b/pymc_marketing/clv/distributions.py @@ -13,8 +13,9 @@ # limitations under the License. """Distributions for the CLV module.""" +from functools import reduce + import numpy as np -import pymc as pm import pytensor.tensor as pt from pymc.distributions.continuous import PositiveContinuous from pymc.distributions.dist_math import betaln, check_parameters @@ -28,26 +29,16 @@ class ContNonContractRV(RandomVariable): name = "continuous_non_contractual" - ndim_supp = 1 - ndims_params = [0, 0, 0, 0] + signature = "(),(),()->(2)" dtype = "floatX" _print_name = ("ContNonContract", "\\operatorname{ContNonContract}") - def make_node(self, rng, size, dtype, lam, p, T): - T = pt.as_tensor_variable(T) - - return super().make_node(rng, size, dtype, lam, p, T) + def __call__(self, lam, p, T, size=None, **kwargs): + return super().__call__(lam, p, T, size=size, **kwargs) @classmethod def rng_fn(cls, rng, lam, p, T, size): - size = pm.distributions.shape_utils.to_tuple(size) - - # TODO: broadcast sizes - lam = np.asarray(lam) - p = np.asarray(p) - T = np.asarray(T) - - if size == (): + if size is None: size = np.broadcast_shapes(lam.shape, p.shape, T.shape) lam = np.broadcast_to(lam, size) @@ -74,9 +65,6 @@ def rng_fn(cls, rng, lam, p, T, size): return np.stack([t_x, x], axis=-1) - def _supp_shape_from_params(*args, **kwargs): - return (2,) - continuous_non_contractual = ContNonContractRV() @@ -129,13 +117,14 @@ def logp(value, lam, p, T): ) logp = pt.switch( - pt.any( - ( + reduce( + pt.bitwise_or, + [ pt.and_(pt.ge(t_x, 0), zero_observations), pt.lt(t_x, 0), pt.lt(x, 0), pt.gt(t_x, T), - ), + ], ), -np.inf, logp, @@ -152,29 +141,16 @@ def logp(value, lam, p, T): class ContContractRV(RandomVariable): name = "continuous_contractual" - ndim_supp = 1 - ndims_params = [0, 0, 0, 0] + signature = "(),(),()->(3)" dtype = "floatX" _print_name = ("ContinuousContractual", "\\operatorname{ContinuousContractual}") - def make_node(self, rng, size, dtype, lam, p, T): - T = pt.as_tensor_variable(T) - - return super().make_node(rng, size, dtype, lam, p, T) - def __call__(self, lam, p, T, size=None, **kwargs): return super().__call__(lam, p, T, size=size, **kwargs) @classmethod def rng_fn(cls, rng, lam, p, T, size): - size = pm.distributions.shape_utils.to_tuple(size) - - # To do: broadcast sizes - lam = np.asarray(lam) - p = np.asarray(p) - T = np.asarray(T) - - if size == (): + if size is None: size = np.broadcast_shapes(lam.shape, p.shape, T.shape) lam = np.broadcast_to(lam, size) @@ -254,24 +230,15 @@ def logp(value, lam, p, T): ) logp = pt.switch( - pt.any(pt.or_(pt.lt(t_x, 0), zero_observations)), - -np.inf, - logp, - ) - logp = pt.switch( - pt.all( - pt.or_(pt.eq(churn, 0), pt.eq(churn, 1)), - ), - logp, - -np.inf, - ) - logp = pt.switch( - pt.any( - ( + reduce( + pt.bitwise_or, + [ + zero_observations, pt.lt(t_x, 0), pt.lt(x, 0), pt.gt(t_x, T), - ), + pt.bitwise_not(pt.bitwise_or(pt.eq(churn, 0), pt.eq(churn, 1))), + ], ), -np.inf, logp, @@ -289,34 +256,16 @@ def logp(value, lam, p, T): class ParetoNBDRV(RandomVariable): name = "pareto_nbd" - ndim_supp = 1 - ndims_params = [0, 0, 0, 0, 0] + signature = "(),(),(),(),()->(2)" dtype = "floatX" _print_name = ("ParetoNBD", "\\operatorname{ParetoNBD}") - def make_node(self, rng, size, dtype, r, alpha, s, beta, T): - r = pt.as_tensor_variable(r) - alpha = pt.as_tensor_variable(alpha) - s = pt.as_tensor_variable(s) - beta = pt.as_tensor_variable(beta) - T = pt.as_tensor_variable(T) - - return super().make_node(rng, size, dtype, r, alpha, s, beta, T) - def __call__(self, r, alpha, s, beta, T, size=None, **kwargs): return super().__call__(r, alpha, s, beta, T, size=size, **kwargs) @classmethod def rng_fn(cls, rng, r, alpha, s, beta, T, size): - size = pm.distributions.shape_utils.to_tuple(size) - - r = np.asarray(r) - alpha = np.asarray(alpha) - s = np.asarray(s) - beta = np.asarray(beta) - T = np.asarray(T) - - if size == (): + if size is None: size = np.broadcast_shapes( r.shape, alpha.shape, s.shape, beta.shape, T.shape ) @@ -357,9 +306,6 @@ def sim_data(lam, mu, T): return output - def _supp_shape_from_params(*args, **kwargs): - return (2,) - pareto_nbd = ParetoNBDRV() @@ -489,34 +435,16 @@ def logp(value, r, alpha, s, beta, T): class BetaGeoBetaBinomRV(RandomVariable): name = "beta_geo_beta_binom" - ndim_supp = 1 - ndims_params = [0, 0, 0, 0, 0] + signature = "(),(),(),(),()->(2)" dtype = "floatX" _print_name = ("BetaGeoBetaBinom", "\\operatorname{BetaGeoBetaBinom}") - def make_node(self, rng, size, dtype, alpha, beta, gamma, delta, T): - alpha = pt.as_tensor_variable(alpha) - beta = pt.as_tensor_variable(beta) - gamma = pt.as_tensor_variable(gamma) - delta = pt.as_tensor_variable(delta) - T = pt.as_tensor_variable(T) - - return super().make_node(rng, size, dtype, alpha, beta, gamma, delta, T) - def __call__(self, alpha, beta, gamma, delta, T, size=None, **kwargs): return super().__call__(alpha, beta, gamma, delta, T, size=size, **kwargs) @classmethod def rng_fn(cls, rng, alpha, beta, gamma, delta, T, size) -> np.ndarray: - size = pm.distributions.shape_utils.to_tuple(size) - - alpha = np.asarray(alpha) - beta = np.asarray(beta) - gamma = np.asarray(gamma) - delta = np.asarray(delta) - T = np.asarray(T) - - if size == (): + if size is None: size = np.broadcast_shapes( alpha.shape, beta.shape, gamma.shape, delta.shape, T.shape ) @@ -557,9 +485,6 @@ def sim_data(purchase_prob, churn_prob, T): return output - def _supp_shape_from_params(*args, **kwargs): - return (2,) - beta_geo_beta_binom = BetaGeoBetaBinomRV() diff --git a/pymc_marketing/mlflow.py b/pymc_marketing/mlflow.py index 77ec7a5cb..58a1d552e 100644 --- a/pymc_marketing/mlflow.py +++ b/pymc_marketing/mlflow.py @@ -163,18 +163,6 @@ def log_arviz_summary( os.remove(path) -def _backwards_compatiable_data_vars(model: Model) -> list[TensorVariable]: - # TODO: Remove with PyMC update - non_data = ( - model.observed_RVs + model.free_RVs + model.deterministics + model.potentials - ) - vars = { - key: value for key, value in model.named_vars.items() if value not in non_data - } - - return list(vars.values()) - - def log_data(model: Model, idata: az.InferenceData) -> None: """Log the data used in the model to MLflow. @@ -189,11 +177,7 @@ def log_data(model: Model, idata: az.InferenceData) -> None: The InferenceData object returned by the sampling method. """ - data_vars: list[TensorVariable] = ( - _backwards_compatiable_data_vars(model) - if not hasattr(model, "data_vars") - else model.data_vars - ) + data_vars: list[TensorVariable] = model.data_vars features = { var.name: idata.constant_data[var.name].to_numpy() diff --git a/pyproject.toml b/pyproject.toml index a1fe3c508..e413d7df4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "pandas", "pydantic>=2.1.0", # NOTE: Used as minimum pymc version with ci.yml `OLDEST_PYMC_VERSION` - "pymc>=5.13.0,<5.16.0", + "pymc>=5.20.0", "scikit-learn>=1.1.1", "seaborn>=0.12.2", "xarray>=2024.1.0", diff --git a/tests/clv/models/test_gamma_gamma.py b/tests/clv/models/test_gamma_gamma.py index 5cd9cab92..9348913eb 100644 --- a/tests/clv/models/test_gamma_gamma.py +++ b/tests/clv/models/test_gamma_gamma.py @@ -211,7 +211,7 @@ def test_spend(self, distribution): preds.mean(("draw", "chain")), expected_preds_mean, rtol=0.1 ) np.testing.assert_allclose( - preds.std(("draw", "chain")), expected_preds_std, rtol=0.25 + preds.std(("draw", "chain")), expected_preds_std, rtol=0.5 ) else: