Skip to content

Commit

Permalink
Vendor model_builder from pymc-experimental (pymc-labs#339)
Browse files Browse the repository at this point in the history
* Vendor model_builder from pymc-experimental

* Remove pymc-experimental dependency

* Add `pymc_marketing.vendored` to setuptools packages

* Fix setuptools package reference

* Exclude vendored from codecov

* Fix codecov paths

* Move model_builder directly inside pymc-marketing

* Remove vendored.pymc_experimental as setuptools package

* Start cleaning up mypy errors

* Fix mypy errors

* Implement abstract methods

* Add model_builder tests

* Satisfy linter for test_model_builder.py

---------

Co-authored-by: Michal Raczycki <[email protected]>
  • Loading branch information
maresb and michaelraczycki authored Aug 17, 2023
1 parent 882fffa commit 73765d4
Show file tree
Hide file tree
Showing 13 changed files with 1,056 additions and 21 deletions.
2 changes: 1 addition & 1 deletion codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ coverage:
threshold: 2%
base: auto
paths:
- "pymc-marketing/"
- "pymc_marketing"
# advanced settings
branches:
- main
Expand Down
3 changes: 0 additions & 3 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,3 @@ ignore_missing_imports = True

[mypy-scipy.*]
ignore_missing_imports = True

[mypy-pymc_experimental.*]
ignore_missing_imports = True
29 changes: 23 additions & 6 deletions pymc_marketing/clv/models/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@
import types
import warnings
from pathlib import Path
from typing import Dict, Optional, Tuple
from typing import Any, Dict, Optional, Tuple, Union

import arviz as az
import numpy as np
import pandas as pd
import pymc as pm
from pymc import str_for_dist
from pymc.backends import NDArray
from pymc.backends.base import MultiTrace
from pymc_experimental.model_builder import ModelBuilder
from pytensor.tensor import TensorVariable
from xarray import Dataset

from pymc_marketing.model_builder import ModelBuilder


class CLVModel(ModelBuilder):
_model_type = ""
Expand All @@ -27,7 +30,7 @@ def __init__(
def __repr__(self):
return f"{self._model_type}\n{self.model.str_repr()}"

def fit(
def fit( # type: ignore
self,
fit_method: str = "mcmc",
**kwargs,
Expand All @@ -44,7 +47,7 @@ def fit(
Other keyword arguments passed to the underlying PyMC routines
"""

self.build_model()
self.build_model() # type: ignore

if fit_method == "mcmc":
self._fit_mcmc(**kwargs)
Expand Down Expand Up @@ -179,7 +182,7 @@ def load(cls, fname: str):
)
model.idata = idata

model.build_model()
model.build_model() # type: ignore

if model.id != idata.attrs["id"]:
raise ValueError(
Expand Down Expand Up @@ -225,7 +228,7 @@ def default_sampler_config(self) -> Dict:
def _serializable_model_config(self) -> Dict:
return self.model_config

def sample_prior_predictive(
def sample_prior_predictive( # type: ignore
self,
samples: int = 1000,
extend_idata: bool = True,
Expand Down Expand Up @@ -285,3 +288,17 @@ def fit_summary(self, **kwargs):
return res["mean"].rename("value")
else:
return az.summary(self.fit_result, **kwargs)

# Placeholder property: the body is only `pass`, so reading `output_var`
# evaluates to None.
# NOTE(review): presumably added only to satisfy an abstract member of the
# ModelBuilder base class (see commit note "implementing abstract methods")
# -- confirm against pymc_marketing/model_builder.py.
@property
def output_var(self):
pass

# No-op placeholder: accepts `X` and `y`, ignores both, and returns None.
# NOTE(review): presumably exists only so this class fulfils the
# ModelBuilder interface; the visible `fit` calls `build_model()` directly
# and does not pass X/y here -- confirm nothing relies on this doing work.
def generate_and_preprocess_model_data(
self,
X: Union[pd.DataFrame, pd.Series],
y: Union[pd.Series, np.ndarray[Any, Any]],
) -> None:
pass

# No-op placeholder: takes no data arguments and returns None.
# NOTE(review): the base-class `_data_setter` signature is not visible here;
# verify that this zero-argument override is compatible with how
# ModelBuilder invokes it.
def _data_setter(self):
pass
2 changes: 1 addition & 1 deletion pymc_marketing/clv/models/beta_geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def default_model_config(self) -> Dict[str, Dict]:
"r_prior": {"dist": "HalfFlat", "kwargs": {}},
}

def build_model(
def build_model( # type: ignore
self,
) -> None:
with pm.Model(coords=self.coords) as self.model:
Expand Down
3 changes: 2 additions & 1 deletion pymc_marketing/clv/models/gamma_gamma.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def expected_customer_spend(
mean_transaction_value, frequency = to_xarray(
customer_id, mean_transaction_value, frequency
)

assert self.idata is not None, "Model must be fitted first"
p = self.idata.posterior["p"]
q = self.idata.posterior["q"]
v = self.idata.posterior["v"]
Expand Down Expand Up @@ -104,6 +104,7 @@ def distribution_new_customer_spend(
def expected_new_customer_spend(self) -> xarray.DataArray:
"""Expected transaction value for a new customer"""

assert self.idata is not None, "Model must be fitted first"
p_mean = self.idata.posterior["p"]
q_mean = self.idata.posterior["q"]
v_mean = self.idata.posterior["v"]
Expand Down
3 changes: 2 additions & 1 deletion pymc_marketing/clv/models/pareto_nbd.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def default_model_config(self) -> Dict[str, Dict]:
"beta_prior": {"dist": "Weibull", "kwargs": {"alpha": 2, "beta": 10}},
}

def build_model(
def build_model( # type: ignore
self,
) -> None:
with pm.Model(coords=self.coords) as self.model:
Expand All @@ -245,6 +245,7 @@ def _unload_params(
self,
) -> Tuple[Any, ...]:
"""Utility function retrieving posterior parameters for predictive methods"""
assert self.idata is not None, "Model must be fit first."
return tuple([self.idata.posterior[param] for param in self._params])

# TODO: Convert to list comprehension to support covariates?
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/clv/models/shifted_beta_geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def default_model_config(self) -> Dict:
"beta_prior": {"dist": "HalfFlat", "kwargs": {}},
}

def build_model(
def build_model( # type: ignore
self,
) -> None:
with pm.Model(coords=self.coords) as self.model:
Expand Down
8 changes: 4 additions & 4 deletions pymc_marketing/mmm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import pandas as pd
import pymc as pm
import seaborn as sns
from pymc_experimental.model_builder import ModelBuilder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from xarray import DataArray, Dataset
Expand All @@ -24,6 +23,7 @@
ValidateDateColumn,
ValidateTargetColumn,
)
from pymc_marketing.model_builder import ModelBuilder

__all__ = ("BaseMMM", "MMM")

Expand Down Expand Up @@ -271,7 +271,7 @@ def plot_prior_predictive(

ax.plot(
np.asarray(self.X[self.date_column]),
np.asarray(self.preprocessed_data["y"]),
np.asarray(self.preprocessed_data["y"]), # type: ignore
color="black",
)
ax.set(
Expand Down Expand Up @@ -323,7 +323,7 @@ def plot_posterior_predictive(
)

target_to_plot: np.ndarray = np.asarray(
self.y if original_scale else self.preprocessed_data["y"]
self.y if original_scale else self.preprocessed_data["y"] # type: ignore
)
ax.plot(
np.asarray(self.X[self.date_column]),
Expand Down Expand Up @@ -423,7 +423,7 @@ def plot_components_contributions(self, **plt_kwargs: Any) -> plt.Figure:
)
ax.plot(
np.asarray(self.X[self.date_column]),
np.asarray(self.preprocessed_data["y"]),
np.asarray(self.preprocessed_data["y"]), # type: ignore
color="black",
)
ax.legend(title="components", loc="center left", bbox_to_anchor=(1, 0.5))
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/mmm/delayed_saturated_mmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def default_sampler_config(self) -> Dict:
def output_var(self):
return "y"

def generate_and_preprocess_model_data(
def generate_and_preprocess_model_data( # type: ignore
self, X: Union[pd.DataFrame, pd.Series], y: pd.Series
) -> None:
"""
Expand Down
Loading

0 comments on commit 73765d4

Please sign in to comment.