Merge pull request #109 from pymc-labs/add_basic_tests

Add tests for ModelBuilder. Starts work on #66 (mypy) and #25 (tests).
pymc-labs · Dec 4, 2022 · 33d92b5 · 33d92b5
2 parents 152844a + 98306bc
commit 33d92b5
Show file tree

Hide file tree

Showing 5 changed files with 102 additions and 12 deletions.
diff --git a/.isort.cfg b/.isort.cfg
@@ -1,2 +1,2 @@
 [settings]
-known_third_party = arviz,matplotlib,numpy,pandas,patsy,pymc,scipy,seaborn,setuptools,sklearn,statsmodels,xarray
+known_third_party = arviz,matplotlib,numpy,pandas,patsy,pymc,pytest,scipy,seaborn,setuptools,sklearn,statsmodels,xarray
diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py
@@ -1,7 +1,8 @@
-from typing import Dict
+from typing import Any, Dict, Optional
 
 import arviz as az
 import numpy as np
+import pandas as pd
 import pymc as pm
 from arviz import r2_score
 
@@ -11,19 +12,33 @@ class ModelBuilder(pm.Model):
     This is a wrapper around pm.Model to give scikit-learn like API
     """
 
-    def __init__(self, sample_kwargs: Dict = {}):
+    def __init__(self, sample_kwargs: Optional[Dict[str, Any]] = None):
         super().__init__()
         self.idata = None
-        self.sample_kwargs = sample_kwargs
-
-    def build_model(self, X, y, coords):
-        raise NotImplementedError
+        self.sample_kwargs = sample_kwargs if sample_kwargs is not None else {}
+
+    def build_model(self, X, y, coords) -> None:
+        """Build the model.
+
+        Example
+        -------
+        >>> class CausalPyModel(ModelBuilder):
+        ...     def build_model(self, X, y, coords):
+        ...         with self:
+        ...             X_ = pm.MutableData(name="X", value=X)
+        ...             y_ = pm.MutableData(name="y", value=y)
+        ...             beta = pm.Normal("beta", mu=0, sigma=1, shape=X_.shape[1])
+        ...             sigma = pm.HalfNormal("sigma", sigma=1)
+        ...             mu = pm.Deterministic("mu", pm.math.dot(X_, beta))
+        ...             pm.Normal("y_hat", mu=mu, sigma=sigma, observed=y_)
+        """
+        raise NotImplementedError("This method must be implemented by a subclass")
 
-    def _data_setter(self, X):
+    def _data_setter(self, X) -> None:
         with self.model:
             pm.set_data({"X": X})
 
-    def fit(self, X, y, coords):
+    def fit(self, X, y, coords: Optional[Dict[str, Any]] = None) -> None:
         """Draw samples from posterior, prior predictive, and posterior predictive
         distributions.
         """
@@ -43,7 +58,7 @@ def predict(self, X):
             )
         return post_pred
 
-    def score(self, X, y):
+    def score(self, X, y) -> pd.Series:
         """Score the Bayesian :math:`R^2` given inputs ``X`` and outputs ``y``.
 
         .. caution::

diff --git a/causalpy/tests/conftest.py b/causalpy/tests/conftest.py
@@ -0,0 +1,8 @@
+import numpy as np
+import pytest
+
+
+@pytest.fixture(scope="session")
+def rng() -> np.random.Generator:
+    seed: int = sum(map(ord, "causalpy"))
+    return np.random.default_rng(seed=seed)
diff --git a/causalpy/tests/test_dummy.py b/causalpy/tests/test_dummy.py
diff --git a/causalpy/tests/test_pymc_models.py b/causalpy/tests/test_pymc_models.py
@@ -0,0 +1,69 @@
+import arviz as az
+import numpy as np
+import pandas as pd
+import pymc as pm
+import pytest
+
+from causalpy.pymc_models import ModelBuilder
+
+
+class MyToyModel(ModelBuilder):
+    def build_model(self, X, y, coords):
+        with self:
+            X_ = pm.MutableData(name="X", value=X)
+            y_ = pm.MutableData(name="y", value=y)
+            beta = pm.Normal("beta", mu=0, sigma=1, shape=X_.shape[1])
+            sigma = pm.HalfNormal("sigma", sigma=1)
+            mu = pm.Deterministic("mu", pm.math.dot(X_, beta))
+            pm.Normal("y_hat", mu=mu, sigma=sigma, observed=y_)
+
+
+class TestModelBuilder:
+    def test_init(self):
+        mb = ModelBuilder()
+        assert mb.idata is None
+        assert mb.sample_kwargs == {}
+
+    @pytest.mark.parametrize(
+        argnames="coords", argvalues=[{"a": 1}, None], ids=["coords-dict", "coord-None"]
+    )
+    @pytest.mark.parametrize(
+        argnames="y", argvalues=[np.ones(3), None], ids=["y-array", "y-None"]
+    )
+    @pytest.mark.parametrize(
+        argnames="X", argvalues=[np.ones(2), None], ids=["X-array", "X-None"]
+    )
+    def test_model_builder(self, X, y, coords) -> None:
+        with pytest.raises(
+            NotImplementedError, match="This method must be implemented by a subclass"
+        ):
+            ModelBuilder().build_model(X=X, y=y, coords=coords)
+
+    def test_fit_build_not_implemented(self):
+        with pytest.raises(
+            NotImplementedError, match="This method must be implemented by a subclass"
+        ):
+            ModelBuilder().fit(X=np.ones(2), y=np.ones(3), coords={"a": 1})
+
+    @pytest.mark.parametrize(
+        argnames="coords",
+        argvalues=[None, {"a": 1}],
+        ids=["None-coords", "dict-coords"],
+    )
+    def test_fit_predict(self, coords, rng) -> None:
+        X = rng.normal(loc=0, scale=1, size=(20, 2))
+        y = rng.normal(loc=0, scale=1, size=(20,))
+        model = MyToyModel(sample_kwargs={"chains": 2, "draws": 2})
+        model.fit(X, y, coords=coords)
+        predictions = model.predict(X=X)
+        score = model.score(X=X, y=y)
+        assert isinstance(model.idata, az.InferenceData)
+        assert az.extract(data=model.idata, var_names=["beta"]).shape == (2, 2 * 2)
+        assert az.extract(data=model.idata, var_names=["sigma"]).shape == (2 * 2,)
+        assert az.extract(data=model.idata, var_names=["mu"]).shape == (20, 2 * 2)
+        assert az.extract(
+            data=model.idata, group="posterior_predictive", var_names=["y_hat"]
+        ).shape == (20, 2 * 2)
+        assert isinstance(score, pd.Series)
+        assert score.shape == (2,)
+        assert isinstance(predictions, az.InferenceData)