Merge pull request #45 from rasbt/tf_regressor

tensorflow linear regressor
rasbt · Apr 23, 2016 · 11db7ed · 11db7ed
2 parents 7f08c72 + 6d5602a
commit 11db7ed
Show file tree

Hide file tree

Showing 19 changed files with 1,158 additions and 6 deletions.
diff --git a/ci/.travis_test.sh b/ci/.travis_test.sh
@@ -4,12 +4,14 @@ set -e
 
 if [ "$TENSORFLOW" == "true" ]; then
     if [[ "$COVERAGE" == "true" ]]; then
-        nosetests -s -v mlxtend.tf_classifier --nologcapture --with-coverage
+        nosetests -s -v mlxtend/tf_classifier --nologcapture --with-coverage
+        nosetests -s -v mlxtend/tf_regressor --nologcapture --with-coverage
     else
-        nosetests -s -v mlxtend.tf_classifier --nologcapture
+        nosetests -s -v mlxtend/tf_classifier --nologcapture
+        nosetests -s -v mlxtend/tf_regressor --nologcapture
 else
     if [[ "$COVERAGE" == "true" ]]; then
-        nosetests -s -v --with-coverage --exclude-dir=mlxtend/tf_classifier --exclude-dir=mlxtend/data --exclude-dir=mlxtend/general_plotting
+        nosetests -s -v --with-coverage --exclude-dir=mlxtend/tf_classifier --exclude-dir=mlxtend/tf_regressor --exclude-dir=mlxtend/data --exclude-dir=mlxtend/general_plotting
     else
         nosetests -s -v --exclude-dir=mlxtend/tf_classifier --exclude-dir=mlxtend/data --exclude-dir=mlxtend/general_plotting
     fi

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
@@ -45,6 +45,8 @@ pages:
   - regressor:
     - user_guide/regressor/LinearRegression.md
     - user_guide/regressor/StackingRegressor.md
+  - tf_regressor:
+    - user_guide/tf_regressor/TfLinearRegression.md
   - regression_utils:
     - user_guide/regression_utils/plot_linear_regression.md
   - feature_selection:
@@ -104,8 +106,9 @@ pages:
   - api_subpackages/mlxtend.file_io.md
   - api_subpackages/mlxtend.general_plotting.md
   - api_subpackages/mlxtend.preprocessing.md
-  - api_subpackages/mlxtend.regression_utils.md
   - api_subpackages/mlxtend.regressor.md
+  - api_subpackages/mlxtend.tf_regressor.md
+  - api_subpackages/mlxtend.regression_utils.md
   - api_subpackages/mlxtend.text.md
   - api_subpackages/mlxtend.utils.md
 - Installation: installation.md

diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -2,10 +2,12 @@
 
 ---
 
-### Version 0.4.1
+### Version 0.4.1dev
 
 ##### New Features
 
+- New TensorFlow estimator for Linear Regression ([`tf_regressor.TfLinearRegression`](./user_guide/tf_regressor/TfLinearRegression.md))
+
 ##### Changes
 
 - Adding optional `dropout` to the [`tf_classifier.TfMultiLayerPerceptron`](./user_guide/tf_classifier/TfMultiLayerPerceptron.md) classifier for regularization

diff --git a/docs/sources/user_guide/tf_regressor/TfLinearRegression.ipynb b/docs/sources/user_guide/tf_regressor/TfLinearRegression.ipynb
diff --git a/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_13_0.png b/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_13_0.png
diff --git a/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_15_2.png b/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_15_2.png
diff --git a/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_17_2.png b/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_17_2.png
diff --git a/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_19_0.png b/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_19_0.png
diff --git a/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_21_1.png b/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_21_1.png
diff --git a/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_22_0.png b/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_22_0.png
diff --git a/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_27_1.png b/...es/user_guide/tf_regressor/TfLinearRegression_files/TfLinearRegression_27_1.png
diff --git a/mlxtend/tf_regressor/__init__.py b/mlxtend/tf_regressor/__init__.py
@@ -0,0 +1,9 @@
+# Sebastian Raschka 2014-2016
+# mlxtend Machine Learning Library Extensions
+# Author: Sebastian Raschka <sebastianraschka.com>
+#
+# License: BSD 3 clause
+
+from .tf_linear_regression import TfLinearRegression
+
+__all__ = ["TfLinearRegression"]
diff --git a/mlxtend/tf_regressor/tests/test_tf_base.py b/mlxtend/tf_regressor/tests/test_tf_base.py
@@ -0,0 +1,113 @@
+# Sebastian Raschka 2014-2016
+# mlxtend Machine Learning Library Extensions
+# Author: Sebastian Raschka <sebastianraschka.com>
+#
+# License: BSD 3 clause
+
+from mlxtend.tf_regressor.tf_base import _TfBaseRegressor
+import numpy as np
+from mlxtend.utils import assert_raises
+
+
+def test_init():
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+
+
+def test_check_arrays_1():
+    X = np.array([1, 2, 3])
+    y = np.array([1, 1, 1])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+    assert_raises(ValueError,
+                  'X must be a 2D array. Try X[:, numpy.newaxis]',
+                  tfr._check_arrays,
+                  X)
+
+    assert_raises(ValueError,
+                  'X must be a 2D array. Try X[:, numpy.newaxis]',
+                  tfr._check_arrays,
+                  X, y)
+
+
+def test_check_arrays_2():
+    X = np.array([[1], [2], [3]])
+    y = np.array([1, 1])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+
+    assert_raises(ValueError,
+                  'X and y must contain the same number of samples',
+                  tfr._check_arrays,
+                  X, y)
+
+
+def test_check_arrays_3():
+    X = list([[1], [2], [3]])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+
+    assert_raises(ValueError,
+                  'X must be a numpy array',
+                  tfr._check_arrays,
+                  X)
+
+
+def test_check_arrays_4():
+    X = np.array([[1], [2], [3]])
+    y = np.array([1, 2, 3])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+    tfr._check_arrays(X, y)
+
+
+def test_check_arrays_5():
+    X = np.array([[1], [2], [3]])
+    y = [1, 2, 3]
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+
+    assert_raises(ValueError,
+                  'y must be a numpy array.',
+                  tfr._check_arrays,
+                  X, y)
+
+
+def test_check_arrays_6():
+    X = np.array([[1], [2], [3]])
+    y = X
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+
+    assert_raises(ValueError,
+                  'y must be a 1D numpy array.',
+                  tfr._check_arrays,
+                  X, y)
+
+
+def test_fit():
+    X = np.array([[1], [2], [3]])
+    y = np.array([1, 2, 3])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+    tfr.fit(X, y)
+
+
+def test_predict_1():
+    X = np.array([[1], [2], [3]])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+
+    assert_raises(AttributeError,
+                  'Model is not fitted, yet.',
+                  tfr.predict,
+                  X)
+
+
+def test_predict_2():
+    X = np.array([[1], [2], [3]])
+    y = np.array([1, 2, 3])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+
+    tfr.fit(X, y)
+    tfr.predict(X)
+
+
+def test_shuffle():
+    X = np.array([[1], [2], [3]])
+    y = np.array([1, 2, 3])
+    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
+    X_sh, y_sh = tfr._shuffle(arrays=[X, np.array(y)])
+    np.testing.assert_equal(X_sh, np.array([[1], [3], [2]]))
+    np.testing.assert_equal(y_sh, np.array([1, 3, 2]))
diff --git a/mlxtend/tf_regressor/tests/test_tf_linear_regression.py b/mlxtend/tf_regressor/tests/test_tf_linear_regression.py
@@ -0,0 +1,40 @@
+# Sebastian Raschka 2014-2016
+# mlxtend Machine Learning Library Extensions
+# Author: Sebastian Raschka <sebastianraschka.com>
+#
+# License: BSD 3 clause
+
+
+from mlxtend.tf_regressor import TfLinearRegression
+from mlxtend.data import boston_housing_data
+import numpy as np
+from numpy.testing import assert_almost_equal
+
+
+np.random.seed(1)
+X = np.array([np.random.normal(1.0, 4.55) for i in range(100)])
+y = np.array([x1 * 0.1 + 0.1 + np.random.normal(0.0, 0.05) for x1 in X])
+X = X[:, np.newaxis]
+X2 = np.hstack((X, X))
+
+
+def test_univariate_univariate_gradient_descent():
+    gd_lr = TfLinearRegression(eta=0.05,
+                               epochs=55,
+                               random_seed=1,
+                               print_progress=0)
+    gd_lr.fit(X, y)
+    assert_almost_equal(gd_lr.bias_, np.array([0.11]), decimal=2)
+    assert_almost_equal(gd_lr.weights_, np.array([0.10]), decimal=2)
+    assert_almost_equal(gd_lr.predict(X), y, decimal=1)
+
+
+def test_multivariate_gradient_descent():
+    gd_lr = TfLinearRegression(eta=0.005,
+                               epochs=250,
+                               random_seed=1,
+                               print_progress=0)
+    gd_lr.fit(X2, y)
+    assert_almost_equal(gd_lr.predict(X2), y, decimal=1)
+    assert_almost_equal(gd_lr.bias_, np.array([0.1]), decimal=2)
+    assert_almost_equal(gd_lr.weights_, np.array([-1.1, 1.2]), decimal=2)
diff --git a/mlxtend/tf_regressor/tf_base.py b/mlxtend/tf_regressor/tf_base.py
@@ -0,0 +1,133 @@
+# Sebastian Raschka 2014-2016
+# mlxtend Machine Learning Library Extensions
+#
+# Base Regressor (Regressor Parent Class)
+# Author: Sebastian Raschka <sebastianraschka.com>
+#
+# License: BSD 3 clause
+
+import numpy as np
+from sys import stderr
+from time import time
+
+
+class _TfBaseRegressor(object):
+
+    """Parent Class Base Regressor
+
+    A base class that is extended by
+    regressor child classes.
+
+    """
+    def __init__(self, print_progress=0, random_seed=None):
+        self.print_progress = print_progress
+        self.random_seed = random_seed
+        self._is_fitted = False
+
+    def fit(self, X, y, init_weights=True):
+        """Learn weight coefficients from training data.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+        y : 1D numpy array, shape = [n_samples]
+            Target values.
+        init_weights : bool (default: True)
+            Reinitialize weights
+
+        Returns
+        -------
+        self : object
+
+        """
+        self._is_fitted = False
+        if not (init_weights is None or isinstance(init_weights, bool)):
+            raise AttributeError("init_weights must be True or False")
+        self._check_arrays(X=X, y=y)
+        if self.random_seed is not None:
+            np.random.seed(self.random_seed)
+        self._fit(X=X, y=y, init_weights=init_weights)
+        self._is_fitted = True
+        return self
+
+    def _fit(self, X, y, init_weights=True):
+        # Implemented in child class
+        pass
+
+    def predict(self, X):
+        """Predict target values for X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Input vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        target_values : array-like, shape = [n_samples]
+          Predicted target values.
+
+        """
+        self._check_arrays(X)
+        if not self._is_fitted:
+            raise AttributeError('Model is not fitted, yet.')
+        return self._predict(X)
+
+    def _predict(self, X):
+        # Implemented in child class
+        pass
+
+    def _shuffle(self, arrays):
+        """Shuffle arrays in unison."""
+        r = np.random.permutation(len(arrays[0]))
+        return [ary[r] for ary in arrays]
+
+    def _print_progress(self, epoch, cost=None, time_interval=10):
+        if self.print_progress > 0:
+            s = '\rEpoch: %d/%d' % (epoch, self.epochs)
+            if cost:
+                s += ' | Cost %.2f' % cost
+            if self.print_progress > 1:
+                if not hasattr(self, 'ela_str_'):
+                    self.ela_str_ = '00:00:00'
+                if not epoch % time_interval:
+                    ela_sec = time() - self.init_time_
+                    self.ela_str_ = self._to_hhmmss(ela_sec)
+                s += ' | Elapsed: %s' % self.ela_str_
+                if self.print_progress > 2:
+                    if not hasattr(self, 'eta_str_'):
+                        self.eta_str_ = '00:00:00'
+                    if not epoch % time_interval:
+                        eta_sec = ((ela_sec / float(epoch)) *
+                                   self.epochs - ela_sec)
+                        self.eta_str_ = self._to_hhmmss(eta_sec)
+                    s += ' | ETA: %s' % self.eta_str_
+            stderr.write(s)
+            stderr.flush()
+
+    def _to_hhmmss(self, sec):
+        m, s = divmod(sec, 60)
+        h, m = divmod(m, 60)
+        return "%d:%02d:%02d" % (h, m, s)
+
+    def _check_arrays(self, X, y=None):
+        if isinstance(X, list):
+            raise ValueError('X must be a numpy array')
+        if not len(X.shape) == 2:
+            raise ValueError('X must be a 2D array. Try X[:, numpy.newaxis]')
+        try:
+            if y is None:
+                return
+        except(AttributeError):
+            pass
+        else:
+            if not isinstance(y, np.ndarray):
+                raise ValueError('y must be a numpy array.')
+            if not len(y.shape) == 1:
+                raise ValueError('y must be a 1D numpy array.')
+
+        if not len(y) == X.shape[0]:
+            raise ValueError('X and y must contain the same number of samples')