Skip to content

Commit

Permalink
Merge pull request #45 from rasbt/tf_regressor
Browse files Browse the repository at this point in the history
tensorflow linear regressor
  • Loading branch information
rasbt committed Apr 23, 2016
2 parents 7f08c72 + 6d5602a commit 11db7ed
Show file tree
Hide file tree
Showing 19 changed files with 1,158 additions and 6 deletions.
8 changes: 5 additions & 3 deletions ci/.travis_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ set -e

if [ "$TENSORFLOW" == "true" ]; then
if [[ "$COVERAGE" == "true" ]]; then
nosetests -s -v mlxtend.tf_classifier --nologcapture --with-coverage
nosetests -s -v mlxtend/tf_classifier --nologcapture --with-coverage
nosetests -s -v mlxtend/tf_regressor --nologcapture --with-coverage
else
nosetests -s -v mlxtend.tf_classifier --nologcapture
nosetests -s -v mlxtend/tf_classifier --nologcapture
nosetests -s -v mlxtend/tf_regressor --nologcapture
else
if [[ "$COVERAGE" == "true" ]]; then
nosetests -s -v --with-coverage --exclude-dir=mlxtend/tf_classifier --exclude-dir=mlxtend/data --exclude-dir=mlxtend/general_plotting
nosetests -s -v --with-coverage --exclude-dir=mlxtend/tf_classifier --exclude-dir=mlxtend/tf_regressor --exclude-dir=mlxtend/data --exclude-dir=mlxtend/general_plotting
else
nosetests -s -v --exclude-dir=mlxtend/tf_classifier --exclude-dir=mlxtend/data --exclude-dir=mlxtend/general_plotting
fi
Expand Down
5 changes: 4 additions & 1 deletion docs/mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ pages:
- regressor:
- user_guide/regressor/LinearRegression.md
- user_guide/regressor/StackingRegressor.md
- tf_regressor:
- user_guide/tf_regressor/TfLinearRegression.md
- regression_utils:
- user_guide/regression_utils/plot_linear_regression.md
- feature_selection:
Expand Down Expand Up @@ -104,8 +106,9 @@ pages:
- api_subpackages/mlxtend.file_io.md
- api_subpackages/mlxtend.general_plotting.md
- api_subpackages/mlxtend.preprocessing.md
- api_subpackages/mlxtend.regression_utils.md
- api_subpackages/mlxtend.regressor.md
- api_subpackages/mlxtend.tf_regressor.md
- api_subpackages/mlxtend.regression_utils.md
- api_subpackages/mlxtend.text.md
- api_subpackages/mlxtend.utils.md
- Installation: installation.md
Expand Down
4 changes: 3 additions & 1 deletion docs/sources/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

---

### Version 0.4.1
### Version 0.4.1dev

##### New Features

- New TensorFlow estimator for Linear Regression ([`tf_regressor.TfLinearRegression`](./user_guide/tf_regressor/TfLinearRegression.md))

##### Changes

- Adding optional `dropout` to the [`tf_classifier.TfMultiLayerPerceptron`](./user_guide/tf_classifier/TfMultiLayerPerceptron.md) classifier for regularization
Expand Down
618 changes: 618 additions & 0 deletions docs/sources/user_guide/tf_regressor/TfLinearRegression.ipynb

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 9 additions & 0 deletions mlxtend/tf_regressor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Sebastian Raschka 2014-2016
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

from .tf_linear_regression import TfLinearRegression

__all__ = ["TfLinearRegression"]
113 changes: 113 additions & 0 deletions mlxtend/tf_regressor/tests/test_tf_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Sebastian Raschka 2014-2016
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

from mlxtend.tf_regressor.tf_base import _TfBaseRegressor
import numpy as np
from mlxtend.utils import assert_raises


def test_init():
    """The constructor stores its arguments and starts out unfitted."""
    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
    assert tfr.print_progress == 0
    assert tfr.random_seed == 1
    assert not tfr._is_fitted


def test_check_arrays_1():
    """A 1D feature array is rejected, with or without targets."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    features = np.array([1, 2, 3])
    targets = np.array([1, 1, 1])
    expected_msg = 'X must be a 2D array. Try X[:, numpy.newaxis]'

    # First call passes X only, second call passes X and y.
    for call_args in ((features,), (features, targets)):
        assert_raises(ValueError,
                      expected_msg,
                      regressor._check_arrays,
                      *call_args)


def test_check_arrays_2():
    """Mismatched sample counts between X and y are rejected."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    three_samples = np.array([[1], [2], [3]])
    two_targets = np.array([1, 1])
    expected_msg = 'X and y must contain the same number of samples'

    assert_raises(ValueError,
                  expected_msg,
                  regressor._check_arrays,
                  three_samples, two_targets)


def test_check_arrays_3():
    """A plain Python list passed as X is rejected."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    nested_list = [[1], [2], [3]]

    assert_raises(ValueError,
                  'X must be a numpy array',
                  regressor._check_arrays,
                  nested_list)


def test_check_arrays_4():
    """A 2D X with a matching 1D y passes validation without raising."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    features = np.array([[1], [2], [3]])
    targets = np.array([1, 2, 3])
    regressor._check_arrays(features, targets)


def test_check_arrays_5():
    """A plain Python list passed as y is rejected."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    features = np.array([[1], [2], [3]])
    list_targets = [1, 2, 3]

    assert_raises(ValueError,
                  'y must be a numpy array.',
                  regressor._check_arrays,
                  features, list_targets)


def test_check_arrays_6():
    """A 2D array passed as y is rejected."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    column = np.array([[1], [2], [3]])

    # The same 2D array object is used for both X and y.
    assert_raises(ValueError,
                  'y must be a 1D numpy array.',
                  regressor._check_arrays,
                  column, column)


def test_fit():
    """fit() accepts valid arrays, returns self and marks the model fitted."""
    X = np.array([[1], [2], [3]])
    y = np.array([1, 2, 3])
    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
    fitted = tfr.fit(X, y)
    assert fitted is tfr
    assert tfr._is_fitted


def test_predict_1():
    """predict() on an unfitted model raises AttributeError."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    features = np.array([[1], [2], [3]])

    assert_raises(AttributeError,
                  'Model is not fitted, yet.',
                  regressor.predict,
                  features)


def test_predict_2():
    """predict() runs without raising once fit() has been called."""
    regressor = _TfBaseRegressor(print_progress=0, random_seed=1)
    features = np.array([[1], [2], [3]])
    targets = np.array([1, 2, 3])

    regressor.fit(features, targets)
    regressor.predict(features)


def test_shuffle():
    """_shuffle() permutes all arrays with the same index order."""
    X = np.array([[1], [2], [3]])
    y = np.array([1, 2, 3])
    tfr = _TfBaseRegressor(print_progress=0, random_seed=1)
    # _shuffle draws from NumPy's global RNG and does not seed it itself;
    # seed here so the expected order does not depend on which tests
    # happened to run (and call fit) before this one.
    np.random.seed(1)
    X_sh, y_sh = tfr._shuffle(arrays=[X, y])
    np.testing.assert_equal(X_sh, np.array([[1], [3], [2]]))
    np.testing.assert_equal(y_sh, np.array([1, 3, 2]))
40 changes: 40 additions & 0 deletions mlxtend/tf_regressor/tests/test_tf_linear_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Sebastian Raschka 2014-2016
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause


from mlxtend.tf_regressor import TfLinearRegression
from mlxtend.data import boston_housing_data
import numpy as np
from numpy.testing import assert_almost_equal


# Seed NumPy's global RNG so the synthetic data below is reproducible.
np.random.seed(1)
# 100 feature values, each drawn individually from normal(1.0, 4.55).
X = np.array([np.random.normal(1.0, 4.55) for i in range(100)])
# Targets follow y = 0.1 * x + 0.1 plus normal(0.0, 0.05) noise per sample.
y = np.array([x1 * 0.1 + 0.1 + np.random.normal(0.0, 0.05) for x1 in X])
# Turn the 1D feature vector into a single-column 2D array.
X = X[:, np.newaxis]
# Two-feature variant: the same column stacked side by side.
X2 = np.hstack((X, X))


def test_univariate_univariate_gradient_descent():
    """Single-feature fit recovers the bias, the weight and the targets."""
    settings = dict(eta=0.05, epochs=55, random_seed=1, print_progress=0)
    model = TfLinearRegression(**settings)
    model.fit(X, y)

    expected_bias = np.array([0.11])
    expected_weights = np.array([0.10])
    assert_almost_equal(model.bias_, expected_bias, decimal=2)
    assert_almost_equal(model.weights_, expected_weights, decimal=2)
    assert_almost_equal(model.predict(X), y, decimal=1)


def test_multivariate_gradient_descent():
    """Two-feature fit on duplicated columns reproduces the targets."""
    settings = dict(eta=0.005, epochs=250, random_seed=1, print_progress=0)
    model = TfLinearRegression(**settings)
    model.fit(X2, y)

    predictions = model.predict(X2)
    assert_almost_equal(predictions, y, decimal=1)
    expected_bias = np.array([0.1])
    assert_almost_equal(model.bias_, expected_bias, decimal=2)
    expected_weights = np.array([-1.1, 1.2])
    assert_almost_equal(model.weights_, expected_weights, decimal=2)
133 changes: 133 additions & 0 deletions mlxtend/tf_regressor/tf_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Sebastian Raschka 2014-2016
# mlxtend Machine Learning Library Extensions
#
# Base Regressor (Regressor Parent Class)
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from sys import stderr
from time import time


class _TfBaseRegressor(object):

"""Parent Class Base Regressor
A base class that is implemented by
regressor child classes.
"""
def __init__(self, print_progress=0, random_seed=None):
self.print_progress = print_progress
self.random_seed = random_seed
self._is_fitted = False

def fit(self, X, y, init_weights=True):
"""Learn weight coefficients from training data.
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples]
Target values.
init_weights : bool (default: True)
Reinitialize weights
Returns
-------
self : object
"""
self._is_fitted = False
if not (init_weights is None or isinstance(init_weights, bool)):
raise AttributeError("init_weights must be True or False")
self._check_arrays(X=X, y=y)
if self.random_seed is not None:
np.random.seed(self.random_seed)
self._fit(X=X, y=y, init_weights=init_weights)
self._is_fitted = True
return self

def _fit(self, X, y, init_weights=True):
# Implemented in child class
pass

def predict(self, X):
"""Predict class labels of X.
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
Returns
----------
class_labels : array-like, shape = [n_samples]
Predicted class labels.
"""
self._check_arrays(X)
if not self._is_fitted:
raise AttributeError('Model is not fitted, yet.')
return self._predict(X)

def _predict(self, X):
# Implemented in child class
pass

def _shuffle(self, arrays):
"""Shuffle arrays in unison."""
r = np.random.permutation(len(arrays[0]))
return [ary[r] for ary in arrays]

def _print_progress(self, epoch, cost=None, time_interval=10):
if self.print_progress > 0:
s = '\rEpoch: %d/%d' % (epoch, self.epochs)
if cost:
s += ' | Cost %.2f' % cost
if self.print_progress > 1:
if not hasattr(self, 'ela_str_'):
self.ela_str_ = '00:00:00'
if not epoch % time_interval:
ela_sec = time() - self.init_time_
self.ela_str_ = self._to_hhmmss(ela_sec)
s += ' | Elapsed: %s' % self.ela_str_
if self.print_progress > 2:
if not hasattr(self, 'eta_str_'):
self.eta_str_ = '00:00:00'
if not epoch % time_interval:
eta_sec = ((ela_sec / float(epoch)) *
self.epochs - ela_sec)
self.eta_str_ = self._to_hhmmss(eta_sec)
s += ' | ETA: %s' % self.eta_str_
stderr.write(s)
stderr.flush()

def _to_hhmmss(self, sec):
m, s = divmod(sec, 60)
h, m = divmod(m, 60)
return "%d:%02d:%02d" % (h, m, s)

def _check_arrays(self, X, y=None):
if isinstance(X, list):
raise ValueError('X must be a numpy array')
if not len(X.shape) == 2:
raise ValueError('X must be a 2D array. Try X[:, numpy.newaxis]')
try:
if y is None:
return
except(AttributeError):
pass
else:
if not isinstance(y, np.ndarray):
raise ValueError('y must be a numpy array.')
if not len(y.shape) == 1:
raise ValueError('y must be a 1D numpy array.')

if not len(y) == X.shape[0]:
raise ValueError('X and y must contain the same number of samples')
Loading

0 comments on commit 11db7ed

Please sign in to comment.