From ed1765a3d5037f2a8a815ce5a209615ed9157867 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Wed, 8 Jul 2020 22:37:29 +0530 Subject: [PATCH 1/9] added "terms" and "response" option to predict method. --- pygam/pygam.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pygam/pygam.py b/pygam/pygam.py index ef3c6fa4..df1e4f36 100644 --- a/pygam/pygam.py +++ b/pygam/pygam.py @@ -416,7 +416,7 @@ def predict_mu(self, X): lp = self._linear_predictor(X) return self.link.mu(lp, self.distribution) - def predict(self, X): + def predict(self, X, type_ = "response"): """ preduct expected value of target given model and input X often this is done via expected value of GAM given input X @@ -430,7 +430,14 @@ def predict(self, X): ------- y : np.array of shape (n_samples,) containing predicted values under the model - """ + + type_ : response or terms, optional + response to provide prediction values. + terms to provide the terms values in linear portion. + """ + if type_ == 'terms': + term_values = [self._linear_predictor(X,term = term) for term in self.terms] + return term_values return self.predict_mu(X) def _modelmat(self, X, term=-1): From da96bf999a64e7cc7719ee4f231df0c2c034bb31 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Thu, 9 Jul 2020 14:46:24 +0530 Subject: [PATCH 2/9] updated predict function with type_ parameter --- pygam/pygam.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pygam/pygam.py b/pygam/pygam.py index df1e4f36..90c45af2 100644 --- a/pygam/pygam.py +++ b/pygam/pygam.py @@ -431,10 +431,12 @@ def predict(self, X, type_ = "response"): y : np.array of shape (n_samples,) containing predicted values under the model - type_ : response or terms, optional + type_ : str in {'response','terms'} response to provide prediction values. terms to provide the terms values in linear portion. """ + if type_ not in ['response','terms']: + raise ValueError('type_ not equal to response or terms.') if type_ == 'terms': term_values = [self._linear_predictor(X,term = term) for term in self.terms] return term_values From bd3494aff4a14db40be230d45205160c2abbf935 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Wed, 15 Jul 2020 22:14:31 +0530 Subject: [PATCH 3/9] "just updating" --- pygam/pygam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygam/pygam.py b/pygam/pygam.py index 90c45af2..bb6e76d6 100644 --- a/pygam/pygam.py +++ b/pygam/pygam.py @@ -1254,7 +1254,7 @@ def _compute_p_value(self, term_i): based on equations from Wood 2006 section 4.8.5 page 191 and errata https://people.maths.bris.ac.uk/~sw15190/igam/iGAMerrata-12.pdf - the errata shows a correction for the f-statisitc. + the errata shows a correction for the f-statistics. """ if not self._is_fitted: raise AttributeError('GAM has not been fitted. Call fit first.') From ba5058847d1a9d0de840999b78188a5bf2dd3ca1 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Thu, 16 Jul 2020 00:16:52 +0530 Subject: [PATCH 4/9] corrected #275 pull request message and added test case for changes. --- pygam/pygam.py | 27 +++++++++++++++++---------- pygam/tests/test_GAM_methods.py | 12 +++++++++++- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/pygam/pygam.py b/pygam/pygam.py index 584c9ec2..d57ac86e 100644 --- a/pygam/pygam.py +++ b/pygam/pygam.py @@ -416,7 +416,7 @@ def predict_mu(self, X): lp = self._linear_predictor(X) return self.link.mu(lp, self.distribution) - def predict(self, X, type_ = "response"): + def predict(self, X, output = "response"): """ preduct expected value of target given model and input X often this is done via expected value of GAM given input X @@ -425,20 +425,27 @@ def predict(self, X, type_ = "response"): --------- X : array-like of shape (n_samples, m_features) containing the input dataset + + output : str in {'response','terms'} + response to provide prediction values. + terms to provide the terms values in linear portion. Returns ------- + y : np.array of shape (n_samples,) containing predicted values under the model - - type_ : str in {'response','terms'} - response to provide prediction values. - terms to provide the terms values in linear portion. - """ - if type_ not in ['response','terms']: - raise ValueError('type_ not equal to response or terms.') - if type_ == 'terms': - term_values = [self._linear_predictor(X,term = term) for term in self.terms] + if output is set to response. + + or + list of Numpy arrays of shape (n_samples,) + containing partial contributions of each term in each numpy array. + if output is set to terms. + """ + if output not in ['response','terms']: + raise ValueError('output not equal to response or terms.') + if output == 'terms': + term_values = [self.partial_dependence(term = i,X = X) for i,term in enumerate(self.terms) if not term.isintercept] return term_values return self.predict_mu(X) diff --git a/pygam/tests/test_GAM_methods.py b/pygam/tests/test_GAM_methods.py index f74999d5..d6715252 100644 --- a/pygam/tests/test_GAM_methods.py +++ b/pygam/tests/test_GAM_methods.py @@ -250,7 +250,17 @@ def test_get_params(): params = gam.get_params() assert(params['lam'] == 420) - +def test_predict_terms_output(self,mcycle_X_y): + """ + test to check output = 'terms' in GAM.predict() + """ + x,y = mcycle_X_y + gam = LinearGAM().fit(x,y) + terms = gam.predict(x,output = "terms") + n,m = x.shape + assert(len(terms) == m) + assert(len(terms[0]) == n) + class TestSamplingFromPosterior(object): def test_drawing_samples_from_unfitted_model(self, mcycle_X_y, mcycle_gam): From bfd94880d94ddb450b990a649c5e0d69ebbb3212 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Thu, 16 Jul 2020 01:47:37 +0530 Subject: [PATCH 5/9] refurbished change --- pygam/tests/test_GAM_methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygam/tests/test_GAM_methods.py b/pygam/tests/test_GAM_methods.py index d6715252..d559cb16 100644 --- a/pygam/tests/test_GAM_methods.py +++ b/pygam/tests/test_GAM_methods.py @@ -250,7 +250,7 @@ def test_get_params(): params = gam.get_params() assert(params['lam'] == 420) -def test_predict_terms_output(self,mcycle_X_y): +def test_predict_terms_output(mcycle_X_y): """ test to check output = 'terms' in GAM.predict() """ From f411c35f0d46dbcbd60d14b212af918b503a4b2e Mon Sep 17 00:00:00 2001 From: shyambhu Date: Thu, 16 Jul 2020 02:02:05 +0530 Subject: [PATCH 6/9] added valueError check. --- pygam/tests/test_GAM_methods.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pygam/tests/test_GAM_methods.py b/pygam/tests/test_GAM_methods.py index d559cb16..75d05b34 100644 --- a/pygam/tests/test_GAM_methods.py +++ b/pygam/tests/test_GAM_methods.py @@ -260,6 +260,10 @@ def test_predict_terms_output(mcycle_X_y): n,m = x.shape assert(len(terms) == m) assert(len(terms[0]) == n) + try: + terms = gam.predict(x,output = "shyambhu") + except: + assert(True) class TestSamplingFromPosterior(object): From aa975bdee62664e0d72e00444ba28ea2515699f9 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Tue, 21 Jul 2020 15:48:22 +0530 Subject: [PATCH 7/9] added wip t distribution. --- pygam/distributions.py | 98 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/pygam/distributions.py b/pygam/distributions.py index 25eabc6f..00786488 100644 --- a/pygam/distributions.py +++ b/pygam/distributions.py @@ -657,10 +657,108 @@ def sample(self, mu): """ return np.random.wald(mean=mu, scale=self.scale, size=None) +#class tDist(Distribution): +# """ +# non standardized student's t distribution. +# X = mu + sigma* T +# where T~ standard_t distribution +# """ +# +# def __init__(self,df = None,sigma = None): +# """ +# creates an instance of the tDist class +# +# Parameters +# ---------- +# scale : float or None, default: None +# scale/standard deviation of the distribution +# +# Returns +# ------- +# self +# """ +# super(tDist, self).__init__(name='tdist', df = df) +# self.nu = df +# self.sigma = sigma +# +# def log_pdf(self, y, mu, weights=None): +# """ +# computes the log of the pdf or pmf of the values under the current distribution +# +# Parameters +# ---------- +# y : array-like of length n +# target values +# mu : array-like of length n +# expected values +# weights : array-like shape (n,) or None, default: None +# sample weights +# if None, defaults to array of ones +# +# Returns +# ------- +# pdf/pmf : np.array of length n +# """ +# if weights is None: +# weights = np.ones_like(mu) +# return sp.stats.t.logpdf(y, df = self.df, loc=mu, scale= 1.0) +# +# @divide_weights +# def V(self, mu): +# """ +# have to set the definition for non-exponential families. +# """ +# return np.ones_like(mu)*[(self.sigma**2)*self.nu/(self.nu-2)] +# +# @multiply_weights +# def deviance(self, y, mu, scaled=True): +# """ +# model deviance +# +# for a t-distribution, how do we calculate deviance?? +# +# Parameters +# ---------- +# y : array-like of length n +# target values +# mu : array-like of length n +# expected values +# scaled : boolean, default: True +# whether to divide the deviance by the distribution scaled +# +# Returns +# ------- +# deviances : np.array of length n +# """ +# dev = np.nan +# +# return dev +# +# def sample(self, mu): +# """ +# Return random samples from this non-standardized t-distribution. +# +# Samples are drawn independently from distributions +# with means given by the values in `mu` and with standard deviations +# equal to the `scale` attribute if it exists otherwise 1.0. +# +# Parameters +# ---------- +# mu : array-like of shape n_samples or shape (n_simulations, n_samples) +# expected values +# +# Returns +# ------- +# random_samples : np.array of same shape as mu +# """ +# +# return np.random.standard_t(df = np.ones_like(mu)*self.nu,size=None)*self.sigma + mu + DISTRIBUTIONS = {'normal': NormalDist, 'poisson': PoissonDist, 'binomial': BinomialDist, 'gamma': GammaDist, 'inv_gauss': InvGaussDist +# 't': tDist } From 17d64280224795147e150ffaa63dbecfbd4041b1 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Tue, 21 Jul 2020 17:04:22 +0530 Subject: [PATCH 8/9] added gamma to constructor following #188 PR. --- pygam/pygam.py | 41 ++++++++++++++++++--------------- pygam/tests/test_GAM_methods.py | 10 ++++++++ 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/pygam/pygam.py b/pygam/pygam.py index d57ac86e..f4289534 100644 --- a/pygam/pygam.py +++ b/pygam/pygam.py @@ -117,7 +117,11 @@ class GAM(Core, MetaTermMixin): verbose : bool, optional whether to show pyGAM warnings. - + + gamma : float, default: 1.4 + serves as a weighting to increase the impact of the influence matrix + on the score + Attributes ---------- coef_ : array, shape (n_classes, m_features) @@ -150,12 +154,14 @@ class GAM(Core, MetaTermMixin): def __init__(self, terms='auto', max_iter=100, tol=1e-4, distribution='normal', link='identity', callbacks=['deviance', 'diffs'], - fit_intercept=True, verbose=False, **kwargs): + fit_intercept=True, gamma = 1.4, verbose=False, + **kwargs): self.max_iter = max_iter self.tol = tol self.distribution = distribution self.link = link + self.gamma = gamma self.callbacks = callbacks self.verbose = verbose self.terms = TermList(terms) if isinstance(terms, Term) else terms @@ -243,7 +249,10 @@ def _validate_params(self): raise ValueError('unsupported link {}'.format(self.link)) if self.link in LINKS: self.link = LINKS[self.link]() - + + if self.gamma < 1: + raise ValueError("gamma must be >1, "\ + "but found gamma = {}".format(self.gamma)) # callbacks if not isiterable(self.callbacks): raise ValueError('Callbacks must be iterable, but found {}'\ @@ -394,7 +403,7 @@ def _linear_predictor(self, X=None, modelmat=None, b=None, term=-1): def predict_mu(self, X): """ - preduct expected value of target given model and input X + predict expected value of target given model and input X Parameters --------- @@ -418,7 +427,7 @@ def predict_mu(self, X): def predict(self, X, output = "response"): """ - preduct expected value of target given model and input X + predict expected value of target given model and input X often this is done via expected value of GAM given input X Parameters @@ -1020,7 +1029,7 @@ def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None, - edof: estimated degrees freedom - scale: distribution scale, if applicable - cov: coefficient covariances - - se: standarrd errors + - se: standard errors - AIC: Akaike Information Criterion - AICc: corrected Akaike Information Criterion - pseudo_r2: dict of Pseudo R-squared metrics @@ -1153,7 +1162,7 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None): return r2 - def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4, + def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, add_scale=True, weights=None): """ Generalized Cross Validation and Un-Biased Risk Estimator. @@ -1167,9 +1176,6 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4, output data vector modelmat : array-like, default: None contains the spline basis for each feature evaluated at the input - gamma : float, default: 1.4 - serves as a weighting to increase the impact of the influence matrix - on the score add_scale : boolean, default: True UBRE score can be negative because the distribution scale is subtracted. to keep things positive we can add the scale back. @@ -1191,10 +1197,7 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4, see Wood 2006 pg. 177-182, 220 for more details. """ - if gamma < 1: - raise ValueError('gamma scaling should be greater than 1, '\ - 'but found gamma = {}',format(gamma)) - + if modelmat is None: modelmat = self._modelmat(X) @@ -1214,10 +1217,10 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4, if self.distribution._known_scale: # scale is known, use UBRE scale = self.distribution.scale - UBRE = 1./n * dev - (~add_scale)*(scale) + 2.*gamma/n * edof * scale + UBRE = 1./n * dev - (~add_scale)*(scale) + 2.*self.gamma/n * edof * scale else: # scale unkown, use GCV - GCV = (n * dev) / (n - gamma * edof)**2 + GCV = (n * dev) / (n - self.gamma * edof)**2 return (GCV, UBRE) def _estimate_p_values(self): @@ -2485,7 +2488,7 @@ def score(self, X, y): def predict(self, X): """ - preduct binary targets given model and input X + predict binary targets given model and input X Parameters --------- @@ -2501,7 +2504,7 @@ def predict(self, X): def predict_proba(self, X): """ - preduct targets given model and input X + predict targets given model and input X Parameters --------- @@ -2743,7 +2746,7 @@ def fit(self, X, y, exposure=None, weights=None): def predict(self, X, exposure=None): """ - preduct expected value of target given model and input X + predict expected value of target given model and input X often this is done via expected value of GAM given input X Parameters diff --git a/pygam/tests/test_GAM_methods.py b/pygam/tests/test_GAM_methods.py index 75d05b34..fb21d0d9 100644 --- a/pygam/tests/test_GAM_methods.py +++ b/pygam/tests/test_GAM_methods.py @@ -17,6 +17,16 @@ def test_LinearGAM_prediction(mcycle_X_y, mcycle_gam): preds = mcycle_gam.predict(X) assert(preds.shape == y.shape) +#def test_gamma(mcycle_X_y): +# """ +# check that if we can fit with not default gamma parameter +# """ +# X,y = mcycle_X_y +# gam = LinearGAM(gamma = 3) +# try: +# gam = LinearGAM(gamma = -1.0) +# except ValueError: +# assert(True) def test_LogisticGAM_accuracy(default_X_y): """ check that we can compute accuracy correctly From e6df9adb73d3ef8a419d23579ff53a47a26a7044 Mon Sep 17 00:00:00 2001 From: shyambhu Date: Tue, 21 Jul 2020 17:11:51 +0530 Subject: [PATCH 9/9] added test_gamma. --- pygam/tests/test_GAM_methods.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pygam/tests/test_GAM_methods.py b/pygam/tests/test_GAM_methods.py index fb21d0d9..affd0bbc 100644 --- a/pygam/tests/test_GAM_methods.py +++ b/pygam/tests/test_GAM_methods.py @@ -17,16 +17,16 @@ def test_LinearGAM_prediction(mcycle_X_y, mcycle_gam): preds = mcycle_gam.predict(X) assert(preds.shape == y.shape) -#def test_gamma(mcycle_X_y): -# """ -# check that if we can fit with not default gamma parameter -# """ -# X,y = mcycle_X_y -# gam = LinearGAM(gamma = 3) -# try: -# gam = LinearGAM(gamma = -1.0) -# except ValueError: -# assert(True) +def test_gamma(mcycle_X_y): + """ + check that if we can fit with not default gamma parameter + """ + X,y = mcycle_X_y + gam = LinearGAM(gamma = 3) + try: + gam = LinearGAM(gamma = -1.0) + except ValueError: + assert(True) def test_LogisticGAM_accuracy(default_X_y): """ check that we can compute accuracy correctly