[WIP] Corrected the #275 pull request #277

Open · wants to merge 11 commits into master
98 changes: 98 additions & 0 deletions pygam/distributions.py
@@ -657,10 +657,108 @@ def sample(self, mu):
"""
return np.random.wald(mean=mu, scale=self.scale, size=None)

#class tDist(Distribution):
#    """
#    non-standardized Student's t distribution:
#    X = mu + sigma * T,
#    where T ~ standard t distribution with df degrees of freedom
#    """
#
#    def __init__(self, df=None, sigma=None):
#        """
#        creates an instance of the tDist class
#
#        Parameters
#        ----------
#        df : float or None, default: None
#            degrees of freedom of the distribution
#        sigma : float or None, default: None
#            scale parameter of the distribution
#
#        Returns
#        -------
#        self
#        """
#        super(tDist, self).__init__(name='tdist', df=df)
#        self.df = df
#        self.sigma = sigma
#
#    def log_pdf(self, y, mu, weights=None):
#        """
#        computes the log of the pdf or pmf of the values under the current distribution
#
#        Parameters
#        ----------
#        y : array-like of length n
#            target values
#        mu : array-like of length n
#            expected values
#        weights : array-like shape (n,) or None, default: None
#            sample weights
#            if None, defaults to array of ones
#
#        Returns
#        -------
#        pdf/pmf : np.array of length n
#        """
#        if weights is None:
#            weights = np.ones_like(mu)
#        # scale is sigma, not 1.0, since X = mu + sigma * T
#        return sp.stats.t.logpdf(y, df=self.df, loc=mu, scale=self.sigma)
#
#    @divide_weights
#    def V(self, mu):
#        """
#        have to set the definition for non-exponential families.
#        variance of a non-standardized t: sigma**2 * df / (df - 2), for df > 2
#        """
#        return np.ones_like(mu) * (self.sigma**2) * self.df / (self.df - 2.)
#
#    @multiply_weights
#    def deviance(self, y, mu, scaled=True):
#        """
#        model deviance
#
#        for a t-distribution, how do we calculate deviance??
#
#        Parameters
#        ----------
#        y : array-like of length n
#            target values
#        mu : array-like of length n
#            expected values
#        scaled : boolean, default: True
#            whether to divide the deviance by the distribution scale
#
#        Returns
#        -------
#        deviances : np.array of length n
#        """
#        dev = np.nan
#
#        return dev
#
#    def sample(self, mu):
#        """
#        Return random samples from this non-standardized t-distribution.
#
#        Samples are drawn independently from distributions
#        with means given by the values in `mu` and with scale
#        given by the `sigma` attribute.
#
#        Parameters
#        ----------
#        mu : array-like of shape n_samples or shape (n_simulations, n_samples)
#            expected values
#
#        Returns
#        -------
#        random_samples : np.array of same shape as mu
#        """
#        return np.random.standard_t(self.df, size=np.shape(mu)) * self.sigma + mu
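#
# A minimal sketch of how tDist could be exercised once it is uncommented
# and registered (hypothetical usage; nothing below ships with this PR):
#
#     dist = tDist(df=5, sigma=2.0)
#     mu = np.zeros(1000)
#     draws = dist.sample(mu)         # draws ~ mu + sigma * T_df, shape (1000,)
#     logp = dist.log_pdf(draws, mu)  # elementwise log-density at the draws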


DISTRIBUTIONS = {'normal': NormalDist,
'poisson': PoissonDist,
'binomial': BinomialDist,
'gamma': GammaDist,
'inv_gauss': InvGaussDist
# 't': tDist
}
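For reference, GAM resolves the distribution string through this dict at validation time; a quick sketch of the lookup, with the future 't' registration shown as an assumption mirroring the commented entry above:

    dist = DISTRIBUTIONS['normal'](scale=None)   # instantiate a distribution by name
    # DISTRIBUTIONS['t'] = tDist                 # one-line registration, once tDist works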
63 changes: 41 additions & 22 deletions pygam/pygam.py
@@ -117,7 +117,11 @@ class GAM(Core, MetaTermMixin):

verbose : bool, optional
whether to show pyGAM warnings.


gamma : float, default: 1.4
    serves as a weighting to increase the impact of the influence matrix
    on the GCV/UBRE score; must be >= 1

Attributes
----------
coef_ : array, shape (n_classes, m_features)
@@ -150,12 +154,14 @@ class GAM(Core, MetaTermMixin):
def __init__(self, terms='auto', max_iter=100, tol=1e-4,
distribution='normal', link='identity',
callbacks=['deviance', 'diffs'],
fit_intercept=True, verbose=False, **kwargs):
fit_intercept=True, gamma=1.4, verbose=False,
**kwargs):

self.max_iter = max_iter
self.tol = tol
self.distribution = distribution
self.link = link
self.gamma = gamma
self.callbacks = callbacks
self.verbose = verbose
self.terms = TermList(terms) if isinstance(terms, Term) else terms
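A quick sanity check of the new keyword (a sketch, assuming pyGAM's bundled mcycle dataset):

    from pygam import LinearGAM
    from pygam.datasets import mcycle

    X, y = mcycle(return_X_y=True)
    gam = LinearGAM(gamma=2.0).fit(X, y)  # gamma > 1.4 penalizes model complexity more
    # LinearGAM(gamma=0.5).fit(X, y) now raises ValueError in _validate_params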
@@ -243,7 +249,10 @@ def _validate_params(self):
raise ValueError('unsupported link {}'.format(self.link))
if self.link in LINKS:
self.link = LINKS[self.link]()


if self.gamma < 1:
    raise ValueError("gamma must be >= 1, "\
                     "but found gamma = {}".format(self.gamma))
# callbacks
if not isiterable(self.callbacks):
raise ValueError('Callbacks must be iterable, but found {}'\
@@ -394,7 +403,7 @@ def _linear_predictor(self, X=None, modelmat=None, b=None, term=-1):

def predict_mu(self, X):
"""
preduct expected value of target given model and input X
predict expected value of target given model and input X

Parameters
---------
@@ -416,21 +425,37 @@ def predict_mu(self, X):
lp = self._linear_predictor(X)
return self.link.mu(lp, self.distribution)

def predict(self, X):
def predict(self, X, output='response'):
"""
preduct expected value of target given model and input X
predict expected value of target given model and input X
often this is done via expected value of GAM given input X

Parameters
---------
X : array-like of shape (n_samples, m_features)
containing the input dataset

output : str in {'response', 'terms'}, default: 'response'
    'response' returns predicted values of the target.
    'terms' returns the partial contribution of each term to the linear predictor.

Returns
-------
y : np.array of shape (n_samples,)
    containing predicted values under the model,
    if output is set to 'response'.

or

list of np.arrays of shape (n_samples,)
    containing the partial contributions of each term,
    if output is set to 'terms'.
"""
if output not in ['response', 'terms']:
    raise ValueError("output must be 'response' or 'terms', "\
                     "but found output = {}".format(output))
if output == 'terms':
    term_values = [self.partial_dependence(term=i, X=X)
                   for i, term in enumerate(self.terms)
                   if not term.isintercept]
    return term_values
return self.predict_mu(X)

def _modelmat(self, X, term=-1):
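A short sketch of the two output modes, assuming a LinearGAM named gam already fitted on X with m non-intercept terms:

    response = gam.predict(X)                       # same as gam.predict_mu(X)
    contributions = gam.predict(X, output='terms')  # list of m arrays, shape (n_samples,)
    # each entry is one term's partial contribution on the linear-predictor scale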
@@ -1004,7 +1029,7 @@ def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
- edof: estimated degrees freedom
- scale: distribution scale, if applicable
- cov: coefficient covariances
- se: standarrd errors
- se: standard errors
- AIC: Akaike Information Criterion
- AICc: corrected Akaike Information Criterion
- pseudo_r2: dict of Pseudo R-squared metrics
@@ -1137,7 +1162,7 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None):

return r2

def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None,
add_scale=True, weights=None):
"""
Generalized Cross Validation and Un-Biased Risk Estimator.
@@ -1151,9 +1176,6 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
output data vector
modelmat : array-like, default: None
contains the spline basis for each feature evaluated at the input
gamma : float, default: 1.4
serves as a weighting to increase the impact of the influence matrix
on the score
add_scale : boolean, default: True
UBRE score can be negative because the distribution scale
is subtracted. to keep things positive we can add the scale back.
@@ -1175,10 +1197,7 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,

see Wood 2006 pg. 177-182, 220 for more details.
"""
if gamma < 1:
raise ValueError('gamma scaling should be greater than 1, '\
'but found gamma = {}',format(gamma))


if modelmat is None:
modelmat = self._modelmat(X)

@@ -1198,10 +1217,10 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
if self.distribution._known_scale:
# scale is known, use UBRE
scale = self.distribution.scale
UBRE = 1./n * dev - (~add_scale)*(scale) + 2.*gamma/n * edof * scale
UBRE = 1./n * dev - (~add_scale)*(scale) + 2.*self.gamma/n * edof * scale
else:
# scale unknown, use GCV
GCV = (n * dev) / (n - gamma * edof)**2
GCV = (n * dev) / (n - self.gamma * edof)**2
return (GCV, UBRE)

def _estimate_p_values(self):
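For intuition, a toy evaluation of the two criteria above (a sketch with made-up numbers, ignoring the optional add_scale shift; see Wood 2006, pp. 177-182):

    n, dev, edof, scale, gamma = 100, 80.0, 6.0, 1.0, 1.4
    UBRE = dev / n - scale + 2. * gamma / n * edof * scale  # scale known
    GCV = (n * dev) / (n - gamma * edof) ** 2               # scale unknown
    # raising gamma inflates the edof penalty, steering selection toward smoother fits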
@@ -1245,7 +1264,7 @@ def _compute_p_value(self, term_i):
based on equations from Wood 2006 section 4.8.5 page 191
and errata https://people.maths.bris.ac.uk/~sw15190/igam/iGAMerrata-12.pdf

the errata shows a correction for the f-statisitc.
the errata shows a correction for the f-statistic.
"""
if not self._is_fitted:
raise AttributeError('GAM has not been fitted. Call fit first.')
@@ -2469,7 +2488,7 @@ def score(self, X, y):

def predict(self, X):
"""
preduct binary targets given model and input X
predict binary targets given model and input X

Parameters
---------
@@ -2485,7 +2504,7 @@ def predict(self, X):

def predict_proba(self, X):
"""
preduct targets given model and input X
predict targets given model and input X

Parameters
---------
@@ -2727,7 +2746,7 @@ def fit(self, X, y, exposure=None, weights=None):

def predict(self, X, exposure=None):
"""
preduct expected value of target given model and input X
predict expected value of target given model and input X
often this is done via expected value of GAM given input X

Parameters
26 changes: 25 additions & 1 deletion pygam/tests/test_GAM_methods.py
@@ -17,6 +17,16 @@ def test_LinearGAM_prediction(mcycle_X_y, mcycle_gam):
preds = mcycle_gam.predict(X)
assert(preds.shape == y.shape)

def test_gamma(mcycle_X_y):
    """
    check that we can fit with a non-default gamma parameter,
    and that an invalid gamma raises a ValueError at fit time
    """
    X, y = mcycle_X_y
    gam = LinearGAM(gamma=3).fit(X, y)
    assert(gam._is_fitted)

    with pytest.raises(ValueError):
        LinearGAM(gamma=-1.0).fit(X, y)

def test_LogisticGAM_accuracy(default_X_y):
"""
check that we can compute accuracy correctly
@@ -250,7 +260,21 @@ def test_get_params():
params = gam.get_params()
assert(params['lam'] == 420)


def test_predict_terms_output(mcycle_X_y):
    """
    check that output='terms' in GAM.predict() returns one
    partial-contribution array per non-intercept term
    """
    X, y = mcycle_X_y
    gam = LinearGAM().fit(X, y)
    terms = gam.predict(X, output='terms')

    n, m = X.shape
    assert(len(terms) == m)
    assert(len(terms[0]) == n)

    with pytest.raises(ValueError):
        gam.predict(X, output='shyambhu')

class TestSamplingFromPosterior(object):

def test_drawing_samples_from_unfitted_model(self, mcycle_X_y, mcycle_gam):