Merge branch 'release/2.7.0'
mayofaulkner committed Dec 20, 2021
2 parents 0c01ef8 + f7804ae commit 81076be
Showing 23 changed files with 931 additions and 493 deletions.
11 changes: 11 additions & 0 deletions CITATION.cff
@@ -0,0 +1,11 @@
+cff-version: 0.0.0
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: International Brain Laboratory
+  given-names: The
+  orcid:
+title: "ibllib"
+version:
+doi:
+date-released: 2021-12-09
+url: "https://github.com/int-brain-lab/ibllib"
6 changes: 3 additions & 3 deletions brainbox/behavior/wheel.py
@@ -4,7 +4,7 @@
import numpy as np
from numpy import pi
import scipy.interpolate as interpolate
-from scipy.signal import convolve, gaussian
+from scipy.signal import convolve, windows
from scipy.linalg import hankel
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
@@ -114,7 +114,7 @@ def velocity_smoothed(pos, freq, smooth_size=0.03):
std_samps = np.round(smooth_size * freq) # Standard deviation relative to sampling frequency
N = std_samps * 6 # Number of points in the Gaussian covering +/-3 standard deviations
gauss_std = (N - 1) / 6
-win = gaussian(N, gauss_std)
+win = windows.gaussian(N, gauss_std)
win = win / win.sum() # Normalize amplitude

# Convolve and multiply by sampling frequency to restore original units
@@ -274,7 +274,7 @@ def movements(t, pos, freq=1000, pos_thresh=8, t_thresh=.2, min_gap=.1, pos_thre
peak_amps = np.fromiter(peaks, dtype=float, count=onsets.size)
N = 10 # Number of points in the Gaussian
STDEV = 1.8 # Equivalent to a width factor (alpha value) of 2.5
-gauss = gaussian(N, STDEV)  # A 10-point Gaussian window of a given s.d.
+gauss = windows.gaussian(N, STDEV)  # A 10-point Gaussian window of a given s.d.
vel = convolve(np.diff(np.insert(pos, 0, 0)), gauss, mode='same')
# For each movement period, find the timestamp where the absolute velocity was greatest
peaks = (t[m + np.abs(vel[m:n]).argmax()] for m, n in zip(onset_samps, offset_samps))
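The changes in this file track SciPy's relocation of the window functions into the scipy.signal.windows namespace (the top-level scipy.signal.gaussian alias was deprecated and later removed). A minimal sketch of the updated kernel construction, mirroring velocity_smoothed with its default arguments:

    import numpy as np
    from scipy.signal import convolve, windows

    freq, smooth_size = 1000, 0.03             # sampling rate (Hz), kernel s.d. (s)
    std_samps = np.round(smooth_size * freq)   # standard deviation in samples
    N = int(std_samps * 6)                     # cover +/- 3 standard deviations
    win = windows.gaussian(N, (N - 1) / 6)
    win = win / win.sum()                      # normalize to unit area

    pos = np.cumsum(np.random.randn(5000))     # toy wheel position trace
    vel = convolve(np.diff(np.insert(pos, 0, 0)), win, mode='same') * freq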
15 changes: 10 additions & 5 deletions brainbox/io/one.py
@@ -276,7 +276,6 @@ def _load_channel_locations_traj(eid, probe=None, one=None, revision=None, align
collections = one.list_collections(eid, filename='channels*', collection=collection,
revision=revision)
probe_collection = _get_spike_sorting_collection(collections, probe)
-print(probe_collection)
chn_coords = one.load_dataset(eid, 'channels.localCoordinates', collection=probe_collection)
depths = chn_coords[:, 1]

@@ -388,7 +387,7 @@ def load_channel_locations(eid, probe=None, one=None, aligned=False, brain_atlas


def load_spike_sorting_fast(eid, one=None, probe=None, dataset_types=None, spike_sorter=None, revision=None,
-                            brain_regions=None, nested=True):
+                            brain_regions=None, nested=True, collection=None, return_collection=False):
"""
From an eid, loads spikes and clusters for all probes
The following set of dataset types are loaded:
@@ -403,12 +402,15 @@ def load_spike_sorting_fast(eid, one=None, probe=None, dataset_types=None, spike
:param probe: name of probe to load in, if not given all probes for session will be loaded
:param dataset_types: additional spikes/clusters objects to add to the standard default list
:param spike_sorter: name of the spike sorting you want to load (None for default)
+:param collection: name of the spike sorting collection to load -- exclusive with the spike sorter name, e.g. "alf/probe00"
:param return_channels: (bool) defaults to False, otherwise tries to load channels from disk
:param brain_regions: ibllib.atlas.regions.BrainRegions object - will label acronyms if provided
:param nested: if a single probe is required, do not output a dictionary with the probe name as key
-:return: spikes, clusters (dict of bunch, 1 bunch per probe)
+:param return_collection: (False) if True, will return the collection used to load
+:return: spikes, clusters, channels (dict of bunch, 1 bunch per probe)
"""
-collection = _collection_filter_from_args(probe, spike_sorter)
+if collection is None:
+    collection = _collection_filter_from_args(probe, spike_sorter)
_logger.debug(f"load spike sorting with collection filter {collection}")
kwargs = dict(eid=eid, one=one, collection=collection, revision=revision, dataset_types=dataset_types,
brain_regions=brain_regions)
@@ -419,7 +421,10 @@ def load_spike_sorting_fast(eid, one=None, probe=None, dataset_types=None, spike
channels = channels[k]
clusters = clusters[k]
spikes = spikes[k]
-return spikes, clusters, channels
+if return_collection:
+    return spikes, clusters, channels, collection
+else:
+    return spikes, clusters, channels


def load_spike_sorting(eid, one=None, probe=None, dataset_types=None, spike_sorter=None, revision=None,
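A minimal usage sketch of the two new keyword arguments (the session id below is hypothetical, 'alf/probe00' follows the docstring's example, and a configured ONE instance is assumed):

    from one.api import ONE
    from brainbox.io.one import load_spike_sorting_fast

    one = ONE()
    eid = 'my-session-eid'  # hypothetical
    # Default behaviour: the collection is inferred from probe/spike_sorter
    spikes, clusters, channels = load_spike_sorting_fast(eid, one=one, probe='probe00')
    # Explicit collection, returned alongside the data
    spikes, clusters, channels, collection = load_spike_sorting_fast(
        eid, one=one, collection='alf/probe00', return_collection=True)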
83 changes: 64 additions & 19 deletions brainbox/modeling/design_matrix.py
@@ -11,7 +11,7 @@ class DesignMatrix:
and allow the generation of a design matrix with specified regressors
"""

-def __init__(self, trialsdf, vartypes=None, binwidth=0.02):
+def __init__(self, trialsdf, vartypes, binwidth=0.02):
"""
Class for generating design matrices to model neural data. Provides handy routines for
describing neural spiking activity using basis functions and other primitives.
@@ -31,7 +31,7 @@ def __init__(self, trialsdf, vartypes=None, binwidth=0.02):
Obligatory columns for the dataframe are "trial_start" and "trial_end", which tell the
constructor which time points to associate with that trial.
-vartypes : dict, optional
+vartypes : dict
Dictionary of types for each of the columns in trialsdf. Columns must be of the types:
-- timing: timing events, in which the column values are times since the start of the
session of an event within that trial, e.g. stimulus onset.
@@ -41,46 +41,44 @@ def __init__(self, trialsdf, vartypes=None, binwidth=0.02):
changes within the trial. e.g. pupil diameter.
Dictionary keys should be columns in trialsdf, values should be strings that are equal
to one of the above.
-If vartypes is not passed, the constructor will assume you know what you are doing. Be
-warned that this can result in the class failing in spectacular and vindictive ways.
-by default None
binwidth : float, optional
Length of time bins which will be used for design matrix, by default 0.02
"""
# Data checks #
-if vartypes is not None:
-    validtypes = ('timing', 'continuous', 'value')
-    if not all([name in vartypes for name in trialsdf.columns]):
-        raise KeyError("Some columns were not described in vartypes")
-    if not all([value in validtypes for value in vartypes.values()]):
-        raise ValueError("Invalid values were passed in vartypes")
+validtypes = ('timing', 'continuous', 'value')
+if not all([name in vartypes for name in trialsdf.columns]):
+    raise KeyError("Some columns were not described in vartypes")
+if not all([value in validtypes for value in vartypes.values()]):
+    raise ValueError("Invalid values were passed in vartypes")

# Filter out cells which don't meet the criteria for minimum spiking, while doing trial
# assignment
-self.vartypes = vartypes
-if vartypes is not None:
-    self.vartypes['duration'] = 'value'
+vartypes['duration'] = 'value'
+base_df = trialsdf.copy()
trialsdf = trialsdf.copy() # Make sure we don't modify the original dataframe
trbounds = trialsdf[['trial_start', 'trial_end']] # Get the start/end of trials
# Empty trial duration value to use later
trialsdf['duration'] = np.nan
# Figure out which columns are timing variables if vartypes was passed
timingvars = [col for col in trialsdf.columns if vartypes[col] == 'timing']

for i, (start, end) in trbounds.iterrows():
if any(np.isnan((start, end))):
warn(f"NaN values found in trial start or end at trial number {i}. "
"Discarding trial.")
trialsdf.drop(i, inplace=True)
continue
for col in timingvars:
# Round values for the timing variables to the 5th decimal place and subtract
# trial start time.
trialsdf.at[i, col] = np.round(trialsdf.at[i, col] - start, decimals=5)
trialsdf.at[i, 'duration'] = end - start

# Set model parameters to begin with
self.binwidth = binwidth
self.covar = {}
self.trialsdf = trialsdf
+self.vartypes = vartypes
+self.base_df = base_df
self.compiled = False
return
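With this change vartypes is mandatory: every column of trialsdf must be declared, and the checks above raise otherwise. A minimal construction sketch with a toy trials dataframe:

    import pandas as pd
    from brainbox.modeling.design_matrix import DesignMatrix

    trialsdf = pd.DataFrame({
        'trial_start': [0.0, 2.0],
        'trial_end': [1.5, 3.5],
        'stim_on': [0.4, 2.3],   # a timing event within each trial
    })
    # Every column must be typed as 'timing', 'continuous', or 'value'
    vartypes = {'trial_start': 'timing', 'trial_end': 'timing', 'stim_on': 'timing'}
    design = DesignMatrix(trialsdf, vartypes, binwidth=0.02)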
@@ -155,7 +153,7 @@ def add_covariate_timing(self, covlabel, eventname, bases,
else:
raise TypeError('deltaval must be None, pandas series, or string reference'
f' to trialsdf column. {type(deltaval)} was passed instead.')
-if eventname in self.vartypes and self.vartypes[eventname] != 'timing':
+if self.vartypes[eventname] != 'timing':
raise TypeError(f'Column {eventname} in trialsdf is not registered as a timing')

vecsizes = self.trialsdf['duration'].apply(self.binf)
@@ -174,6 +172,33 @@

def add_covariate_boxcar(self, covlabel, boxstart, boxend,
cond=None, height=None, desc=''):
"""
Convenience wrapped on add_covariate to add a boxcar covariate on the given start and end
variables, such that the covariate is a step function with non-zero value between those
values.
Note: This has not been tested yet and is not guaranteed to work, or work correctly.
Parameters
----------
covlabel : str
Name of the covariate for accessing later. Can be accessed via dot syntax of the
instance usually.
boxstart : str
Column name in trialsdf which will be used to define the start of the boxcar
boxend : str
Column name in trialsdf which defines the end of boxcar variable
cond : None, list, or func, optional
Condition in which to apply this covariate. Can either be a list of trial indices, or
a function which takes in a row of the trialsdf and returns a boolen on inclusion,
by default None
height : None, str, or pandas series, optional
Values for the height of the boxcar during the period defined per trial. Can be a
reference to a column in trialsdf or a separate series, by default None
desc : str, optional
Additional information about the covariate to store as a string, by default ''
"""
if covlabel in self.covar:
raise AttributeError(f'Covariate {covlabel} already exists in model.')
self._compile_check()
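Given the parameters documented above (and keeping the docstring's own caveat that this path is untested), usage would look roughly like the following, with hypothetical column names:

    # 'stim_on'/'stim_off' are hypothetical timing columns of trialsdf
    design.add_covariate_boxcar('stimbox', 'stim_on', 'stim_off',
                                desc='1 while the stimulus is on screen')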
@@ -210,6 +235,27 @@ def add_covariate_boxcar(self, covlabel, boxstart, boxend,

def add_covariate_raw(self, covlabel, raw,
cond=None, desc=''):
"""
Convenience wrapper to add a 'raw' covariate, that is to say a covariate which is a
continuous value that changes with time during the course of a trial.
Note: This has not been tested and is not guaranteed to work or to work correctly.
Parameters
----------
covlabel : str
String used to reference covariate, can usually be accessed by instance's dot syntax
raw : str, func, or pandas series
The covariate to add to the design matrix. Can be a str reference to a column in
trialsdf, a function which takes in rows of trialsdf and produces a vector for each
row of the appropriate size given binwidth and trial duration, or a pandas series
of vectors of said appropriate type.
cond : None, list, or func, optional
Trials in which to apply the given covariate. Can be a list of trial numbers,
or a function which accepts rows of the trialsdf and returns a boolean, by default None
desc : str, optional
Additional information about the covariate for access later, by default ''
"""
stimlens = self.trialsdf.duration.apply(self.binf)
if isinstance(raw, str):
if raw not in self.trialsdf.columns:
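Analogously for a raw covariate (same untested caveat; 'wheel_velocity' is a hypothetical column holding one vector per trial, sized to duration / binwidth as the docstring requires):

    design.add_covariate_raw('wheelvel', 'wheel_velocity',
                             desc='continuous wheel velocity trace')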
@@ -354,7 +400,6 @@ def compile_design_matrix(self, dense=True):
assert self.binnedspikes.shape[0] == dm.shape[0], "Oh shit. Indexing error."
self.dm = dm
self.trlabels = trlabels
-# self.dm = np.roll(dm, -1, axis=0)  # Fix weird +1 offset bug in design matrix
self.compiled = True
return

@@ -384,7 +429,7 @@ def denseconv(X, bases):
A = np.zeros((T + TB - 1, int(np.sum(indices[kCov, :]))))
for i, j in enumerate(np.argwhere(indices[kCov, :]).flat):
A[:, i] = np.convolve(X[:, kCov], bases[:, j])
-BX[:, k: sI[kCov]] = A[: T, :]
+BX[:, k: sI[kCov]] = A[:T, :]
k = sI[kCov]
return BX

@@ -400,5 +445,5 @@
if offset < 0:
X = X[-offset:, :]
elif offset > 0:
-X = X[: -(1 + offset), :]
+X = X[:-offset, :]
return X
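The last change fixes an off-by-one in the positive-offset trim: slicing with [: -(1 + offset)] drops one row too many, while [:-offset] keeps the output the same length as the stimulus. A small numpy illustration:

    import numpy as np

    X = np.arange(10).reshape(5, 2)
    offset = 2
    X[: -(1 + offset), :].shape   # (2, 2) -- one row too short
    X[:-offset, :].shape          # (3, 2) -- the intended trim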
29 changes: 4 additions & 25 deletions brainbox/modeling/linear.py
@@ -17,7 +17,7 @@
class LinearGLM(NeuralModel):
def __init__(self, design_matrix, spk_times, spk_clu,
binwidth=0.02, metric='rsq', estimator=None,
-             train=0.8, blocktrain=False, mintrials=100):
+             mintrials=100):
"""
Fit a linear model using a DesignMatrix object and spike data. Can use ridge regression
or pure linear regression
@@ -48,13 +48,15 @@ def __init__(self, design_matrix, spk_times, spk_clu,
fitting, by default 100
"""
super().__init__(design_matrix, spk_times, spk_clu,
-                 binwidth, train, blocktrain, mintrials)
+                 binwidth, mintrials)
if estimator is None:
estimator = LinearRegression()
if not isinstance(estimator, BaseEstimator):
raise ValueError('Estimator must be a scikit-learn estimator, e.g. LinearRegression')
self.metric = metric
self.estimator = estimator
+self.link = lambda x: x
+self.invlink = self.link

def _fit(self, dm, binned, cells=None):
"""
@@ -94,26 +96,3 @@ def _fit(self, dm, binned, cells=None):
coefs.at[cell] = weight[cell_idx, :]
intercepts.at[cell] = intercept[cell_idx]
return coefs, intercepts

-def score(self):
-    """
-    Score model using chosen metric
-    Returns
-    -------
-    pandas.Series
-        Score using chosen metric (defined at instantiation) for each unit fit by the model.
-    """
-    if not hasattr(self, 'coefs'):
-        raise AttributeError('Model has not been fit yet.')
-    testmask = np.isin(self.design.trlabels, self.testinds).flatten()
-    dm, binned = self.design[testmask, :], self.binnedspikes[testmask]
-
-    scores = pd.Series(index=self.coefs.index, name='scores')
-    for cell in self.coefs.index:
-        cell_idx = np.argwhere(self.clu_ids == cell)[0, 0]
-        wt = self.coefs.loc[cell].reshape(-1, 1)
-        bias = self.intercepts.loc[cell]
-        y = binned[:, cell_idx]
-        scores.at[cell] = self._scorer(wt, bias, dm, y)
-    return scores
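The constructor now drops the train/blocktrain split arguments, adds an identity link, and validates the estimator up front. A minimal construction sketch (design, spk_times and spk_clu are assumed to come from a compiled DesignMatrix and a spike sorting result, as above):

    from sklearn.linear_model import Ridge
    from brainbox.modeling.linear import LinearGLM

    model = LinearGLM(design, spk_times, spk_clu,
                      binwidth=0.02, estimator=Ridge(alpha=1.0))
    # estimator must be a scikit-learn estimator; None falls back to
    # LinearRegression() and anything else raises ValueError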
