Merge branch 'release/2.7.0'
mayofaulkner committed Dec 20, 2021
2 parents 0c01ef8 + f7804ae commit 81076be
Showing 23 changed files with 931 additions and 493 deletions.
11 changes: 11 additions & 0 deletions CITATION.cff
@@ -0,0 +1,11 @@
+cff-version: 0.0.0
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: International Brain Laboratory
+  given-names: The
+  orcid:
+title: "ibllib"
+version:
+doi:
+date-released: 2021-12-09
+url: "https://github.com/int-brain-lab/ibllib"
6 changes: 3 additions & 3 deletions brainbox/behavior/wheel.py
@@ -4,7 +4,7 @@
import numpy as np
from numpy import pi
import scipy.interpolate as interpolate
-from scipy.signal import convolve, gaussian
+from scipy.signal import convolve, windows
from scipy.linalg import hankel
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
@@ -114,7 +114,7 @@ def velocity_smoothed(pos, freq, smooth_size=0.03):
std_samps = np.round(smooth_size * freq) # Standard deviation relative to sampling frequency
N = std_samps * 6 # Number of points in the Gaussian covering +/-3 standard deviations
gauss_std = (N - 1) / 6
-win = gaussian(N, gauss_std)
+win = windows.gaussian(N, gauss_std)
win = win / win.sum() # Normalize amplitude

# Convolve and multiply by sampling frequency to restore original units
@@ -274,7 +274,7 @@ def movements(t, pos, freq=1000, pos_thresh=8, t_thresh=.2, min_gap=.1, pos_thre
peak_amps = np.fromiter(peaks, dtype=float, count=onsets.size)
N = 10 # Number of points in the Gaussian
STDEV = 1.8 # Equivalent to a width factor (alpha value) of 2.5
-gauss = gaussian(N, STDEV)  # A 10-point Gaussian window of a given s.d.
+gauss = windows.gaussian(N, STDEV)  # A 10-point Gaussian window of a given s.d.
vel = convolve(np.diff(np.insert(pos, 0, 0)), gauss, mode='same')
# For each movement period, find the timestamp where the absolute velocity was greatest
peaks = (t[m + np.abs(vel[m:n]).argmax()] for m, n in zip(onset_samps, offset_samps))
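The changes in this file track SciPy's relocation of the window functions into the scipy.signal.windows namespace (the top-level scipy.signal.gaussian alias was deprecated and later removed). A minimal sketch of the updated kernel construction, mirroring velocity_smoothed with its default arguments:

    import numpy as np
    from scipy.signal import convolve, windows

    freq, smooth_size = 1000, 0.03             # sampling rate (Hz), kernel s.d. (s)
    std_samps = np.round(smooth_size * freq)   # standard deviation in samples
    N = int(std_samps * 6)                     # cover +/- 3 standard deviations
    win = windows.gaussian(N, (N - 1) / 6)
    win = win / win.sum()                      # normalize to unit area

    pos = np.cumsum(np.random.randn(5000))     # toy wheel position trace
    vel = convolve(np.diff(np.insert(pos, 0, 0)), win, mode='same') * freq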
15 changes: 10 additions & 5 deletions brainbox/io/one.py
@@ -276,7 +276,6 @@ def _load_channel_locations_traj(eid, probe=None, one=None, revision=None, align
collections = one.list_collections(eid, filename='channels*', collection=collection,
revision=revision)
probe_collection = _get_spike_sorting_collection(collections, probe)
-print(probe_collection)
chn_coords = one.load_dataset(eid, 'channels.localCoordinates', collection=probe_collection)
depths = chn_coords[:, 1]

@@ -388,7 +387,7 @@ def load_channel_locations(eid, probe=None, one=None, aligned=False, brain_atlas


def load_spike_sorting_fast(eid, one=None, probe=None, dataset_types=None, spike_sorter=None, revision=None,
-                            brain_regions=None, nested=True):
+                            brain_regions=None, nested=True, collection=None, return_collection=False):
"""
From an eid, loads spikes and clusters for all probes
The following set of dataset types are loaded:
@@ -403,12 +402,15 @@ def load_spike_sorting_fast(eid, one=None, probe=None, dataset_types=None, spike
:param probe: name of probe to load in, if not given all probes for session will be loaded
:param dataset_types: additional spikes/clusters objects to add to the standard default list
:param spike_sorter: name of the spike sorting you want to load (None for default)
+:param collection: name of the spike sorting collection to load -- exclusive with the spike sorter name, e.g. "alf/probe00"
:param return_channels: (bool) defaults to False, otherwise tries to load channels from disk
:param brain_regions: ibllib.atlas.regions.BrainRegions object - will label acronyms if provided
:param nested: if a single probe is required, do not output a dictionary with the probe name as key
-:return: spikes, clusters (dict of bunch, 1 bunch per probe)
+:param return_collection: (False) if True, will return the collection used to load
+:return: spikes, clusters, channels (dict of bunch, 1 bunch per probe)
"""
-collection = _collection_filter_from_args(probe, spike_sorter)
+if collection is None:
+    collection = _collection_filter_from_args(probe, spike_sorter)
_logger.debug(f"load spike sorting with collection filter {collection}")
kwargs = dict(eid=eid, one=one, collection=collection, revision=revision, dataset_types=dataset_types,
brain_regions=brain_regions)
@@ -419,7 +421,10 @@ def load_spike_sorting_fast(eid, one=None, probe=None, dataset_types=None, spike
channels = channels[k]
clusters = clusters[k]
spikes = spikes[k]
-return spikes, clusters, channels
+if return_collection:
+    return spikes, clusters, channels, collection
+else:
+    return spikes, clusters, channels


def load_spike_sorting(eid, one=None, probe=None, dataset_types=None, spike_sorter=None, revision=None,
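A minimal usage sketch of the two new keyword arguments (the session id below is hypothetical, 'alf/probe00' follows the docstring's example, and a configured ONE instance is assumed):

    from one.api import ONE
    from brainbox.io.one import load_spike_sorting_fast

    one = ONE()
    eid = 'my-session-eid'  # hypothetical
    # Default behaviour: the collection is inferred from probe/spike_sorter
    spikes, clusters, channels = load_spike_sorting_fast(eid, one=one, probe='probe00')
    # Explicit collection, returned alongside the data
    spikes, clusters, channels, collection = load_spike_sorting_fast(
        eid, one=one, collection='alf/probe00', return_collection=True)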
83 changes: 64 additions & 19 deletions brainbox/modeling/design_matrix.py
@@ -11,7 +11,7 @@ class DesignMatrix:
and allow the generation of a design matrix with specified regressors
"""

-def __init__(self, trialsdf, vartypes=None, binwidth=0.02):
+def __init__(self, trialsdf, vartypes, binwidth=0.02):
"""
Class for generating design matrices to model neural data. Provides handy routines for
describing neural spiking activity using basis functions and other primitives.
@@ -31,7 +31,7 @@ def __init__(self, trialsdf, vartypes=None, binwidth=0.02):
Obligatory columns for the dataframe are "trial_start" and "trial_end", which tell the
constructor which time points to associate with that trial.
-vartypes : dict, optional
+vartypes : dict
Dictionary of types for each of the columns in trialsdf. Columns must be of the types:
-- timing: timing events, in which the column values are times since the start of the
session of an event within that trial, e.g. stimulus onset.
@@ -41,46 +41,44 @@ def __init__(self, trialsdf, vartypes=None, binwidth=0.02):
changes within the trial. e.g. pupil diameter.
Dictionary keys should be columns in trialsdf, values should be strings that are equal
to one of the above.
-If vartypes is not passed, the constructor will assume you know what you are doing. Be
-warned that this can result in the class failing in spectacular and vindictive ways.
-by default None
binwidth : float, optional
Length of time bins which will be used for design matrix, by default 0.02
"""
# Data checks #
-if vartypes is not None:
-    validtypes = ('timing', 'continuous', 'value')
-    if not all([name in vartypes for name in trialsdf.columns]):
-        raise KeyError("Some columns were not described in vartypes")
-    if not all([value in validtypes for value in vartypes.values()]):
-        raise ValueError("Invalid values were passed in vartypes")
+validtypes = ('timing', 'continuous', 'value')
+if not all([name in vartypes for name in trialsdf.columns]):
+    raise KeyError("Some columns were not described in vartypes")
+if not all([value in validtypes for value in vartypes.values()]):
+    raise ValueError("Invalid values were passed in vartypes")

# Filter out cells which don't meet the criteria for minimum spiking, while doing trial
# assignment
-self.vartypes = vartypes
-if vartypes is not None:
-    self.vartypes['duration'] = 'value'
+vartypes['duration'] = 'value'
+base_df = trialsdf.copy()
trialsdf = trialsdf.copy() # Make sure we don't modify the original dataframe
trbounds = trialsdf[['trial_start', 'trial_end']] # Get the start/end of trials
# Empty trial duration value to use later
trialsdf['duration'] = np.nan
# Figure out which columns are timing variables if vartypes was passed
timingvars = [col for col in trialsdf.columns if vartypes[col] == 'timing']

for i, (start, end) in trbounds.iterrows():
if any(np.isnan((start, end))):
warn(f"NaN values found in trial start or end at trial number {i}. "
"Discarding trial.")
trialsdf.drop(i, inplace=True)
continue
for col in timingvars:
# Round values for the timing variables to the 5th decimal place and subtract
# trial start time.
trialsdf.at[i, col] = np.round(trialsdf.at[i, col] - start, decimals=5)
trialsdf.at[i, 'duration'] = end - start

# Set model parameters to begin with
self.binwidth = binwidth
self.covar = {}
self.trialsdf = trialsdf
+self.vartypes = vartypes
+self.base_df = base_df
self.compiled = False
return
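With this change vartypes is mandatory: every column of trialsdf must be declared, and the checks above raise otherwise. A minimal construction sketch with a toy trials dataframe:

    import pandas as pd
    from brainbox.modeling.design_matrix import DesignMatrix

    trialsdf = pd.DataFrame({
        'trial_start': [0.0, 2.0],
        'trial_end': [1.5, 3.5],
        'stim_on': [0.4, 2.3],   # a timing event within each trial
    })
    # Every column must be typed as 'timing', 'continuous', or 'value'
    vartypes = {'trial_start': 'timing', 'trial_end': 'timing', 'stim_on': 'timing'}
    design = DesignMatrix(trialsdf, vartypes, binwidth=0.02)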
@@ -155,7 +153,7 @@ def add_covariate_timing(self, covlabel, eventname, bases,
else:
raise TypeError('deltaval must be None, pandas series, or string reference'
f' to trialsdf column. {type(deltaval)} was passed instead.')
-if eventname in self.vartypes and self.vartypes[eventname] != 'timing':
+if self.vartypes[eventname] != 'timing':
raise TypeError(f'Column {eventname} in trialsdf is not registered as a timing')

vecsizes = self.trialsdf['duration'].apply(self.binf)
@@ -174,6 +172,33 @@

def add_covariate_boxcar(self, covlabel, boxstart, boxend,
cond=None, height=None, desc=''):
"""
Convenience wrapped on add_covariate to add a boxcar covariate on the given start and end
variables, such that the covariate is a step function with non-zero value between those
values.
Note: This has not been tested yet and is not guaranteed to work, or work correctly.
Parameters
----------
covlabel : str
Name of the covariate for accessing later. Can be accessed via dot syntax of the
instance usually.
boxstart : str
Column name in trialsdf which will be used to define the start of the boxcar
boxend : str
Column name in trialsdf which defines the end of boxcar variable
cond : None, list, or func, optional
Condition in which to apply this covariate. Can either be a list of trial indices, or
a function which takes in a row of the trialsdf and returns a boolen on inclusion,
by default None
height : None, str, or pandas series, optional
Values for the height of the boxcar during the period defined per trial. Can be a
reference to a column in trialsdf or a separate series, by default None
desc : str, optional
Additional information about the covariate to store as a string, by default ''
"""
if covlabel in self.covar:
raise AttributeError(f'Covariate {covlabel} already exists in model.')
self._compile_check()
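Given the parameters documented above (and keeping the docstring's own caveat that this path is untested), usage would look roughly like the following, with hypothetical column names:

    # 'stim_on'/'stim_off' are hypothetical timing columns of trialsdf
    design.add_covariate_boxcar('stimbox', 'stim_on', 'stim_off',
                                desc='1 while the stimulus is on screen')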
@@ -210,6 +235,27 @@ def add_covariate_boxcar(self, covlabel, boxstart, boxend,

def add_covariate_raw(self, covlabel, raw,
cond=None, desc=''):
"""
Convenience wrapper to add a 'raw' covariate, that is to say a covariate which is a
continuous value that changes with time during the course of a trial.
Note: This has not been tested and is not guaranteed to work or to work correctly.
Parameters
----------
covlabel : str
String used to reference covariate, can usually be accessed by instance's dot syntax
raw : str, func, or pandas series
The covariate to add to the design matrix. Can be a str reference to a column in
trialsdf, a function which takes in rows of trialsdf and produces a vector for each
row of the appropriate size given binwidth and trial duration, or a pandas series
of vectors of said appropriate type.
cond : None, list, or func, optional
Trials in which to apply the given covariate. Can be a list of trial numbers,
or a function which accepts rows of the trialsdf and returns a boolean, by default None
desc : str, optional
Additional information about the covariate for access later, by default ''
"""
stimlens = self.trialsdf.duration.apply(self.binf)
if isinstance(raw, str):
if raw not in self.trialsdf.columns:
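Analogously for a raw covariate (same untested caveat; 'wheel_velocity' is a hypothetical column holding one vector per trial, sized to duration / binwidth as the docstring requires):

    design.add_covariate_raw('wheelvel', 'wheel_velocity',
                             desc='continuous wheel velocity trace')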
@@ -354,7 +400,6 @@ def compile_design_matrix(self, dense=True):
assert self.binnedspikes.shape[0] == dm.shape[0], "Oh shit. Indexing error."
self.dm = dm
self.trlabels = trlabels
-# self.dm = np.roll(dm, -1, axis=0)  # Fix weird +1 offset bug in design matrix
self.compiled = True
return

@@ -384,7 +429,7 @@ def denseconv(X, bases):
A = np.zeros((T + TB - 1, int(np.sum(indices[kCov, :]))))
for i, j in enumerate(np.argwhere(indices[kCov, :]).flat):
A[:, i] = np.convolve(X[:, kCov], bases[:, j])
-BX[:, k: sI[kCov]] = A[: T, :]
+BX[:, k: sI[kCov]] = A[:T, :]
k = sI[kCov]
return BX

@@ -400,5 +445,5 @@
if offset < 0:
X = X[-offset:, :]
elif offset > 0:
-X = X[: -(1 + offset), :]
+X = X[:-offset, :]
return X
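The last change fixes an off-by-one in the positive-offset trim: slicing with [: -(1 + offset)] drops one row too many, while [:-offset] keeps the output the same length as the stimulus. A small numpy illustration:

    import numpy as np

    X = np.arange(10).reshape(5, 2)
    offset = 2
    X[: -(1 + offset), :].shape   # (2, 2) -- one row too short
    X[:-offset, :].shape          # (3, 2) -- the intended trim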
29 changes: 4 additions & 25 deletions brainbox/modeling/linear.py
@@ -17,7 +17,7 @@
class LinearGLM(NeuralModel):
def __init__(self, design_matrix, spk_times, spk_clu,
binwidth=0.02, metric='rsq', estimator=None,
-             train=0.8, blocktrain=False, mintrials=100):
+             mintrials=100):
"""
Fit a linear model using a DesignMatrix object and spike data. Can use ridge regression
or pure linear regression
@@ -48,13 +48,15 @@ def __init__(self, design_matrix, spk_times, spk_clu,
fitting, by default 100
"""
super().__init__(design_matrix, spk_times, spk_clu,
-                 binwidth, train, blocktrain, mintrials)
+                 binwidth, mintrials)
if estimator is None:
estimator = LinearRegression()
if not isinstance(estimator, BaseEstimator):
raise ValueError('Estimator must be a scikit-learn estimator, e.g. LinearRegression')
self.metric = metric
self.estimator = estimator
+self.link = lambda x: x
+self.invlink = self.link

def _fit(self, dm, binned, cells=None):
"""
@@ -94,26 +96,3 @@ def _fit(self, dm, binned, cells=None):
coefs.at[cell] = weight[cell_idx, :]
intercepts.at[cell] = intercept[cell_idx]
return coefs, intercepts

-def score(self):
-    """
-    Score model using chosen metric
-    Returns
-    -------
-    pandas.Series
-        Score using chosen metric (defined at instantiation) for each unit fit by the model.
-    """
-    if not hasattr(self, 'coefs'):
-        raise AttributeError('Model has not been fit yet.')
-    testmask = np.isin(self.design.trlabels, self.testinds).flatten()
-    dm, binned = self.design[testmask, :], self.binnedspikes[testmask]
-
-    scores = pd.Series(index=self.coefs.index, name='scores')
-    for cell in self.coefs.index:
-        cell_idx = np.argwhere(self.clu_ids == cell)[0, 0]
-        wt = self.coefs.loc[cell].reshape(-1, 1)
-        bias = self.intercepts.loc[cell]
-        y = binned[:, cell_idx]
-        scores.at[cell] = self._scorer(wt, bias, dm, y)
-    return scores
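The constructor now drops the train/blocktrain split arguments, adds an identity link, and validates the estimator up front. A minimal construction sketch (design, spk_times and spk_clu are assumed to come from a compiled DesignMatrix and a spike sorting result, as above):

    from sklearn.linear_model import Ridge
    from brainbox.modeling.linear import LinearGLM

    model = LinearGLM(design, spk_times, spk_clu,
                      binwidth=0.02, estimator=Ridge(alpha=1.0))
    # estimator must be a scikit-learn estimator; None falls back to
    # LinearRegression() and anything else raises ValueError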
