-
Notifications
You must be signed in to change notification settings - Fork 183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New datasets and parameter confirmation #32
Changes from 7 commits
555ba57
cba8318
252a03e
f4339bf
1e2d8e3
63722f5
f08009a
1a3aab6
650416b
a9c1205
a0070bd
fa0222f
d03ab09
ed0972c
1be88be
9142d54
ed7e112
741fbaa
a4c9c36
737e744
05347fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
''' | ||
Simple and compound motor imagery | ||
https://doi.org/10.1371/journal.pone.0114853 | ||
''' | ||
|
||
from .base import BaseDataset | ||
import zipfile as z | ||
from scipy.io import loadmat | ||
from mne.datasets.utils import _get_path, _do_path_update | ||
from mne.utils import _fetch_file | ||
import mne | ||
import numpy as np | ||
import os | ||
import shutil | ||
|
||
import logging | ||
log = logging.getLogger() | ||
|
||
# Harvard Dataverse download URLs, one per zip archive of the dataset.
# An archive is addressed by its position in this list.
FILES = [
    'https://dataverse.harvard.edu/api/access/datafile/2499178',
    'https://dataverse.harvard.edu/api/access/datafile/2499182',
    'https://dataverse.harvard.edu/api/access/datafile/2499179',
]
|
||
|
||
def eeg_data_path(base_path, subject):
    """Return the local path of one subject's .mat file, fetching it if needed.

    The data come as three zip archives (the entries of ``FILES``).  Each
    archive holds the recordings of one group of subjects, and a recording is
    recognised by the prefix of its file name.  When ``subject_<n>.mat`` is
    not present in ``base_path``, the archive of the requested subject is
    downloaded (unless its zip is already in ``base_path``), unpacked, and
    every recording found in it is stored as ``subject_<n>.mat``.  The zip and
    the temporary extraction directory are deleted afterwards.

    Parameters
    ----------
    base_path : str
        Existing directory in which the files are stored.
    subject : int
        Subject number; archives are known for subjects 1 to 10.

    Returns
    -------
    str
        ``<base_path>/subject_<subject>.mat``.

    Raises
    ------
    ValueError
        If the file is not on disk and ``subject`` belongs to none of the
        three archives (previously a path to a missing file was returned).
    """
    # (index into FILES, subject numbers, file-name prefix of each subject)
    archives = (
        (0, range(1, 5), ('cl', 'cyy', 'kyf', 'lnn')),
        (1, range(5, 8), ('ls', 'ry', 'wcf')),
        (2, range(8, 11), ('wx', 'yyx', 'zd')),
    )

    def subject_file(number):
        """Build the final location of the recording of subject `number`."""
        return os.path.join(base_path, 'subject_{}.mat'.format(number))

    def get_subjects(sub_inds, sub_names, file_ind):
        """Fetch archive `file_ind` and store its recordings per subject."""
        dataname = 'data{}'.format(file_ind)
        zip_path = os.path.join(base_path, dataname + '.zip')
        extract_dir = os.path.join(base_path, dataname)
        if not os.path.isfile(zip_path):
            _fetch_file(FILES[file_ind], zip_path, print_destination=False)
        with z.ZipFile(zip_path, 'r') as archive:
            os.makedirs(extract_dir, exist_ok=True)
            archive.extractall(extract_dir)
        for fname in os.listdir(extract_dir):
            # A distinct loop name is used here: the original reused `ind`
            # and thereby shadowed the archive index parameter.
            for number, prefix in zip(sub_inds, sub_names):
                if fname.startswith(prefix):
                    os.rename(os.path.join(extract_dir, fname),
                              subject_file(number))
                    # The file has been moved; no other prefix can apply.
                    break
        os.remove(zip_path)
        shutil.rmtree(extract_dir)

    target = subject_file(subject)
    if os.path.isfile(target):
        return target
    for file_ind, sub_inds, sub_names in archives:
        if subject in sub_inds:
            get_subjects(sub_inds, sub_names, file_ind)
            return target
    raise ValueError(
        'No archive is known for subject {!r} and {} does not exist'.format(
            subject, target))
|
||
|
||
class Weibo2014(BaseDataset): | ||
"""Weibo 2014 Motor Imagery dataset [1] | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you do the same,
|
||
References | ||
----------- | ||
Yi Weibo, 2014, "EEG data of simple and compound limb | ||
motor imagery", https://doi.org/10.7910/DVN/27306, Harvard Dataverse, V1 | ||
|
||
""" | ||
|
||
|
||
|
||
def __init__(self):
    """Register the fixed description of the dataset with the base class:
    subjects 1 to 10, one session per subject, seven imagery classes."""
    # Class name -> integer code; these names are the event labels handed
    # to the base class.
    event_codes = {
        'left_hand': 1,
        'right_hand': 2,
        'hands': 3,
        'feet': 4,
        'left_hand_right_foot': 5,
        'right_hand_left_foot': 6,
        'rest': 7,
    }
    # NOTE(review): the earlier comment here said the full trial is 0-8 s
    # and that a reduced window limits boundary effects of trial-wise
    # band-pass filtering, yet the full [0, 8] window is what is passed --
    # confirm which one is intended.
    super().__init__(
        subjects=list(range(1, 11)),
        sessions_per_subject=1,
        events=event_codes,
        code='Weibo 2014',
        interval=[0, 8],
        paradigm='imagery',
        doi='10.7910/DVN/27306')
|
||
def _get_single_subject_data(self, subject):
    """Return the recording of one subject as a single continuous raw.

    The .mat file stores the data trial by trial.  Each trial is de-meaned
    along its last axis, given a 65th (stim) channel whose first sample
    holds the trial label, padded with 50 zero samples on each side
    (0.25 s at the declared 200 Hz), and all padded trials are then laid
    end to end.

    Parameters
    ----------
    subject : int
        Subject number, forwarded to ``self.data_path``.

    Returns
    -------
    dict
        ``{'session_0': {'run_0': raw}}`` where ``raw`` is the
        ``mne.io.RawArray`` built from the concatenated trials.
    """
    fname = self.data_path(subject)
    mat = loadmat(fname, squeeze_me=True, struct_as_record=False,
                  verify_compressed_data_integrity=False)
    # Channel names are not known yet, so generic names are used and no
    # montage is attached (the unused standard_1020 montage load that used
    # to sit here has been dropped).
    ch_names = ['EEG{}'.format(i) for i in range(1, 65)] + ['STIM014']
    info = mne.create_info(ch_names=ch_names,
                           ch_types=['eeg'] * 64 + ['stim'],
                           sfreq=200, montage=None)
    event_ids = mat['label'].ravel()
    # Bring the last axis of the stored array to the front so that axis 0
    # indexes trials (presumably (channel, sample, trial) on disk -- TODO
    # confirm against the .mat layout).
    trials = np.transpose(mat['data'], axes=[2, 0, 1])
    # de-mean each trial
    trials = trials - np.mean(trials, axis=2, keepdims=True)
    # One extra channel per trial; only its first sample carries the label.
    stim = np.zeros((trials.shape[0], 1, trials.shape[2]))
    stim[:, 0, 0] = event_ids
    epochs = np.concatenate([trials, stim], axis=1)
    log.warning(
        'Trial data de-meaned and concatenated with a buffer to create '
        'cont data')
    # Zero buffer before and after every trial.
    pad = np.zeros((epochs.shape[0], epochs.shape[1], 50))
    epochs = np.concatenate([pad, epochs, pad], axis=2)
    # Joining the per-trial 2-D arrays along their last axis yields one
    # continuous (channel, sample) array.
    raw = mne.io.RawArray(data=np.concatenate(list(epochs), axis=1),
                          info=info, verbose=False)
    return {'session_0': {'run_0': raw}}
|
||
def data_path(self, subject, path=None, force_update=False,
              update_path=None, verbose=None):
    """Return the local .mat file of `subject`, fetching it when missing.

    Parameters
    ----------
    subject : int
        Must be one of ``self.subject_list``.
    path : str or None
        Forwarded to ``_get_path`` to resolve the storage root.
    force_update, update_path, verbose
        Accepted but not used by this method.

    Returns
    -------
    str
        The value returned by ``eeg_data_path`` for this subject.

    Raises
    ------
    ValueError
        If `subject` is not in ``self.subject_list``.
    """
    if subject not in self.subject_list:
        raise ValueError("Invalid subject number")
    # NOTE(review): a reviewer reported a warning about this being a
    # non-standard MNE config key and asked whether the keys used for new
    # datasets could be changed.
    config_key = 'MNE_DATASETS_WEIBO2014_PATH'
    dataset_name = "Weibo 2014"
    root = _get_path(path, config_key, dataset_name)
    _do_path_update(root, True, config_key, dataset_name)
    basepath = os.path.join(root, "MNE-weibo-2014")
    os.makedirs(basepath, exist_ok=True)
    return eeg_data_path(basepath, subject)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
''' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't really need a module docstring for this one; it's not parsed by the docs. |
||
Simple and compound motor imagery | ||
https://doi.org/10.1371/journal.pone.0114853 | ||
''' | ||
|
||
from .base import BaseDataset | ||
import zipfile as z | ||
from scipy.io import loadmat | ||
from mne.datasets.utils import _get_path, _do_path_update | ||
from mne.utils import _fetch_file | ||
import mne | ||
import numpy as np | ||
import os | ||
import shutil | ||
|
||
DATA_PATH = 'https://ndownloader.figshare.com/files/3662952' | ||
|
||
|
||
def local_data_path(base_path, subject):
    """Return the .cnt files of one subject, fetching the dataset if needed.

    The dataset is a single zip archive that unpacks to ``<base_path>/data``
    with files named ``S<subject>_<session><run>.cnt``.  When any of the
    requested subject's files is missing, the archive is downloaded (unless
    ``data`` is already unpacked) and the files of all four subjects are
    moved to ``<base_path>/subject_<n>/<session><run>.cnt``; ``data`` is
    then deleted.

    Files that are already in place are left alone, so a reorganisation
    that was interrupted can be resumed.  Previously this failed because
    the subject directory already existed or the source file was gone.

    Parameters
    ----------
    base_path : str
        Existing directory in which the files are stored.
    subject : int
        Subject number (1 to 4).

    Returns
    -------
    list of list of str
        One inner list per session ('1', '2', '3'), each holding the paths
        of runs 'A' and 'B' in that order.
    """
    sessions = ('1', '2', '3')
    runs = ('A', 'B')

    def subject_dir(number):
        """Build the directory that holds the files of subject `number`."""
        return os.path.join(base_path, 'subject_{}'.format(number))

    def run_file(number, session, run):
        """Build the final location of one recording."""
        return os.path.join(subject_dir(number),
                            '{}{}.cnt'.format(session, run))

    wanted = [[run_file(subject, session, run) for run in runs]
              for session in sessions]
    if all(os.path.isfile(f) for per_session in wanted for f in per_session):
        return wanted

    datapath = os.path.join(base_path, 'data')
    if not os.path.isdir(datapath):
        zip_path = os.path.join(base_path, 'data.zip')
        _fetch_file(DATA_PATH, zip_path, print_destination=False)
        with z.ZipFile(zip_path, 'r') as archive:
            archive.extractall(base_path)
        os.remove(zip_path)
    for number in range(1, 5):
        os.makedirs(subject_dir(number), exist_ok=True)
        for session in sessions:
            for run in runs:
                target = run_file(number, session, run)
                if os.path.isfile(target):
                    # Moved by an earlier, interrupted call.
                    continue
                os.rename(
                    os.path.join(datapath, 'S{}_{}{}.cnt'.format(
                        number, session, run)),
                    target)
    shutil.rmtree(datapath)
    return wanted
|
||
|
||
class Zhou2016(BaseDataset): | ||
"""Dataset from Zhou et al. 2016 [1] | ||
|
||
Abstract | ||
------------ | ||
|
||
Independent component analysis (ICA) as a promising spatial filtering method | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I did not express myself very clearly. I was thinking about the part of the article that describes the dataset. For example, this part:
and
|
||
can separate motor-related independent components (MRICs) from the | ||
multichannel electroencephalogram (EEG) signals. However, the unpredictable | ||
burst interferences may significantly degrade the performance of ICA-based | ||
brain-computer interface (BCI) system. In this study, we proposed a new | ||
algorithm frame to address this issue by combining the single-trial-based | ||
ICA filter with zero-training classifier. We developed a two-round data | ||
selection method to identify automatically the badly corrupted EEG trials in | ||
the training set. The “high quality” training trials were utilized to | ||
optimize the ICA filter. In addition, we proposed an accuracy-matrix method | ||
to locate the artifact data segments within a single trial and investigated | ||
which types of artifacts can influence the performance of the ICA-based | ||
MIBCIs. Twenty-six EEG datasets of three-class motor imagery were used to | ||
validate the proposed methods, and the classification accuracies were | ||
compared with that obtained by frequently used common spatial pattern (CSP) | ||
spatial filtering algorithm. The experimental results demonstrated that the | ||
proposed optimizing strategy could effectively improve the stability, | ||
practicality and classification performance of ICA-based MIBCI. The study | ||
revealed that rational use of ICA method may be crucial in building a | ||
practical ICA-based MIBCI system. | ||
|
||
References | ||
------------ | ||
|
||
[1] Zhou B, Wu X, Lv Z, Zhang L, Guo X (2016) A Fully Automated Trial | ||
Selection Method for Optimization of Motor Imagery Based Brain-Computer | ||
Interface. PLoS ONE 11(9): | ||
e0162657. https://doi.org/10.1371/journal.pone.0162657 | ||
|
||
""" | ||
|
||
def __init__(self):
    """Register the fixed description of the dataset with the base class:
    subjects 1 to 4, three sessions per subject, three imagery classes."""
    # Trial layout noted by the author: preparation 0-1 s, motor imagery
    # 1-6 s, break 6-10 s.
    settings = dict(
        subjects=list(range(1, 5)),
        sessions_per_subject=3,
        events=dict(left_hand=1, right_hand=2, feet=3),
        code='Zhou 2016',
        interval=[0, 5],
        task_interval=[1, 6],
        paradigm='imagery',
        doi='10.1371/journal.pone.0162657',
    )
    super().__init__(**settings)
|
||
def _get_single_subject_data(self, subject):
    """Load every recording of one subject.

    Returns
    -------
    dict
        ``{'session_<i>': {'run_<j>': raw}}`` where ``i`` and ``j`` are the
        zero-based positions of the session and of the run in the nested
        list returned by ``self.data_path``, and ``raw`` is the result of
        ``mne.io.read_raw_cnt`` for that file.
    """
    session_files = self.data_path(subject)
    return {
        'session_{}'.format(sess_ind): {
            'run_{}'.format(run_ind): mne.io.read_raw_cnt(
                fname, preload=True, montage='standard_1020')
            for run_ind, fname in enumerate(run_files)
        }
        for sess_ind, run_files in enumerate(session_files)
    }
|
||
def data_path(self, subject, path=None, force_update=False,
              update_path=None, verbose=None):
    """Return the local .cnt files of `subject`, fetching them when missing.

    Parameters
    ----------
    subject : int
        Must be one of ``self.subject_list``.
    path : str or None
        Forwarded to ``_get_path`` to resolve the storage root.
    force_update, update_path, verbose
        Accepted but not used by this method.

    Returns
    -------
    list of list of str
        The value returned by ``local_data_path`` for this subject.

    Raises
    ------
    ValueError
        If `subject` is not in ``self.subject_list``.
    """
    if subject not in self.subject_list:
        raise ValueError("Invalid subject number")
    # NOTE(review): MNE warns about this key because it is not in its list
    # of pre-approved config names (see mne/utils.py in mne-python).
    # Reviewers suggested dealing with this later, possibly with a config
    # file owned by this project.
    config_key = 'MNE_DATASETS_ZHOU2016_PATH'
    dataset_name = "Zhou 2016"
    root = _get_path(path, config_key, dataset_name)
    # NOTE(review): the stored path is always updated (second argument is
    # True).  Reviewers agreed to revisit this in a separate PR that
    # reworks the download system.
    _do_path_update(root, True, config_key, dataset_name)
    basepath = os.path.join(root, "MNE-zhou-2016")
    os.makedirs(basepath, exist_ok=True)
    return local_data_path(basepath, subject)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,3 +11,5 @@ | |
from .openvibe_mi import OpenvibeMI | ||
from .bbci_eeg_fnirs import BBCIEEGfNIRS | ||
from .upper_limb import UpperLimb | ||
from .Weibo2014 import Weibo2014 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please, add them in docs/source/datasets.rst |
||
from .Zhou2016 import Zhou2016 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,14 +2,55 @@ | |
Base class for a dataset | ||
""" | ||
import abc | ||
import logging | ||
|
||
log = logging.getLogger() | ||
|
||
|
||
class BaseDataset(metaclass=abc.ABCMeta): | ||
"""Base dataset""" | ||
|
||
def __init__(self, subjects, sessions_per_subject, events, code, interval, | ||
paradigm, doi=None): | ||
def __init__(self, subjects, sessions_per_subject, events, | ||
code, interval, paradigm, task_interval=None, doi=None): | ||
""" | ||
Parameters required for all datasets | ||
|
||
parameters | ||
---------- | ||
subjects: List of int | ||
List of subject number # TODO: make identifiers more general | ||
|
||
sessions_per_subject: int | ||
Number of sessions per subject | ||
|
||
events: dict of string: int | ||
String codes for events matched with labels in the stim channel. Currently imagery codes can include: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we also have elbow and other stuff. I'm wondering if we should start using MNE hierarchical event definition. for example, you can define an event as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well we lose nothing by adding it so why not, I'll go through and change all left_hand to hand/left etc --although there is one more level to worry about, of imagined vs actual There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can do imagined/hand/left, etc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but we can skip this for now, and see how we can deal with that later. |
||
- left_hand | ||
- right_hand | ||
- hands | ||
- feet | ||
- rest | ||
- left_hand_right_foot | ||
- right_hand_left_foot | ||
- tongue | ||
- navigation | ||
- subtraction | ||
- word_ass (for word association) | ||
|
||
code: string | ||
Unique identifier for dataset, used in all plots | ||
|
||
interval: list with 2 entries | ||
Interval relative to trial start for imagery | ||
|
||
paradigm: ['p300','imagery'] | ||
Defines what sort of dataset this is (currently only imagery is implemented) | ||
|
||
task_interval: list of 2 entries or None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry for being picky, but this change makes me uncomfortable, and I'm trying to avoid complicating the API more than necessary. I do not think we need to have a double definition of time interval. The time interval should correspond to the interval of the task, i.e. the motor imagery. So I would actually replace the The actual timing of the trial does not bring us anything. From my point of view, datasets are immutable objects; they are here to abstract the data as it has been recorded. Does it make sense? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I think that does make the most sense. I'll change that everywhere. |
||
Defines the start and end of the imagery *relative to event marker.* If not specified, defaults to interval. | ||
|
||
doi: DOI for dataset, optional (for now) | ||
""" | ||
if not isinstance(subjects, list): | ||
raise(ValueError("subjects must be a list")) | ||
|
||
|
@@ -18,6 +59,13 @@ def __init__(self, subjects, sessions_per_subject, events, code, interval, | |
self.event_id = events | ||
self.code = code | ||
self.interval = interval | ||
if task_interval is None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm still wondering if this is the right way to proceed. This type of thing is very specific to MI. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it generalizes OK, the case of ERPs is just that task_interval[0] is 0 and task_interval[1] is the length of time until the next trigger |
||
assert interval[0]==0, 'Interval does not start at 0 so task onset is necessary' | ||
self.task_interval = list(interval) | ||
else: | ||
if interval[1]-interval[0] > task_interval[1]-task_interval[0]: | ||
log.warning('Given interval extends outside of imagery period') | ||
self.task_interval = task_interval | ||
self.paradigm = paradigm | ||
self.doi = doi | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we somehow avoid this code duplication ?