Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New datasets and parameter confirmation #32

Merged
merged 21 commits into from
Apr 8, 2018
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/source/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ Motor Imagery Datasets
BBCIEEGfNIRS
OpenvibeMI
PhysionetMI
UpperLimb
UpperLimb
Zhou2016
Weibo2014

------------
ERP Datasets
Expand Down
162 changes: 162 additions & 0 deletions moabb/datasets/Weibo2014.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
'''
Simple and compound motor imagery
https://doi.org/10.1371/journal.pone.0114853
'''

from .base import BaseDataset
import zipfile as z
from scipy.io import loadmat
from mne.datasets.utils import _get_path, _do_path_update
from mne.utils import _fetch_file
import mne
import numpy as np
import os
import shutil

import logging
log = logging.getLogger()

# Harvard Dataverse download links for the three zip archives of the dataset.
# The position in this list is the archive index used when fetching subjects.
FILES = [
    'https://dataverse.harvard.edu/api/access/datafile/2499178',
    'https://dataverse.harvard.edu/api/access/datafile/2499182',
    'https://dataverse.harvard.edu/api/access/datafile/2499179',
]


def eeg_data_path(base_path, subject):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we somehow avoid this code duplication ?

file1_subj = ['cl', 'cyy', 'kyf', 'lnn']
file2_subj = ['ls', 'ry', 'wcf']
file3_subj = ['wx', 'yyx', 'zd']

def get_subjects(sub_inds, sub_names, ind):
    """Fetch archive number ``ind`` and file its recordings per subject.

    The archive ``FILES[ind]`` is downloaded to ``data<ind>.zip`` under
    ``base_path`` (unless that zip is already there), extracted into a
    ``data<ind>`` folder, and every extracted file whose name starts with one
    of ``sub_names`` is moved to ``subject_<n>.mat`` in ``base_path``, where
    ``n`` is the entry of ``sub_inds`` paired with that prefix. The zip and
    the extraction folder are deleted afterwards.

    Parameters
    ----------
    sub_inds : list of int
        Subject numbers, in the same order as ``sub_names``.
    sub_names : list of str
        File-name prefixes identifying each subject inside the archive.
    ind : int
        Index of the archive in ``FILES``.
    """
    dataname = 'data{}'.format(ind)
    zip_path = os.path.join(base_path, dataname + '.zip')
    extract_dir = os.path.join(base_path, dataname)

    if not os.path.isfile(zip_path):
        _fetch_file(FILES[ind], zip_path, print_destination=False)
    with z.ZipFile(zip_path, 'r') as f:
        os.makedirs(extract_dir, exist_ok=True)
        f.extractall(extract_dir)

    for fname in os.listdir(extract_dir):
        # The loop variable is deliberately NOT called ``ind``: reusing that
        # name would overwrite the archive index passed in as a parameter.
        for sub_ind, prefix in zip(sub_inds, sub_names):
            if fname.startswith(prefix):
                os.rename(os.path.join(extract_dir, fname),
                          os.path.join(base_path,
                                       'subject_{}.mat'.format(sub_ind)))
                # The file has been moved; trying further prefixes on it
                # could only fail.
                break

    os.remove(zip_path)
    shutil.rmtree(extract_dir)

if not os.path.isfile(os.path.join(base_path,
'subject_{}.mat'.format(subject))):
if subject in range(1, 5):
get_subjects(list(range(1, 5)), file1_subj, 0)
elif subject in range(5, 8):
get_subjects(list(range(5, 8)), file2_subj, 1)
elif subject in range(8, 11):
get_subjects(list(range(8, 11)), file3_subj, 2)
return os.path.join(base_path, 'subject_{}.mat'.format(subject))


class Weibo2014(BaseDataset):
"""Weibo 2014 Motor Imagery dataset.

Dataset from the article *Evaluation of EEG oscillatory patterns and
cognitive process during simple and compound limb motor imagery* [1]_.

It contains data recorded on 10 subjects, with 60 electrodes.

This dataset was used to investigate the differences of the EEG patterns
between simple limb motor imagery and compound limb motor
imagery. Seven kinds of mental tasks have been designed, involving three
tasks of simple limb motor imagery (left hand, right hand, feet), three
tasks of compound limb motor imagery combining hand with hand/foot
(both hands, left hand combined with right foot, right hand combined with
left foot) and rest state.

At the beginning of each trial (8 seconds), a white circle appeared at the
center of the monitor. After 2 seconds, a red circle (preparation cue)
appeared for 1 second to remind the subjects of paying attention to the
character indication next. Then red circle disappeared and character
indication (‘Left Hand’, ‘Left Hand & Right Foot’, et al) was presented on
the screen for 4 seconds, during which the participants were asked to
perform kinesthetic motor imagery rather than a visual type of imagery
while avoiding any muscle movement. After 7 seconds, ‘Rest’ was presented
for 1 second before next trial (Fig. 1(a)). The experiments were divided
into 9 sections, involving 8 sections consisting of 60 trials each for six
kinds of MI tasks (10 trials for each MI task in one section) and one
section consisting of 80 trials for rest state. The sequence of six MI
tasks was randomized. Intersection break was about 5 to 10 minutes.

References
-----------
.. [1] Yi, Weibo, et al. "Evaluation of EEG oscillatory patterns and
cognitive process during simple and compound limb motor imagery."
PloS one 9.12 (2014). https://doi.org/10.1371/journal.pone.0114853
"""
def __init__(self):
    """Register the fixed Weibo 2014 metadata with the base dataset."""
    # Label values found in the stim channel, keyed by mental task.
    event_codes = {
        'left_hand': 1,
        'right_hand': 2,
        'hands': 3,
        'feet': 4,
        'left_hand_right_foot': 5,
        'right_hand_left_foot': 6,
        'rest': 7,
    }
    subject_numbers = list(range(1, 11))
    super().__init__(
        subjects=subject_numbers,
        sessions_per_subject=1,
        events=event_codes,
        code='Weibo 2014',
        # A full trial covers 0-8 s; with trial-wise band-pass filtering the
        # narrower 3-7 s window reduces boundary effects.
        interval=[3, 7],
        task_interval=[0, 8],
        paradigm='imagery',
        doi='10.1371/journal.pone.0114853')

def _get_single_subject_data(self, subject):
    """Build a continuous recording from one subject's epoched .mat file.

    The trials stored in the file are de-meaned, given an extra stim channel
    carrying the trial label on the first sample, padded with 50 zero samples
    on both sides (0.25 s at the declared 200 Hz) and laid end to end.

    Returns
    -------
    dict
        ``{'session_0': {'run_0': raw}}`` with ``raw`` an ``mne.io.RawArray``.
    """
    # NOTE(review): the original TODO asked for a 1 s zero buffer between
    # trials; the padding below yields 2 * 50 samples between two trials.
    mat = loadmat(self.data_path(subject), squeeze_me=True,
                  struct_as_record=False,
                  verify_compressed_data_integrity=False)
    montage = mne.channels.read_montage('standard_1005')
    ch_names = [
        'Fp1', 'Fpz', 'Fp2', 'AF3', 'AF4', 'F7', 'F5', 'F3', 'F1', 'Fz',
        'F2', 'F4', 'F6', 'F8', 'FT7', 'FC5', 'FC3', 'FC1', 'FCz', 'FC2',
        'FC4', 'FC6', 'FT8', 'T7', 'C5', 'C3', 'C1', 'Cz', 'C2', 'C4',
        'C6', 'T8', 'TP7', 'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6',
        'TP8', 'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', 'P4', 'P6', 'P8',
        'PO7', 'PO5', 'PO3', 'POz', 'PO4', 'PO6', 'PO8', 'CB1', 'O1', 'Oz',
        'O2', 'CB2', 'VEO', 'HEO',
    ]

    # First 62 entries are typed EEG, the trailing VEO / HEO pair EOG.
    ch_types = ['eeg'] * 62 + ['eog'] * 2
    # FIXME: unclear what CB1 / CB2 (positions 57 and 61) are, so they are
    # typed as 'misc'.
    for misc_pos in (57, 61):
        ch_types[misc_pos] = 'misc'
    info = mne.create_info(ch_names=ch_names + ['STIM014'],
                           ch_types=ch_types + ['stim'],
                           sfreq=200, montage=None)

    labels = mat['label'].ravel()
    # Bring the last axis of the stored array to the front so that the
    # label vector can be written along axis 0 below.
    # NOTE(review): presumably (channels, samples, trials) on disk -- confirm.
    trials = np.transpose(mat['data'], axes=[2, 0, 1])
    # de-mean each trial
    trials = trials - np.mean(trials, axis=2, keepdims=True)

    n_trials, _, n_samples = trials.shape
    stim = np.zeros((n_trials, 1, n_samples))
    stim[:, 0, 0] = labels
    # NOTE(review): the 1e-6 factor looks like a microvolt -> volt
    # conversion -- confirm against the dataset description.
    with_stim = np.concatenate([1e-6 * trials, stim], axis=1)

    # add buffer in between trials
    log.warning(
        "Trial data de-meaned and concatenated with a buffer to create "
        "cont data")
    padding = np.zeros((with_stim.shape[0], with_stim.shape[1], 50))
    padded = np.concatenate([padding, with_stim, padding], axis=2)

    continuous = np.concatenate(list(padded), axis=1)
    raw = mne.io.RawArray(data=continuous, info=info, verbose=False)
    raw.set_montage(montage)
    return {'session_0': {'run_0': raw}}

def data_path(self, subject, path=None, force_update=False,
update_path=None, verbose=None):
if subject not in self.subject_list:
raise(ValueError("Invalid subject number"))
key = 'MNE_DATASETS_WEIBO2014_PATH'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I usually get a weird warning about the non-standard MNE key. Can we track down this kind of thing and change the keys for new datasets?

path = _get_path(path, key, "Weibo 2014")
_do_path_update(path, True, key, "Weibo 2014")
basepath = os.path.join(path, "MNE-weibo-2014")
if not os.path.isdir(basepath):
os.makedirs(basepath)
return eeg_data_path(basepath, subject)
111 changes: 111 additions & 0 deletions moabb/datasets/Zhou2016.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
'''
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't really need a module docstring for this one; it's not parsed by the docs.
You can still leave this docstring here, but put everything you want to see in the docs in the class docstring.

A fully automated trial selection method for motor imagery (Zhou et al. 2016)
https://doi.org/10.1371/journal.pone.0162657
'''

from .base import BaseDataset
import zipfile as z
from scipy.io import loadmat
from mne.datasets.utils import _get_path, _do_path_update
from mne.utils import _fetch_file
import mne
import numpy as np
import os
import shutil

# Figshare download link for the zip archive that local_data_path fetches
# and unpacks.
DATA_PATH = 'https://ndownloader.figshare.com/files/3662952'


def local_data_path(base_path, subject):
    """Return the local ``.cnt`` files of one subject, fetching if needed.

    Recordings are stored as ``<base_path>/subject_<n>/<session><run>.cnt``
    for sessions 1-3 and runs A/B. If any file of the requested subject is
    missing, the archive at ``DATA_PATH`` is downloaded and extracted (unless
    an extracted ``data`` folder is already present in ``base_path``), the
    ``S<n>_<session><run>.cnt`` files of subjects 1-4 are moved into their
    per-subject folders, and the ``data`` folder is removed.

    The unpacking step can be safely repeated after an interrupted run:
    existing subject folders are reused and files that were already moved
    are skipped.

    Parameters
    ----------
    base_path : str
        Folder holding (or receiving) the dataset.
    subject : int
        Subject number.

    Returns
    -------
    list of list of str
        One inner list per session (1, 2, 3), each holding the paths of
        run A and run B in that order.
    """
    sessions = ['1', '2', '3']
    runs = ['A', 'B']

    def subject_files(subj):
        # Final on-disk layout for one subject, grouped by session.
        subj_dir = os.path.join(base_path, 'subject_{}'.format(subj))
        return [[os.path.join(subj_dir, '{}{}.cnt'.format(sess, run))
                 for run in runs] for sess in sessions]

    wanted = subject_files(subject)
    # Check the files themselves: a bare folder left behind by an aborted
    # unpack must not be mistaken for a complete download.
    if all(os.path.isfile(f) for pair in wanted for f in pair):
        return wanted

    datapath = os.path.join(base_path, 'data')
    if not os.path.isdir(datapath):
        zip_path = os.path.join(base_path, 'data.zip')
        _fetch_file(DATA_PATH, zip_path, print_destination=False)
        with z.ZipFile(zip_path, 'r') as f:
            f.extractall(base_path)
        os.remove(zip_path)

    for i in range(1, 5):
        os.makedirs(os.path.join(base_path, 'subject_{}'.format(i)),
                    exist_ok=True)
        for sess, pair in zip(sessions, subject_files(i)):
            for run, target in zip(runs, pair):
                source = os.path.join(
                    datapath, 'S{}_{}{}.cnt'.format(i, sess, run))
                # A missing source means it was moved by an earlier,
                # interrupted attempt.
                if os.path.isfile(source):
                    # os.replace overwrites an existing target on every
                    # platform, unlike os.rename on Windows.
                    os.replace(source, target)
    shutil.rmtree(datapath)
    return wanted


class Zhou2016(BaseDataset):
"""Dataset from Zhou et al. 2016.

Dataset from the article *A Fully Automated Trial Selection Method for
Optimization of Motor Imagery Based Brain-Computer Interface* [1]_.
This dataset contains data recorded on 4 subjects performing 3 type of
motor imagery: left hand, right hand and feet.

Every subject went through three sessions, each of which contained two
consecutive runs with several minutes inter-run breaks, and each run
comprised 75 trials (25 trials per class). The intervals between two
sessions varied from several days to several months.

A trial started by a short beep indicating 1 s preparation time,
and followed by a red arrow pointing randomly to three directions (left,
right, or bottom) lasting for 5 s and then presented a black screen for
4 s. The subject was instructed to immediately perform the imagination
tasks of the left hand, right hand or foot movement respectively according
to the cue direction, and try to relax during the black screen.

References
----------

.. [1] Zhou B, Wu X, Lv Z, Zhang L, Guo X (2016) A Fully Automated
Trial Selection Method for Optimization of Motor Imagery Based
Brain-Computer Interface. PLoS ONE 11(9).
https://doi.org/10.1371/journal.pone.0162657
"""

def __init__(self):
    """Register the fixed Zhou 2016 metadata with the base dataset."""
    # Label values found in the stim channel, keyed by imagery class.
    event_codes = {'left_hand': 1, 'right_hand': 2, 'feet': 3}
    subject_numbers = list(range(1, 5))
    super().__init__(
        subjects=subject_numbers,
        sessions_per_subject=3,
        events=event_codes,
        code='Zhou 2016',
        # Trial timeline: preparation 0-1 s, motor imagery 1-6 s,
        # break 6-10 s.
        interval=[0, 5],
        task_interval=[1, 6],
        paradigm='imagery',
        doi='10.1371/journal.pone.0162657')

def _get_single_subject_data(self, subject):
    """Read every recording of one subject.

    Returns
    -------
    dict
        ``{'session_<i>': {'run_<j>': raw}}`` where ``i`` and ``j`` are the
        zero-based positions of the session and run in the nested file list
        returned by ``data_path``.
    """
    def read_run(cnt_file):
        # Load one .cnt file into memory with the standard 10-20 montage.
        return mne.io.read_raw_cnt(cnt_file,
                                   preload=True,
                                   montage='standard_1020')

    return {
        'session_{}'.format(sess_pos): {
            'run_{}'.format(run_pos): read_run(cnt_file)
            for run_pos, cnt_file in enumerate(run_files)
        }
        for sess_pos, run_files in enumerate(self.data_path(subject))
    }

def data_path(self, subject, path=None, force_update=False,
update_path=None, verbose=None):
if subject not in self.subject_list:
raise(ValueError("Invalid subject number"))
key = 'MNE_DATASETS_ZHOU2016_PATH'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: let's change the key to the MNE standard key (whatever the standard is).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, this is a warning because the key is not in the list of pre-approved config names. https://github.com/mne-tools/mne-python/blob/master/mne/utils.py#L1478

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should try and deal with this at some point though, as the list keeps growing...maybe our own config file?

path = _get_path(path, key, "Zhou 2016")
_do_path_update(path, True, key, "Zhou 2016")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have any other option than forcing the path ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is what we do everywhere -- it should be another PR I think, revamping the download system

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep we should. Let's keep this for another PR (not a priority)

basepath = os.path.join(path, "MNE-zhou-2016")
if not os.path.isdir(basepath):
os.makedirs(basepath)
return local_data_path(basepath, subject)
2 changes: 2 additions & 0 deletions moabb/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@
from .openvibe_mi import OpenvibeMI
from .bbci_eeg_fnirs import BBCIEEGfNIRS
from .upper_limb import UpperLimb
from .Weibo2014 import Weibo2014
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please, add them in docs/source/datasets.rst

from .Zhou2016 import Zhou2016
52 changes: 50 additions & 2 deletions moabb/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,55 @@
Base class for a dataset
"""
import abc
import logging

log = logging.getLogger()


class BaseDataset(metaclass=abc.ABCMeta):
"""Base dataset"""

def __init__(self, subjects, sessions_per_subject, events, code, interval,
paradigm, doi=None):
def __init__(self, subjects, sessions_per_subject, events,
code, interval, paradigm, task_interval=None, doi=None):
"""
Parameters required for all datasets

parameters
----------
subjects: List of int
List of subject number # TODO: make identifiers more general

sessions_per_subject: int
Number of sessions per subject

events: dict of string: int
String codes for events matched with labels in the stim channel. Currently imagery codes codes can include:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we also have elbow and other stuff.

I'm wondering if we should start using MNE hierarchical event definition.

For example, you can define events as hand/left and hand/right, which allows selecting all hand events after epoching by doing Epochs['hand']. But this is another discussion.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well we lose nothing by adding it so why not, I'll go through and change all left_hand to hand/left etc --although there is one more level to worry about, of imagined vs actual

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can do imagined/hand/left, etc

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but we can skip this for now, and see how we can deal with that later.

- left_hand
- right_hand
- hands
- feet
- rest
- left_hand_right_foot
- right_hand_left_foot
- tongue
- navigation
- subtraction
- word_ass (for word association)

code: string
Unique identifier for dataset, used in all plots

interval: list with 2 entries
Interval relative to trial start for imagery

paradigm: ['p300','imagery']
Defines what sort of dataset this is (currently only imagery is implemented)

task_interval: list of 2 entries or None
Copy link
Member

@alexandrebarachant alexandrebarachant Apr 6, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for being picky, but this change makes me uncomfortable, and I'm trying to avoid complicating the API more than necessary.

I do not think we need a double definition of the time interval. The time interval should correspond to the interval of the task, i.e. the motor imagery, so I would actually replace interval with your task_interval.

The actual timing of the trial does not bring us anything.
We changed the API significantly, and it is the paradigm itself that is doing the epoching, so we don't even need to expose tmin and tmax in the dataset anymore. If the user wants to play with cropping the epoch, it is more natural to do it at the level of the paradigm (exactly like events or channels).

From my point of view, datasets are immutable objects; they are here to abstract the data as it was recorded.

does it make sense ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I think that does make the most sense. I'll change that everywhere

Defines the start and end of the imagery *relative to event marker.* If not specified, defaults to interval.

doi: DOI for dataset, optional (for now)
"""
if not isinstance(subjects, list):
raise(ValueError("subjects must be a list"))

Expand All @@ -18,6 +59,13 @@ def __init__(self, subjects, sessions_per_subject, events, code, interval,
self.event_id = events
self.code = code
self.interval = interval
if task_interval is None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still wondering if this is the right way to proceed.
We can set this to be the default interval, i.e. interval from the start to the end of MI.

This type of thing is very specific to MI.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it generalizes OK, the case of ERPs is just that task_interval[0] is 0 and task_interval[1] is the length of time until the next trigger

assert interval[0]==0, 'Interval does not start at 0 so task onset is necessary'
self.task_interval = list(interval)
else:
if interval[1]-interval[0] > task_interval[1]-task_interval[0]:
log.warning('Given interval extends outside of imagery period')
self.task_interval = task_interval
self.paradigm = paradigm
self.doi = doi

Expand Down
2 changes: 1 addition & 1 deletion moabb/datasets/bbci_eeg_fnirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(self, fnirs=False, motor_imagery=True,
sessions_per_subject=n_sessions,
events=events,
code='BBCI EEG fNIRS',
interval=[3.5, 10],
interval=[0, 10], # marker is for *task* start not cue start
paradigm=('/').join(paradigms),
doi='10.1109/TNSRE.2016.2628057')

Expand Down
Loading