Merge pull request #32 from NeuroTechX/new_datasets

New datasets and parameter confirmation
NeuroTechX · Apr 8, 2018 · 9ad04c3 · 9ad04c3
2 parents d8f46ae + 05347fe
commit 9ad04c3
Show file tree

Hide file tree

Showing 24 changed files with 1,448 additions and 174 deletions.
diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
@@ -21,11 +21,14 @@ Motor Imagery Datasets
     BNCI2015004
 
     AlexMI
-    GigaDbMI
-    BBCIEEGfNIRS
+    Cho2017
+    Shin2017A
+    Shin2017B
     OpenvibeMI
     PhysionetMI
-    UpperLimb
+    Ofner2017
+    Zhou2016
+    Weibo2014
 
 ------------
 ERP Datasets

diff --git a/docs/source/paradigms.rst b/docs/source/paradigms.rst
@@ -14,9 +14,11 @@ Motor Imagery Paradigms
     :toctree: generated/
     :template: class.rst
 
-    BaseMotorImagery
+    MotorImagery
     LeftRightImagery
 
+    FilterBankMotorImagery
+    FilterBankLeftRightImagery
 
 ------------
 Base & Utils
@@ -26,4 +28,7 @@ Base & Utils
     :toctree: generated/
     :template: class.rst
 
+    motor_imagery.BaseMotorImagery
+    motor_imagery.SinglePass
+    motor_imagery.FilterBank
     base.BaseParadigm
diff --git a/docs/source/pipelines.rst b/docs/source/pipelines.rst
@@ -6,15 +6,16 @@ Pipelines
 
 .. currentmodule:: moabb.pipelines
 
-----------------------
-Motor Imagery Datasets
-----------------------
+---------
+Pipelines
+---------
 
 .. autosummary::
     :toctree: generated/
     :template: class.rst
 
     features.LogVariance
+    filter_bank.FilterBank
 
 ------------
 Base & Utils

diff --git a/examples/plot_filterbank_csp_vs_csp.py b/examples/plot_filterbank_csp_vs_csp.py
@@ -67,8 +67,9 @@
 overwrite = False  # set to True if we want to overwrite cached results
 
 # broadband filters
-filters = [[8, 35]]
-paradigm = LeftRightImagery(filters=filters)
+fmin=8
+fmax=35
+paradigm = LeftRightImagery(fmin=fmin, fmax=fmax)
 evaluation = CrossSessionEvaluation(paradigm=paradigm, datasets=datasets,
                                     suffix='examples', overwrite=overwrite)
 results = evaluation.process(pipelines)
@@ -78,7 +79,7 @@
 
 # bank of 6 filters, in 4 Hz increments (the last band spans 28-35 Hz)
 filters = [[8, 12], [12, 16], [16, 20], [20, 24], [24, 28], [28, 35]]
-paradigm = FilterBankLeftRightImagery()
+paradigm = FilterBankLeftRightImagery(filters=filters)
 evaluation = CrossSessionEvaluation(paradigm=paradigm, datasets=datasets,
                                     suffix='examples', overwrite=overwrite)
 results_fb = evaluation.process(pipelines_fb)

diff --git a/moabb/datasets/Weibo2014.py b/moabb/datasets/Weibo2014.py
@@ -0,0 +1,161 @@
+'''
+Simple and compound motor imagery
+https://doi.org/10.1371/journal.pone.0114853
+'''
+
+from .base import BaseDataset
+import zipfile as z
+from scipy.io import loadmat
+from mne.datasets.utils import _get_path, _do_path_update
+from mne.utils import _fetch_file
+import mne
+import numpy as np
+import os
+import shutil
+
+import logging
+log = logging.getLogger()
+
+FILES = []
+FILES.append('https://dataverse.harvard.edu/api/access/datafile/2499178')
+FILES.append('https://dataverse.harvard.edu/api/access/datafile/2499182')
+FILES.append('https://dataverse.harvard.edu/api/access/datafile/2499179')
+
+
+def eeg_data_path(base_path, subject):
+    """Return the local ``.mat`` path for one subject, fetching it if absent.
+
+    The subjects are spread over three zip archives (see ``FILES``). When
+    ``subject_<n>.mat`` is not yet in ``base_path``, the archive covering
+    that subject number is downloaded (unless its zip is already present),
+    unpacked, and every extracted file whose name starts with one of the
+    archive's subject prefixes is moved to ``subject_<n>.mat``. The zip and
+    the extraction folder are deleted afterwards.
+
+    Parameters
+    ----------
+    base_path : str
+        Directory that holds (or will hold) the per-subject files.
+    subject : int
+        Subject number; 1-4, 5-7 and 8-10 select the three archives.
+
+    Returns
+    -------
+    str
+        ``<base_path>/subject_<subject>.mat``. The path is returned even
+        when no file was produced (e.g. a subject number outside 1-10).
+    """
+    # One entry per archive: subject numbers, the file-name prefix of each
+    # of those subjects (same order), and the archive's index into FILES.
+    archives = (
+        (range(1, 5), ['cl', 'cyy', 'kyf', 'lnn'], 0),
+        (range(5, 8), ['ls', 'ry', 'wcf'], 1),
+        (range(8, 11), ['wx', 'yyx', 'zd'], 2),
+    )
+
+    def subject_file(number):
+        return os.path.join(base_path, 'subject_{}.mat'.format(number))
+
+    def unpack_archive(numbers, prefixes, archive_index):
+        stem = 'data{}'.format(archive_index)
+        zip_path = os.path.join(base_path, stem + '.zip')
+        extract_dir = os.path.join(base_path, stem)
+        # A zip already on disk is reused instead of being downloaded again.
+        if not os.path.isfile(zip_path):
+            _fetch_file(FILES[archive_index], zip_path,
+                        print_destination=False)
+        with z.ZipFile(zip_path, 'r') as archive:
+            os.makedirs(extract_dir, exist_ok=True)
+            archive.extractall(extract_dir)
+            for fname in os.listdir(extract_dir):
+                for number, prefix in zip(numbers, prefixes):
+                    if fname.startswith(prefix):
+                        os.rename(os.path.join(extract_dir, fname),
+                                  subject_file(number))
+        # Only the renamed per-subject files are kept.
+        os.remove(zip_path)
+        shutil.rmtree(extract_dir)
+
+    target = subject_file(subject)
+    if not os.path.isfile(target):
+        for numbers, prefixes, archive_index in archives:
+            if subject in numbers:
+                unpack_archive(list(numbers), prefixes, archive_index)
+                break
+    return target
+
+
+class Weibo2014(BaseDataset):
+    """Motor Imagery dataset from Weibo et al 2014.
+
+    Dataset from the article *Evaluation of EEG oscillatory patterns and
+    cognitive process during simple and compound limb motor imagery* [1]_.
+
+    It contains data recorded on 10 subjects, with 60 electrodes.
+
+    The dataset was used to investigate how EEG patterns differ between
+    simple limb motor imagery and compound limb motor imagery. Seven mental
+    tasks were designed: three simple limb motor imagery tasks (left hand,
+    right hand, feet), three compound limb motor imagery tasks combining
+    hand with hand/foot (both hands, left hand combined with right foot,
+    right hand combined with left foot) and a rest state.
+
+    Each trial lasted 8 seconds. At the beginning of the trial a white
+    circle appeared at the center of the monitor. After 2 seconds, a red
+    circle (preparation cue) appeared for 1 second to remind the subjects to
+    pay attention to the upcoming instruction. The red circle then
+    disappeared and the instruction (‘Left Hand’, ‘Left Hand & Right Foot’,
+    etc.) was shown on the screen for 4 seconds, during which the
+    participants were asked to perform kinesthetic motor imagery rather
+    than a visual type of imagery while avoiding any muscle movement. After
+    7 seconds, ‘Rest’ was presented for 1 second before the next trial
+    (Fig. 1(a) of [1]_). The experiment was divided into 9 sections: 8
+    sections of 60 trials each for the six MI tasks (10 trials per MI task
+    in one section) and one section of 80 trials for the rest state. The
+    sequence of the six MI tasks was randomized. The break between sections
+    was about 5 to 10 minutes.
+
+    References
+    ----------
+    .. [1] Yi, Weibo, et al. "Evaluation of EEG oscillatory patterns and
+           cognitive process during simple and compound limb motor imagery."
+           PloS one 9.12 (2014). https://doi.org/10.1371/journal.pone.0114853
+    """
+
+    def __init__(self):
+        """Register subjects 1-10, one session each, and the 7 event codes."""
+        event_codes = {'left_hand': 1, 'right_hand': 2,
+                       'hands': 3, 'feet': 4, 'left_hand_right_foot': 5,
+                       'right_hand_left_foot': 6, 'rest': 7}
+        super().__init__(
+            subjects=list(range(1, 11)),
+            sessions_per_subject=1,
+            events=event_codes,
+            code='Weibo 2014',
+            # A full trial including rest spans 0-8 s; the instruction is
+            # on screen from 3 s to 7 s (see the class docstring).
+            interval=[3, 7],
+            paradigm='imagery',
+            doi='10.1371/journal.pone.0114853')
+
+    def _get_single_subject_data(self, subject):
+        """Return one subject's trials stitched into a single Raw object.
+
+        The ``.mat`` file stores separate trials. Each trial is de-meaned
+        per channel, given a stim channel carrying its label on the first
+        sample, padded with 50 zero samples on both sides, and all padded
+        trials are concatenated in time.
+
+        Returns
+        -------
+        dict
+            ``{'session_0': {'run_0': raw}}``.
+        """
+        mat = loadmat(self.data_path(subject), squeeze_me=True,
+                      struct_as_record=False,
+                      verify_compressed_data_integrity=False)
+        montage = mne.channels.read_montage('standard_1005')
+        ch_names = ['Fp1', 'Fpz', 'Fp2', 'AF3', 'AF4', 'F7', 'F5', 'F3', 'F1',
+                    'Fz', 'F2', 'F4', 'F6', 'F8', 'FT7', 'FC5', 'FC3', 'FC1',
+                    'FCz', 'FC2', 'FC4', 'FC6', 'FT8', 'T7', 'C5', 'C3', 'C1',
+                    'Cz', 'C2', 'C4', 'C6', 'T8', 'TP7', 'CP5', 'CP3', 'CP1',
+                    'CPz', 'CP2', 'CP4', 'CP6', 'TP8', 'P7', 'P5', 'P3', 'P1',
+                    'Pz', 'P2', 'P4', 'P6', 'P8', 'PO7', 'PO5', 'PO3', 'POz',
+                    'PO4', 'PO6', 'PO8', 'CB1', 'O1', 'Oz', 'O2', 'CB2', 'VEO',
+                    'HEO']
+
+        ch_types = ['eeg'] * 62 + ['eog'] * 2
+        # FIXME: unclear what CB1 / CB2 (positions 57 and 61) are, so they
+        # are typed 'misc' rather than 'eeg'.
+        for misc_position in (57, 61):
+            ch_types[misc_position] = 'misc'
+        # The info is created without a montage; the standard_1005 montage
+        # is attached to the Raw object at the end.
+        info = mne.create_info(ch_names=ch_names + ['STIM014'],
+                               ch_types=ch_types + ['stim'],
+                               sfreq=200, montage=None)
+        labels = mat['label'].ravel()
+        # Move the last axis of the stored array to the front so that axis 0
+        # indexes trials (it is matched against the labels below).
+        trials = np.transpose(mat['data'], axes=[2, 0, 1])
+        # De-mean each trial along its last axis.
+        trials = trials - np.mean(trials, axis=2, keepdims=True)
+        n_trials, _, n_samples = trials.shape
+        stim = np.zeros((n_trials, 1, n_samples))
+        # The event code sits on the first sample of its trial.
+        stim[:, 0, 0] = labels
+        # NOTE(review): the 1e-6 factor presumably converts microvolts to
+        # volts -- confirm against the dataset description.
+        epochs = np.concatenate([1e-6 * trials, stim], axis=1)
+        log.warning(
+            "Trial data de-meaned and concatenated with a buffer to create "
+            "cont data")
+        # 50 zero samples before and after every trial act as the buffer.
+        padding = np.zeros((epochs.shape[0], epochs.shape[1], 50))
+        padded = np.concatenate([padding, epochs, padding], axis=2)
+        continuous = np.concatenate(list(padded), axis=1)
+        raw = mne.io.RawArray(data=continuous, info=info, verbose=False)
+        raw.set_montage(montage)
+        return {'session_0': {'run_0': raw}}
+
+    def data_path(self, subject, path=None, force_update=False,
+                  update_path=None, verbose=None):
+        """Return the local ``.mat`` path for ``subject``, fetching if needed.
+
+        ``force_update``, ``update_path`` and ``verbose`` are accepted but
+        not used by this implementation.
+
+        Raises
+        ------
+        ValueError
+            If ``subject`` is not in ``self.subject_list``.
+        """
+        if subject not in self.subject_list:
+            raise ValueError("Invalid subject number")
+        config_key = 'MNE_DATASETS_WEIBO2014_PATH'
+        root = _get_path(path, config_key, "Weibo 2014")
+        _do_path_update(root, True, config_key, "Weibo 2014")
+        dataset_dir = os.path.join(root, "MNE-weibo-2014")
+        if not os.path.isdir(dataset_dir):
+            os.makedirs(dataset_dir)
+        return eeg_data_path(dataset_dir, subject)
diff --git a/moabb/datasets/Zhou2016.py b/moabb/datasets/Zhou2016.py
@@ -0,0 +1,109 @@
+'''
+Motor imagery dataset from Zhou et al. 2016 (left hand, right hand, feet).
+https://doi.org/10.1371/journal.pone.0162657
+'''
+
+from .base import BaseDataset
+import zipfile as z
+from mne.io import read_raw_cnt
+from mne.datasets.utils import _get_path, _do_path_update
+from mne.utils import _fetch_file
+import os
+import shutil
+
+DATA_PATH = 'https://ndownloader.figshare.com/files/3662952'
+
+
+def local_data_path(base_path, subject):
+    """Return the ``.cnt`` paths of one subject, fetching the data if needed.
+
+    If ``<base_path>/subject_<subject>`` does not exist, the archive at
+    ``DATA_PATH`` is downloaded and unpacked (skipped when a ``data``
+    folder is already present), the recordings of all four subjects are
+    moved from ``data/S<i>_<session><run>.cnt`` to
+    ``subject_<i>/<session><run>.cnt``, and the ``data`` folder is removed.
+
+    The move step is resumable: subject folders that already exist and
+    files that were already moved by an earlier, interrupted call are left
+    alone instead of raising.
+
+    Parameters
+    ----------
+    base_path : str
+        Directory that holds (or will hold) the ``subject_<i>`` folders.
+    subject : int
+        Subject number whose files are requested.
+
+    Returns
+    -------
+    list of list of str
+        One inner list per session ('1', '2', '3'), each holding the paths
+        of runs 'A' and 'B' in that order.
+
+    Raises
+    ------
+    OSError
+        If a recording is missing from both the ``data`` folder and its
+        destination (raised by ``os.rename``).
+    """
+    if not os.path.isdir(os.path.join(base_path,
+                                      'subject_{}'.format(subject))):
+        datapath = os.path.join(base_path, 'data')
+        if not os.path.isdir(datapath):
+            zip_path = os.path.join(base_path, 'data.zip')
+            _fetch_file(DATA_PATH, zip_path, print_destination=False)
+            # The archive is expected to unpack into <base_path>/data.
+            with z.ZipFile(zip_path, 'r') as f:
+                f.extractall(base_path)
+            os.remove(zip_path)
+        for i in range(1, 5):
+            subj_dir = os.path.join(base_path, 'subject_{}'.format(i))
+            # exist_ok: an interrupted earlier call may have created it.
+            os.makedirs(subj_dir, exist_ok=True)
+            for session in range(1, 4):
+                for run in ['A', 'B']:
+                    src = os.path.join(
+                        datapath, 'S{}_{}{}.cnt'.format(i, session, run))
+                    dst = os.path.join(
+                        subj_dir, '{}{}.cnt'.format(session, run))
+                    # Already moved by an earlier call: nothing to do.
+                    if os.path.isfile(dst) and not os.path.isfile(src):
+                        continue
+                    os.rename(src, dst)
+        shutil.rmtree(datapath)
+    subjpath = os.path.join(base_path, 'subject_{}'.format(subject))
+    return [[os.path.join(subjpath, '{}{}.cnt'.format(y, x))
+             for x in ['A', 'B']] for y in ['1', '2', '3']]
+
+
+class Zhou2016(BaseDataset):
+    """Motor Imagery dataset from Zhou et al 2016.
+
+    Dataset from the article *A Fully Automated Trial Selection Method for
+    Optimization of Motor Imagery Based Brain-Computer Interface* [1]_.
+    It contains data recorded on 4 subjects performing three types of
+    motor imagery: left hand, right hand and feet.
+
+    Every subject went through three sessions. Each session contained two
+    consecutive runs separated by a break of several minutes, and each run
+    comprised 75 trials (25 trials per class). The interval between two
+    sessions varied from several days to several months.
+
+    A trial started with a short beep announcing 1 s of preparation time,
+    followed by a red arrow pointing randomly in one of three directions
+    (left, right, or bottom) for 5 s, and then a black screen for 4 s. The
+    subject was instructed to immediately perform the imagination task of
+    left hand, right hand or foot movement according to the cue direction,
+    and to try to relax during the black screen.
+
+    References
+    ----------
+
+    .. [1] Zhou B, Wu X, Lv Z, Zhang L, Guo X (2016) A Fully Automated
+           Trial Selection Method for Optimization of Motor Imagery Based
+           Brain-Computer Interface. PLoS ONE 11(9).
+           https://doi.org/10.1371/journal.pone.0162657
+    """
+
+    def __init__(self):
+        """Register subjects 1-4, three sessions each, and the 3 classes."""
+        super().__init__(
+            subjects=list(range(1, 5)),
+            sessions_per_subject=3,
+            events={'left_hand': 1, 'right_hand': 2, 'feet': 3},
+            code='Zhou 2016',
+            # Trial timeline: preparation 0-1 s, motor imagery 1-6 s,
+            # break 6-10 s. The original author also noted "boundary
+            # effects" for this interval.
+            interval=[1, 6],
+            paradigm='imagery',
+            doi='10.1371/journal.pone.0162657')
+
+    def _get_single_subject_data(self, subject):
+        """Load every recording of one subject.
+
+        Returns
+        -------
+        dict
+            ``{'session_<s>': {'run_<r>': raw}}`` with zero-based ``s`` and
+            ``r`` following the order of the lists returned by
+            ``data_path``; each ``raw`` is the preloaded result of
+            ``read_raw_cnt`` with the 'standard_1005' montage.
+        """
+        session_files = self.data_path(subject)
+        return {
+            'session_{}'.format(sess_ind): {
+                'run_{}'.format(run_ind): read_raw_cnt(
+                    fname, preload=True, montage='standard_1005')
+                for run_ind, fname in enumerate(run_files)
+            }
+            for sess_ind, run_files in enumerate(session_files)
+        }
+
+    def data_path(self, subject, path=None, force_update=False,
+                  update_path=None, verbose=None):
+        """Return the per-session lists of ``.cnt`` paths for ``subject``.
+
+        ``force_update``, ``update_path`` and ``verbose`` are accepted but
+        not used by this implementation.
+
+        Raises
+        ------
+        ValueError
+            If ``subject`` is not in ``self.subject_list``.
+        """
+        if subject not in self.subject_list:
+            raise ValueError("Invalid subject number")
+        config_key = 'MNE_DATASETS_ZHOU2016_PATH'
+        root = _get_path(path, config_key, "Zhou 2016")
+        _do_path_update(root, True, config_key, "Zhou 2016")
+        dataset_dir = os.path.join(root, "MNE-zhou-2016")
+        if not os.path.isdir(dataset_dir):
+            os.makedirs(dataset_dir)
+        return local_data_path(dataset_dir, subject)
diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py
@@ -4,10 +4,14 @@
 and will convert them into a MNE raw object. There are options to pool all the
 different recording sessions per subject or to evaluate them separately.
 """
-from .gigadb import GigaDbMI
+# flake8: noqa
+from .gigadb import Cho2017
 from .alex_mi import AlexMI
 from .physionet_mi import PhysionetMI
-from .bnci import BNCI2014001, BNCI2014002, BNCI2014004, BNCI2015001, BNCI2015004
+from .bnci import (BNCI2014001, BNCI2014002, BNCI2014004, BNCI2015001,
+                   BNCI2015004)
 from .openvibe_mi import OpenvibeMI
-from .bbci_eeg_fnirs import BBCIEEGfNIRS
-from .upper_limb import UpperLimb
+from .bbci_eeg_fnirs import Shin2017A, Shin2017B
+from .upper_limb import Ofner2017
+from .Weibo2014 import Weibo2014
+from .Zhou2016 import Zhou2016