
New datasets and parameter confirmation #32

Merged: 21 commits, Apr 8, 2018
4 changes: 3 additions & 1 deletion docs/source/datasets.rst
@@ -25,7 +25,9 @@ Motor Imagery Datasets
BBCIEEGfNIRS
OpenvibeMI
PhysionetMI
UpperLimb
Zhou2016
Weibo2014

------------
ERP Datasets
7 changes: 4 additions & 3 deletions examples/plot_filterbank_csp_vs_csp.py
@@ -67,8 +67,9 @@
overwrite = False # set to True if we want to overwrite cached results

# broadband filters
filters = [[8, 35]]
paradigm = LeftRightImagery(filters=filters)
fmin = 8
fmax = 35
paradigm = LeftRightImagery(fmin=fmin, fmax=fmax)
evaluation = CrossSessionEvaluation(paradigm=paradigm, datasets=datasets,
suffix='examples', overwrite=overwrite)
results = evaluation.process(pipelines)
@@ -78,7 +79,7 @@

# bank of 6 filters, in 4 Hz increments
filters = [[8, 12], [12, 16], [16, 20], [20, 24], [24, 28], [28, 35]]
paradigm = FilterBankLeftRightImagery()
paradigm = FilterBankLeftRightImagery(filters=filters)
evaluation = CrossSessionEvaluation(paradigm=paradigm, datasets=datasets,
suffix='examples', overwrite=overwrite)
results_fb = evaluation.process(pipelines_fb)
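For reference, the six bands in the example above can be generated rather than typed by hand; a minimal sketch (the variable names are mine, not part of moabb):

```python
# Build the 4 Hz filter-bank bands used above: band edges every 4 Hz from
# 8 to 28 Hz, with the final band widened to close at 35 Hz, giving [28, 35].
edges = list(range(8, 29, 4))  # [8, 12, 16, 20, 24, 28]
filters = [[lo, hi] for lo, hi in zip(edges[:-1], edges[1:])] + [[edges[-1], 35]]
print(filters)
# → [[8, 12], [12, 16], [16, 20], [20, 24], [24, 28], [28, 35]]
```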
161 changes: 161 additions & 0 deletions moabb/datasets/Weibo2014.py
@@ -0,0 +1,161 @@
'''
Simple and compound motor imagery
https://doi.org/10.1371/journal.pone.0114853
'''

from .base import BaseDataset
import zipfile as z
from scipy.io import loadmat
from mne.datasets.utils import _get_path, _do_path_update
from mne.utils import _fetch_file
import mne
import numpy as np
import os
import shutil

import logging
log = logging.getLogger()

FILES = []
FILES.append('https://dataverse.harvard.edu/api/access/datafile/2499178')
FILES.append('https://dataverse.harvard.edu/api/access/datafile/2499182')
FILES.append('https://dataverse.harvard.edu/api/access/datafile/2499179')


Member: Can we somehow avoid this code duplication ?

def eeg_data_path(base_path, subject):
file1_subj = ['cl', 'cyy', 'kyf', 'lnn']
file2_subj = ['ls', 'ry', 'wcf']
file3_subj = ['wx', 'yyx', 'zd']

def get_subjects(sub_inds, sub_names, ind):
dataname = 'data{}'.format(ind)
if not os.path.isfile(os.path.join(base_path, dataname+'.zip')):
_fetch_file(FILES[ind], os.path.join(
base_path, dataname + '.zip'), print_destination=False)
with z.ZipFile(os.path.join(base_path, dataname + '.zip'), 'r') as f:
os.makedirs(os.path.join(base_path, dataname), exist_ok=True)
f.extractall(os.path.join(base_path, dataname))
for fname in os.listdir(os.path.join(base_path, dataname)):
for ind, prefix in zip(sub_inds, sub_names):
if fname.startswith(prefix):
os.rename(os.path.join(base_path, dataname, fname),
os.path.join(base_path,
'subject_{}.mat'.format(ind)))
os.remove(os.path.join(base_path, dataname + '.zip'))
shutil.rmtree(os.path.join(base_path, dataname))

if not os.path.isfile(os.path.join(base_path,
'subject_{}.mat'.format(subject))):
if subject in range(1, 5):
get_subjects(list(range(1, 5)), file1_subj, 0)
elif subject in range(5, 8):
get_subjects(list(range(5, 8)), file2_subj, 1)
elif subject in range(8, 11):
get_subjects(list(range(8, 11)), file3_subj, 2)
return os.path.join(base_path, 'subject_{}.mat'.format(subject))
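On the duplication question above, one possible shape for a refactor (a sketch only; the names are hypothetical and not part of this PR) is a single table mapping subject ranges to file prefixes and archive indices:

```python
# Hypothetical refactor sketch: one table replaces the three if/elif branches.
SUBJECT_GROUPS = [
    (range(1, 5), ['cl', 'cyy', 'kyf', 'lnn'], 0),
    (range(5, 8), ['ls', 'ry', 'wcf'], 1),
    (range(8, 11), ['wx', 'yyx', 'zd'], 2),
]


def group_for(subject):
    """Return (subject indices, file prefixes, archive index) for a subject."""
    for sub_range, names, file_ind in SUBJECT_GROUPS:
        if subject in sub_range:
            return list(sub_range), names, file_ind
    raise ValueError("Invalid subject number: {}".format(subject))


print(group_for(6)[2])  # → 1
```

With this table, `get_subjects` could be called once with the looked-up group instead of duplicating the dispatch logic.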


class Weibo2014(BaseDataset):
"""Weibo 2014 Motor Imagery dataset.

Dataset from the article *Evaluation of EEG oscillatory patterns and
cognitive process during simple and compound limb motor imagery* [1]_.

It contains data recorded on 10 subjects, with 60 electrodes.

This dataset was used to investigate the differences in EEG patterns
between simple limb motor imagery and compound limb motor imagery. Seven
kinds of mental tasks were designed: three simple limb motor imagery tasks
(left hand, right hand, feet), three compound limb motor imagery tasks
combining hand with hand/foot (both hands, left hand with right foot,
right hand with left foot), and a rest state.

At the beginning of each 8-second trial, a white circle appeared at the
center of the monitor. After 2 seconds, a red circle (preparation cue)
appeared for 1 second to remind the subjects to pay attention to the
upcoming character indication. The red circle then disappeared and a
character indication ('Left Hand', 'Left Hand & Right Foot', etc.) was
presented on the screen for 4 seconds, during which the participants were
asked to perform kinesthetic motor imagery, rather than visual imagery,
while avoiding any muscle movement. After 7 seconds, 'Rest' was presented
for 1 second before the next trial (Fig. 1(a)). The experiments were
divided into 9 sections: 8 sections of 60 trials each for the six MI tasks
(10 trials per task per section), and one section of 80 trials for the
rest state. The sequence of the six MI tasks was randomized. Inter-section
breaks lasted about 5 to 10 minutes.

References
-----------
.. [1] Yi, Weibo, et al. "Evaluation of EEG oscillatory patterns and
cognitive process during simple and compound limb motor imagery."
PloS one 9.12 (2014). https://doi.org/10.1371/journal.pone.0114853
"""

def __init__(self):
super().__init__(
subjects=list(range(1, 11)),
sessions_per_subject=1,
events=dict(left_hand=1, right_hand=2,
hands=3, feet=4, left_hand_right_foot=5,
right_hand_left_foot=6, rest=7),
code='Weibo 2014',
# Full trial w/ rest is 0-8
interval=[3, 7],
paradigm='imagery',
doi='10.1371/journal.pone.0114853')

def _get_single_subject_data(self, subject):
"""return data for a single subject"""
fname = self.data_path(subject)
# TODO: add 1s 0 buffer between trials and make continuous
data = loadmat(fname, squeeze_me=True, struct_as_record=False,
verify_compressed_data_integrity=False)
montage = mne.channels.read_montage('standard_1005')
ch_names = ['Fp1', 'Fpz', 'Fp2', 'AF3', 'AF4', 'F7', 'F5', 'F3', 'F1',
'Fz', 'F2', 'F4', 'F6', 'F8', 'FT7', 'FC5', 'FC3', 'FC1',
'FCz', 'FC2', 'FC4', 'FC6', 'FT8', 'T7', 'C5', 'C3', 'C1',
'Cz', 'C2', 'C4', 'C6', 'T8', 'TP7', 'CP5', 'CP3', 'CP1',
'CPz', 'CP2', 'CP4', 'CP6', 'TP8', 'P7', 'P5', 'P3', 'P1',
'Pz', 'P2', 'P4', 'P6', 'P8', 'PO7', 'PO5', 'PO3', 'POz',
'PO4', 'PO6', 'PO8', 'CB1', 'O1', 'Oz', 'O2', 'CB2', 'VEO',
'HEO']

ch_types = ['eeg'] * 62 + ['eog'] * 2
# FIXME: not sure what the CB1 / CB2 channels are
ch_types[57] = 'misc'
ch_types[61] = 'misc'
info = mne.create_info(ch_names=ch_names + ['STIM014'],
ch_types=ch_types + ['stim'],
sfreq=200, montage=None)
# until we get the channel names montage is None
event_ids = data['label'].ravel()
raw_data = np.transpose(data['data'], axes=[2, 0, 1])
# de-mean each trial
raw_data = raw_data - np.mean(raw_data, axis=2, keepdims=True)
raw_events = np.zeros((raw_data.shape[0], 1, raw_data.shape[2]))
raw_events[:, 0, 0] = event_ids
data = np.concatenate([1e-6 * raw_data, raw_events], axis=1)
# add buffer in between trials
log.warning(
"Trial data de-meaned and concatenated with a buffer to create "
"cont data")
zeroshape = (data.shape[0], data.shape[1], 50)
data = np.concatenate([np.zeros(zeroshape), data,
np.zeros(zeroshape)], axis=2)
raw = mne.io.RawArray(data=np.concatenate(list(data), axis=1),
info=info, verbose=False)
raw.set_montage(montage)
return {'session_0': {'run_0': raw}}
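The de-mean and zero-buffer steps in the loader above can be illustrated on toy data (shapes are invented for the example; the real array is trials x channels x samples after the transpose):

```python
import numpy as np

rng = np.random.default_rng(0)
trials = rng.normal(size=(2, 3, 10))  # 2 trials, 3 channels, 10 samples

# De-mean each trial per channel, as in _get_single_subject_data above.
demeaned = trials - trials.mean(axis=2, keepdims=True)

# Pad a zero buffer on both sides of every trial, then lay the trials end
# to end along the time axis to form one continuous array.
buf = np.zeros((2, 3, 5))
padded = np.concatenate([buf, demeaned, buf], axis=2)
continuous = np.concatenate(list(padded), axis=1)

print(continuous.shape)  # → (3, 40)
```

Each trial grows from 10 to 20 samples after padding, and the two padded trials concatenate to a single (channels, time) array, which is what `mne.io.RawArray` expects.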

def data_path(self, subject, path=None, force_update=False,
update_path=None, verbose=None):
if subject not in self.subject_list:
raise ValueError("Invalid subject number")
key = 'MNE_DATASETS_WEIBO2014_PATH'

Member: I usually got a weird warning about the non-standard MNE key. Can we track down this kind of thing and change the keys for new datasets?

path = _get_path(path, key, "Weibo 2014")
_do_path_update(path, True, key, "Weibo 2014")
basepath = os.path.join(path, "MNE-weibo-2014")
if not os.path.isdir(basepath):
os.makedirs(basepath)
return eeg_data_path(basepath, subject)
110 changes: 110 additions & 0 deletions moabb/datasets/Zhou2016.py
@@ -0,0 +1,110 @@
Member: we don't really need a module docstring for this one; it's not parsed by the doc. You can still leave this doc here, but put everything you want to see in the doc in the class docstring.

'''
Motor imagery dataset from Zhou et al. 2016
https://doi.org/10.1371/journal.pone.0162657
'''

from .base import BaseDataset
import zipfile as z
from scipy.io import loadmat
from mne.datasets.utils import _get_path, _do_path_update
from mne.utils import _fetch_file
import mne
import numpy as np
import os
import shutil

DATA_PATH = 'https://ndownloader.figshare.com/files/3662952'


def local_data_path(base_path, subject):
if not os.path.isdir(os.path.join(base_path,
'subject_{}'.format(subject))):
if not os.path.isdir(os.path.join(base_path, 'data')):
_fetch_file(DATA_PATH, os.path.join(base_path, 'data.zip'),
print_destination=False)
with z.ZipFile(os.path.join(base_path, 'data.zip'), 'r') as f:
f.extractall(base_path)
os.remove(os.path.join(base_path, 'data.zip'))
datapath = os.path.join(base_path, 'data')
for i in range(1, 5):
os.makedirs(os.path.join(base_path, 'subject_{}'.format(i)))
for session in range(1,4):
for run in ['A','B']:
os.rename(os.path.join(datapath, 'S{}_{}{}.cnt'.format(i,session, run)),
os.path.join(base_path,
'subject_{}'.format(i),
'{}{}.cnt'.format(session,run)))
shutil.rmtree(os.path.join(base_path, 'data'))
subjpath = os.path.join(base_path, 'subject_{}'.format(subject))
return [[os.path.join(subjpath,
'{}{}.cnt'.format(y, x)) for x in ['A', 'B']] for y in ['1', '2', '3']]
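The nested list returned above is a 3 (sessions) x 2 (runs) grid of file paths; a standalone sketch of the same mapping (the base path here is hypothetical):

```python
import os


def session_run_paths(base_path, subject):
    # Mirrors the return shape of local_data_path: sessions '1'-'3',
    # runs 'A' and 'B', one .cnt file per (session, run) pair.
    subjpath = os.path.join(base_path, 'subject_{}'.format(subject))
    return [[os.path.join(subjpath, '{}{}.cnt'.format(sess, run))
             for run in ['A', 'B']] for sess in ['1', '2', '3']]


paths = session_run_paths('MNE-zhou-2016', 2)
print(len(paths), len(paths[0]))  # → 3 2
print(os.path.basename(paths[0][1]))  # → 1B.cnt
```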


class Zhou2016(BaseDataset):
"""Dataset from Zhou et al. 2016.

Dataset from the article *A Fully Automated Trial Selection Method for
Optimization of Motor Imagery Based Brain-Computer Interface* [1]_.
This dataset contains data recorded from 4 subjects performing 3 types of
motor imagery: left hand, right hand, and feet.

Every subject went through three sessions, each containing two consecutive
runs separated by breaks of several minutes; each run comprised 75 trials
(25 trials per class). The intervals between two sessions varied from
several days to several months.

A trial started with a short beep indicating 1 s of preparation time,
followed by a red arrow pointing randomly in one of three directions
(left, right, or bottom) and lasting 5 s, and then a black screen for 4 s.
The subject was instructed to immediately perform the imagination task of
left hand, right hand, or foot movement according to the cue direction,
and to try to relax during the black screen.

References
----------

.. [1] Zhou B, Wu X, Lv Z, Zhang L, Guo X (2016) A Fully Automated
Trial Selection Method for Optimization of Motor Imagery Based
Brain-Computer Interface. PLoS ONE 11(9).
https://doi.org/10.1371/journal.pone.0162657
"""

def __init__(self):
super().__init__(
subjects=list(range(1, 5)),
sessions_per_subject=3,
events=dict(left_hand=1, right_hand=2,
feet=3),
code='Zhou 2016',
# MI 1-6s, prepare 0-1, break 6-10
# boundary effects
interval=[1, 6],
paradigm='imagery',
doi='10.1371/journal.pone.0162657')

def _get_single_subject_data(self, subject):
"""return data for a single subject"""
files = self.data_path(subject)

out = {}
for sess_ind, runlist in enumerate(files):
sess_key = 'session_{}'.format(sess_ind)
out[sess_key] = {}
for run_ind, fname in enumerate(runlist):
run_key = 'run_{}'.format(run_ind)
out[sess_key][run_key] = mne.io.read_raw_cnt(fname,
preload=True,
montage='standard_1020')
return out

def data_path(self, subject, path=None, force_update=False,
update_path=None, verbose=None):
if subject not in self.subject_list:
raise ValueError("Invalid subject number")
key = 'MNE_DATASETS_ZHOU2016_PATH'

Member: idem, let's change the key to the MNE standard key (whatever the standard is)

Member: NVM, this is a warning because it's not in the list of pre-approved config names. https://github.com/mne-tools/mne-python/blob/master/mne/utils.py#L1478

Collaborator Author: We should try and deal with this at some point though, as the list keeps growing... maybe our own config file?

path = _get_path(path, key, "Zhou 2016")
_do_path_update(path, True, key, "Zhou 2016")

Member: do we have any other option than forcing the path?

Collaborator Author: this is what we do everywhere; it should be another PR I think, revamping the download system

Member: Yep, we should. Let's keep this for another PR (not a priority).

basepath = os.path.join(path, "MNE-zhou-2016")
if not os.path.isdir(basepath):
os.makedirs(basepath)
return local_data_path(basepath, subject)
2 changes: 2 additions & 0 deletions moabb/datasets/__init__.py
@@ -11,3 +11,5 @@
from .openvibe_mi import OpenvibeMI
from .bbci_eeg_fnirs import BBCIEEGfNIRS
from .upper_limb import UpperLimb
from .Weibo2014 import Weibo2014

Member: please, add them in docs/source/datasets.rst

from .Zhou2016 import Zhou2016
16 changes: 15 additions & 1 deletion moabb/datasets/alex_mi.py
@@ -11,7 +11,21 @@


class AlexMI(BaseDataset):
"""Alex Motor Imagery dataset"""
"""Alex Motor Imagery dataset.

This dataset contains EEG recordings from 8 subjects performing 2 motor
imagination tasks (right hand and feet) plus rest trials. Data were
recorded at 512 Hz with 16 wet electrodes (Fpz, F7, F3, Fz, F4, F8, T7,
C3, Cz, C4, T8, P7, P3, Pz, P4, P8) and a g.tec g.USBamp EEG amplifier.

Files are provided in MNE raw file format. A stimulation channel encodes
the timing of the motor imagination. The start of a trial is encoded as 1;
the actual start of the motor imagination is encoded as 2 for imagination
of a right hand movement, 3 for imagination of both feet movement, and 4
for a rest trial.

The duration of each trial is 3 seconds. There are 20 trials of each class.
"""

def __init__(self):
super().__init__(
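A toy illustration of the stim-channel encoding described in the AlexMI docstring above (the sample values are invented for the example):

```python
import numpy as np

# 1 marks a trial start; 2/3/4 mark the onset of right-hand imagery,
# both-feet imagery, and a rest trial, respectively.
stim = np.array([0, 1, 0, 2, 0, 0, 1, 0, 3, 0, 1, 0, 4, 0])

event_names = {2: 'right_hand', 3: 'feet', 4: 'rest'}
onsets = [(i, event_names[v]) for i, v in enumerate(stim)
          if v in event_names]
print(onsets)  # → [(3, 'right_hand'), (8, 'feet'), (12, 'rest')]
```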
42 changes: 40 additions & 2 deletions moabb/datasets/base.py
@@ -2,14 +2,52 @@
Base class for a dataset
"""
import abc
import logging

log = logging.getLogger()


class BaseDataset(metaclass=abc.ABCMeta):
"""Base dataset"""

def __init__(self, subjects, sessions_per_subject, events, code, interval,
paradigm, doi=None):
def __init__(self, subjects, sessions_per_subject, events,
code, interval, paradigm, doi=None):
"""
Parameters required for all datasets

Parameters
----------
subjects: List of int
List of subject numbers # TODO: make identifiers more general

sessions_per_subject: int
Number of sessions per subject

events: dict of string: int
String codes for events matched with labels in the stim channel. Currently, imagery codes can include:

- left_hand
- right_hand
- hands
- feet
- rest
- left_hand_right_foot
- right_hand_left_foot
- tongue
- navigation
- subtraction
- word_ass (for word association)

Member: we also have elbow and other stuff. I'm wondering if we should start using the MNE hierarchical event definition. For example, you can define an event as hand/left and hand/right, which allows selecting all hand events after epoching by doing Epochs['hand']. But this is another discussion.

Collaborator Author: Well, we lose nothing by adding it, so why not; I'll go through and change all left_hand to hand/left etc. Although there is one more level to worry about: imagined vs actual.

Member: you can do imagined/hand/left, etc.

Member: but we can skip this for now, and see how we can deal with that later.
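The hierarchical selection discussed in this thread can be sketched without MNE; this is only an illustration of the '/'-separated tag matching that `Epochs['hand']` performs, not MNE's actual implementation:

```python
def matches(label, query):
    # A label like 'imagined/hand/left' matches the query 'hand' when every
    # '/'-separated tag in the query appears among the label's tags.
    return set(query.split('/')) <= set(label.split('/'))


labels = ['imagined/hand/left', 'imagined/hand/right', 'imagined/feet']
print([lab for lab in labels if matches(lab, 'hand')])
# → ['imagined/hand/left', 'imagined/hand/right']
```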

code: string
Unique identifier for dataset, used in all plots

interval: list with 2 entries
Imagery interval as defined in the dataset description

paradigm: ['p300','imagery']
Defines what sort of dataset this is (currently only imagery is implemented)

doi: DOI for dataset, optional (for now)
"""
if not isinstance(subjects, list):
raise ValueError("subjects must be a list")
