NeuroTechX · vinay-jayaram · Apr 8, 2018 · Apr 5, 2018 · Apr 5, 2018 · Apr 5, 2018
diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
@@ -25,7 +25,9 @@ Motor Imagery Datasets
     BBCIEEGfNIRS
     OpenvibeMI
     PhysionetMI
-    UpperLimb
+    UpperLimb
+    Zhou2016
+    Weibo2014
 
 ------------
 ERP Datasets

diff --git a/moabb/datasets/Weibo2014.py b/moabb/datasets/Weibo2014.py
@@ -0,0 +1,122 @@
+'''
+Simple and compound motor imagery
+https://doi.org/10.1371/journal.pone.0114853
+'''
+
+from .base import BaseDataset
+import zipfile as z
+from scipy.io import loadmat
+from mne.datasets.utils import _get_path, _do_path_update
+from mne.utils import _fetch_file
+import mne
+import numpy as np
+import os
+import shutil
+
+import logging
+log = logging.getLogger()
+
+# Download URLs of the three zip archives; eeg_data_path indexes this list
+# with 0, 1 and 2.
+FILES = [
+    'https://dataverse.harvard.edu/api/access/datafile/2499178',
+    'https://dataverse.harvard.edu/api/access/datafile/2499182',
+    'https://dataverse.harvard.edu/api/access/datafile/2499179',
+]
+
+
+def eeg_data_path(base_path, subject):
+    """Return the path of the ``.mat`` file of ``subject``, fetching it if needed.
+
+    When ``subject_<subject>.mat`` is not present in ``base_path``, the zip
+    archive containing that subject is downloaded (unless the zip is already
+    there), extracted, and every extracted file whose name starts with a known
+    subject prefix is moved to ``base_path/subject_<n>.mat``.  The zip and the
+    extraction directory are deleted afterwards.
+
+    Parameters
+    ----------
+    base_path : str
+        Directory in which the subject files are stored.
+    subject : int
+        Subject number.  Numbers 1-10 are mapped to an archive; for any other
+        value nothing is downloaded and the (possibly non-existent) path is
+        still returned.
+
+    Returns
+    -------
+    str
+        ``base_path/subject_<subject>.mat``
+    """
+    # (subject numbers, file-name prefixes in the same order, index in FILES)
+    archives = [
+        (range(1, 5), ['cl', 'cyy', 'kyf', 'lnn'], 0),
+        (range(5, 8), ['ls', 'ry', 'wcf'], 1),
+        (range(8, 11), ['wx', 'yyx', 'zd'], 2),
+    ]
+
+    def get_subjects(sub_inds, sub_names, ind):
+        """Fetch archive ``ind`` and store its files as ``subject_<n>.mat``."""
+        dataname = 'data{}'.format(ind)
+        zip_path = os.path.join(base_path, dataname + '.zip')
+        extract_dir = os.path.join(base_path, dataname)
+        if not os.path.isfile(zip_path):
+            _fetch_file(FILES[ind], zip_path, print_destination=False)
+        with z.ZipFile(zip_path, 'r') as f:
+            os.makedirs(extract_dir, exist_ok=True)
+            f.extractall(extract_dir)
+        for fname in os.listdir(extract_dir):
+            for sub_ind, prefix in zip(sub_inds, sub_names):
+                if fname.startswith(prefix):
+                    # os.replace overwrites a subject file left over from an
+                    # earlier extraction on every platform (os.rename raises
+                    # on Windows when the target exists).
+                    os.replace(os.path.join(extract_dir, fname),
+                               os.path.join(base_path,
+                                            'subject_{}.mat'.format(sub_ind)))
+                    # the file has been moved; do not try further prefixes
+                    break
+        os.remove(zip_path)
+        shutil.rmtree(extract_dir)
+
+    subject_file = os.path.join(base_path, 'subject_{}.mat'.format(subject))
+    if not os.path.isfile(subject_file):
+        for sub_inds, sub_names, file_ind in archives:
+            if subject in sub_inds:
+                get_subjects(list(sub_inds), sub_names, file_ind)
+                break
+    return subject_file
+
+
+class Weibo2014(BaseDataset):
+    """Weibo 2014 Motor Imagery dataset [1]
+
+    The file of each subject is a ``.mat`` file from which the entries
+    ``data`` (trial data) and ``label`` (one event code per trial) are read.
+    Trials are de-meaned, zero-padded and concatenated into one continuous
+    ``mne.io.RawArray`` with 64 EEG channels and one stim channel.
+
+    References
+    ----------
+    [1] Yi Weibo, 2014, "EEG data of simple and compound limb
+    motor imagery", https://doi.org/10.7910/DVN/27306, Harvard Dataverse, V1
+
+    """
+
+    def __init__(self):
+        super().__init__(
+            subjects=list(range(1, 11)),
+            sessions_per_subject=1,
+            events=dict(left_hand=1, right_hand=2,
+                        hands=3, feet=4, left_hand_right_foot=5,
+                        right_hand_left_foot=6, rest=7),
+            code='Weibo 2014',
+            # The full trial (0-8) is used.
+            # NOTE(review): the previous comment here said that the interval
+            # was shortened to reduce boundary effects of a trialwise
+            # bandpass, which does not match [0, 8] -- confirm the intent.
+            interval=[0, 8],
+            paradigm='imagery',
+            doi='10.7910/DVN/27306')
+
+    def _get_single_subject_data(self, subject):
+        """Return the data of a single subject as one continuous recording.
+
+        Parameters
+        ----------
+        subject : int
+            Subject number, passed on to ``data_path``.
+
+        Returns
+        -------
+        dict
+            ``{'session_0': {'run_0': raw}}`` where ``raw`` is an
+            ``mne.io.RawArray`` with channels ``EEG1`` .. ``EEG64`` and the
+            stim channel ``STIM014``, created with ``sfreq=200``.
+        """
+        fname = self.data_path(subject)
+        # TODO: add 1s 0 buffer between trials and make continuous
+        # (currently 50 zero samples are added on each side of every trial)
+        data = loadmat(fname, squeeze_me=True, struct_as_record=False,
+                       verify_compressed_data_integrity=False)
+        # until we get the channel names generic names are used and the
+        # montage is None
+        ch_names = ['EEG{}'.format(i) for i in range(1, 65)] + ['STIM014']
+        ch_types = ['eeg'] * 64 + ['stim']
+        info = mne.create_info(ch_names=ch_names, ch_types=ch_types,
+                               sfreq=200, montage=None)
+        event_ids = data['label'].ravel()
+        # Move the last axis of the stored array to the front; presumably this
+        # turns (channels, samples, trials) into (trials, channels, samples)
+        # -- TODO confirm against the data files.
+        raw_data = np.transpose(data['data'], axes=[2, 0, 1])
+        # de-mean each trial
+        raw_data = raw_data - np.mean(raw_data, axis=2, keepdims=True)
+        # one extra channel per trial that carries the event code in its
+        # first sample and zeros elsewhere
+        raw_events = np.zeros((raw_data.shape[0], 1, raw_data.shape[2]))
+        raw_events[:, 0, 0] = event_ids
+        data = np.concatenate([raw_data, raw_events], axis=1)
+        # add buffer in between trials
+        log.warning(
+            'Trial data de-meaned and concatenated with a buffer to create cont data')
+        zeroshape = (data.shape[0], data.shape[1], 50)
+        data = np.concatenate([np.zeros(zeroshape), data,
+                               np.zeros(zeroshape)], axis=2)
+        # list(data) splits along the first axis; joining the pieces along
+        # axis 1 lays the padded trials end to end
+        raw = mne.io.RawArray(data=np.concatenate(list(data), axis=1),
+                              info=info, verbose=False)
+        return {'session_0': {'run_0': raw}}
+
+    def data_path(self, subject, path=None, force_update=False,
+                  update_path=None, verbose=None):
+        """Return the local path of the data file of ``subject``.
+
+        Parameters
+        ----------
+        subject : int
+            Subject number; must be contained in ``self.subject_list``.
+        path : str | None
+            Passed to ``_get_path`` together with the config key
+            ``MNE_DATASETS_WEIBO2014_PATH``.
+        force_update, update_path, verbose
+            Accepted but not used by this implementation.
+
+        Returns
+        -------
+        str
+            Result of ``eeg_data_path`` for the directory ``MNE-weibo-2014``
+            below the resolved path.
+
+        Raises
+        ------
+        ValueError
+            If ``subject`` is not in ``self.subject_list``.
+        """
+        if subject not in self.subject_list:
+            raise ValueError("Invalid subject number")
+        key = 'MNE_DATASETS_WEIBO2014_PATH'
+        path = _get_path(path, key, "Weibo 2014")
+        # presumably stores the resolved path under `key` in the MNE
+        # configuration -- verify against mne.datasets.utils
+        _do_path_update(path, True, key, "Weibo 2014")
+        basepath = os.path.join(path, "MNE-weibo-2014")
+        os.makedirs(basepath, exist_ok=True)
+        return eeg_data_path(basepath, subject)
diff --git a/moabb/datasets/Zhou2016.py b/moabb/datasets/Zhou2016.py
@@ -0,0 +1,119 @@
+'''
+Three-class motor imagery dataset from Zhou et al. 2016
+https://doi.org/10.1371/journal.pone.0162657
+'''
+
+from .base import BaseDataset
+import zipfile as z
+from scipy.io import loadmat
+from mne.datasets.utils import _get_path, _do_path_update
+from mne.utils import _fetch_file
+import mne
+import numpy as np
+import os
+import shutil
+
+# URL of the zip archive that local_data_path downloads as 'data.zip'
+DATA_PATH = 'https://ndownloader.figshare.com/files/3662952'
+
+
+def local_data_path(base_path, subject):
+    """Return the ``.cnt`` file paths of ``subject``, fetching data if needed.
+
+    When ``base_path/subject_<subject>`` is not a directory, the archive is
+    downloaded and extracted (unless ``base_path/data`` already exists), the
+    files ``S<i>_<session><run>.cnt`` of subjects 1-4 are moved to
+    ``base_path/subject_<i>/<session><run>.cnt`` and the ``data`` directory
+    is deleted.
+
+    Parameters
+    ----------
+    base_path : str
+        Directory in which the per-subject directories are stored.
+    subject : int
+        Subject number.
+
+    Returns
+    -------
+    list of list of str
+        One inner list per session ('1', '2', '3'), each holding the paths
+        of runs 'A' and 'B' of that session.
+    """
+    subjpath = os.path.join(base_path, 'subject_{}'.format(subject))
+    if not os.path.isdir(subjpath):
+        datapath = os.path.join(base_path, 'data')
+        if not os.path.isdir(datapath):
+            zippath = os.path.join(base_path, 'data.zip')
+            _fetch_file(DATA_PATH, zippath, print_destination=False)
+            with z.ZipFile(zippath, 'r') as f:
+                f.extractall(base_path)
+            os.remove(zippath)
+        for i in range(1, 5):
+            target_dir = os.path.join(base_path, 'subject_{}'.format(i))
+            # exist_ok: directories of other subjects may be left over from
+            # an earlier, partially completed run
+            os.makedirs(target_dir, exist_ok=True)
+            for session in range(1, 4):
+                for run in ['A', 'B']:
+                    # os.replace overwrites an existing target on every
+                    # platform (os.rename raises on Windows)
+                    os.replace(
+                        os.path.join(datapath,
+                                     'S{}_{}{}.cnt'.format(i, session, run)),
+                        os.path.join(target_dir,
+                                     '{}{}.cnt'.format(session, run)))
+        shutil.rmtree(datapath)
+    return [[os.path.join(subjpath, '{}{}.cnt'.format(y, x))
+             for x in ['A', 'B']]
+            for y in ['1', '2', '3']]
+
+
+class Zhou2016(BaseDataset):
+    """Motor imagery dataset from Zhou et al. 2016 [1]
+
+    Four subjects, three sessions per subject and two runs per session, with
+    the events ``left_hand``, ``right_hand`` and ``feet``.
+
+    Abstract
+    ------------
+
+    Independent component analysis (ICA) as a promising spatial filtering method
+    can separate motor-related independent components (MRICs) from the
+    multichannel electroencephalogram (EEG) signals. However, the unpredictable
+    burst interferences may significantly degrade the performance of ICA-based
+    brain-computer interface (BCI) system. In this study, we proposed a new
+    algorithm frame to address this issue by combining the single-trial-based
+    ICA filter with zero-training classifier. We developed a two-round data
+    selection method to identify automatically the badly corrupted EEG trials in
+    the training set. The “high quality” training trials were utilized to
+    optimize the ICA filter. In addition, we proposed an accuracy-matrix method
+    to locate the artifact data segments within a single trial and investigated
+    which types of artifacts can influence the performance of the ICA-based
+    MIBCIs. Twenty-six EEG datasets of three-class motor imagery were used to
+    validate the proposed methods, and the classification accuracies were
+    compared with that obtained by frequently used common spatial pattern (CSP)
+    spatial filtering algorithm. The experimental results demonstrated that the
+    proposed optimizing strategy could effectively improve the stability,
+    practicality and classification performance of ICA-based MIBCI. The study
+    revealed that rational use of ICA method may be crucial in building a
+    practical ICA-based MIBCI system.
+
+    References
+    ------------
+
+    [1] Zhou B, Wu X, Lv Z, Zhang L, Guo X (2016) A Fully Automated Trial
+    Selection Method for Optimization of Motor Imagery Based Brain-Computer
+    Interface. PLoS ONE 11(9):
+    e0162657. https://doi.org/10.1371/journal.pone.0162657
+
+    """
+
+    def __init__(self):
+        event_codes = {'left_hand': 1, 'right_hand': 2, 'feet': 3}
+        super().__init__(
+            subjects=[1, 2, 3, 4],
+            sessions_per_subject=3,
+            events=event_codes,
+            code='Zhou 2016',
+            # Trial timing: prepare 0-1 s, motor imagery 1-6 s, break 6-10 s
+            interval=[0, 5],
+            task_interval=[1, 6],
+            paradigm='imagery',
+            doi='10.1371/journal.pone.0162657')
+
+    def _get_single_subject_data(self, subject):
+        """Load every run of every session of one subject.
+
+        Returns a dict ``{'session_<s>': {'run_<r>': raw}}`` in which the
+        indices count from 0 in the order given by ``data_path`` and each
+        ``raw`` is the result of ``mne.io.read_raw_cnt``.
+        """
+        sessions = {}
+        for sess_ind, runlist in enumerate(self.data_path(subject)):
+            runs = {}
+            for run_ind, fname in enumerate(runlist):
+                runs['run_{}'.format(run_ind)] = mne.io.read_raw_cnt(
+                    fname, preload=True, montage='standard_1020')
+            sessions['session_{}'.format(sess_ind)] = runs
+        return sessions
+
+    def data_path(self, subject, path=None, force_update=False,
+                  update_path=None, verbose=None):
+        """Return the nested list of ``.cnt`` paths of ``subject``.
+
+        Raises ``ValueError`` when ``subject`` is not in
+        ``self.subject_list``.  ``force_update``, ``update_path`` and
+        ``verbose`` are accepted but not used here.
+        """
+        if subject not in self.subject_list:
+            raise ValueError("Invalid subject number")
+        config_key = 'MNE_DATASETS_ZHOU2016_PATH'
+        path = _get_path(path, config_key, "Zhou 2016")
+        _do_path_update(path, True, config_key, "Zhou 2016")
+        dataset_dir = os.path.join(path, "MNE-zhou-2016")
+        os.makedirs(dataset_dir, exist_ok=True)
+        return local_data_path(dataset_dir, subject)
diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py
@@ -11,3 +11,5 @@
 from .openvibe_mi import OpenvibeMI
 from .bbci_eeg_fnirs import BBCIEEGfNIRS
 from .upper_limb import UpperLimb
+from .Weibo2014 import Weibo2014
+from .Zhou2016 import Zhou2016
diff --git a/moabb/datasets/base.py b/moabb/datasets/base.py
@@ -2,14 +2,55 @@
 Base class for a dataset
 """
 import abc
+import logging
+
+log = logging.getLogger()
 
 
 class BaseDataset(metaclass=abc.ABCMeta):
     """Base dataset"""
 
-    def __init__(self, subjects, sessions_per_subject, events, code, interval,
-                 paradigm, doi=None):
+    def __init__(self, subjects, sessions_per_subject, events,
+                 code, interval, paradigm, task_interval=None, doi=None):
+        """
+        Parameters required for all datasets
 
+        parameters
+        ----------
+        subjects: List of int
+            List of subject number # TODO: make identifiers more general
+
+        sessions_per_subject: int
+            Number of sessions per subject
+
+        events: dict of string: int
+            String codes for events matched with labels in the stim channel. Currently imagery codes can include:
+            - left_hand
+            - right_hand
+            - hands
+            - feet
+            - rest
+            - left_hand_right_foot
+            - right_hand_left_foot
+            - tongue
+            - navigation
+            - subtraction
+            - word_ass (for word association)
+
+        code: string
+            Unique identifier for dataset, used in all plots
+
+        interval: list with 2 entries
+            Interval relative to trial start for imagery
+
+        paradigm: ['p300','imagery']
+            Defines what sort of dataset this is (currently only imagery is implemented)
+
+        task_interval: list of 2 entries or None
+            Defines the start and end of the imagery *relative to event marker.* If not specified, defaults to interval. 
+
+        doi: DOI for dataset, optional (for now)
+        """
         if not isinstance(subjects, list):
             raise(ValueError("subjects must be a list"))
 
@@ -18,6 +59,13 @@ def __init__(self, subjects, sessions_per_subject, events, code, interval,
         self.event_id = events
         self.code = code
         self.interval = interval
+        if task_interval is None:
+            assert interval[0]==0, 'Interval does not start at 0 so task onset is necessary'
+            self.task_interval = list(interval)
+        else:
+            if interval[1]-interval[0] > task_interval[1]-task_interval[0]:
+                log.warning('Given interval extends outside of imagery period')
+            self.task_interval = task_interval
         self.paradigm = paradigm
         self.doi = doi
 

diff --git a/moabb/datasets/bbci_eeg_fnirs.py b/moabb/datasets/bbci_eeg_fnirs.py
@@ -85,7 +85,7 @@ def __init__(self, fnirs=False, motor_imagery=True,
                          sessions_per_subject=n_sessions,
                          events=events,
                          code='BBCI EEG fNIRS',
-                         interval=[3.5, 10],
+                         interval=[0, 10], # marker is for *task* start not cue start
                          paradigm=('/').join(paradigms),
                          doi='10.1109/TNSRE.2016.2628057')