diff --git a/environment.yml b/environment.yml index a0dbdf5ec49..45898f4fd5b 100644 --- a/environment.yml +++ b/environment.yml @@ -65,3 +65,4 @@ dependencies: - lazy_loader - defusedxml - python-neo + - formulaic diff --git a/mne/datasets/config.py b/mne/datasets/config.py index ccd4babacd9..b09fb87eedf 100644 --- a/mne/datasets/config.py +++ b/mne/datasets/config.py @@ -129,7 +129,7 @@ ) MNE_DATASETS["misc"] = dict( archive_name=f"{MISC_VERSIONED}.tar.gz", # 'mne-misc-data', - hash="md5:e343d3a00cb49f8a2f719d14f4758afe", + hash="md5:201d35531d3c03701cf50e38bb73481f", url=( "https://codeload.github.com/mne-tools/mne-misc-data/tar.gz/" f'{RELEASES["misc"]}' diff --git a/mne/stats/cluster_level.py b/mne/stats/cluster_level.py index 50743c104ef..8f24c0c4a0c 100644 --- a/mne/stats/cluster_level.py +++ b/mne/stats/cluster_level.py @@ -4,20 +4,31 @@ # License: BSD-3-Clause # Copyright the MNE-Python contributors. +from __future__ import annotations + +from typing import Literal + +import matplotlib.pyplot as plt import numpy as np +from mpl_toolkits.axes_grid1 import make_axes_locatable from scipy import ndimage, sparse from scipy.sparse.csgraph import connected_components from scipy.stats import f as fstat from scipy.stats import t as tstat +from ..epochs import BaseEpochs, EvokedArray +from ..evoked import Evoked from ..fixes import has_numba, jit from ..parallel import parallel_func from ..source_estimate import MixedSourceEstimate, SourceEstimate, VolSourceEstimate from ..source_space import SourceSpaces +from ..time_frequency import BaseTFR from ..utils import ( + GetEpochsMixin, ProgressBar, _check_option, _pl, + _soft_import, _validate_type, check_random_state, logger, @@ -25,8 +36,13 @@ verbose, warn, ) +from ..viz import plot_compare_evokeds from .parametric import f_oneway, ttest_1samp_no_p +# need this at top-level of file due to type hints +pd = _soft_import("pandas", purpose="DataFrame integration", strict=False) +DataFrame = getattr(pd, "DataFrame", None) + def _get_buddies_fallback(r, s, neighbors, indices=None): if indices is None: @@ -931,7 +947,7 @@ def _permutation_cluster_test( sample_shape = X[0].shape[1:] for x in X: if x.shape[1:] != sample_shape: - raise ValueError("All samples mush have the same size") + raise ValueError("All samples must have the same size") # flatten the last dimensions in case the data is high dimensional X = [np.reshape(x, (x.shape[0], -1)) for x in X] @@ -1723,3 +1739,362 @@ def summarize_clusters_stc( data_summary[:, 0] = np.sum(data_summary, axis=1) return klass(data_summary, vertices, tmin, tstep, subject) + + +def _validate_cluster_df(df: DataFrame, dv_name: str, iv_name: str): + """Validate the input DataFrame for cluster tests.""" + # check if all necessary columns are present + missing = ({dv_name} | {iv_name}) - set(df.columns) # should be empty + sep = '", "' + if missing: # if not empty, there are missing columns + raise ValueError( + f"DataFrame must contain a column named for each term in `formula`. " + f"Column{_pl(missing)} missing for term{_pl(missing)} " # _pl = pluralize + f'"{sep.join(missing)}".' 
+        )
+    # check if the data column contains valid (and consistent) instance types
+    inst = df[dv_name].iloc[0]
+    valid_types = (
+        Evoked,
+        BaseEpochs,
+        BaseTFR,
+        np.ndarray,
+    )  # Base covers all Epochs and TFRs
+    _validate_type(inst, valid_types, f"Data in dependent variable column '{dv_name}'")
+    all_types = set(df[dv_name].map(type))
+    all_type_names = ", ".join(t.__name__ for t in all_types)
+    prologue = f"Data in dependent variable column '{dv_name}' must all have "
+    if len(all_types) > 1:
+        raise ValueError(
+            f"{prologue}the same type, but found types {{{all_type_names}}}."
+        )
+    # check if the shape of the data is consistent
+    if isinstance(inst, np.ndarray):
+        all_shapes = set(
+            df[dv_name].map(lambda x: x.shape[1:])
+        )  # first dim may vary (participants or epochs)
+    elif isinstance(inst, (BaseEpochs, BaseTFR)):
+        all_shapes = set(df[dv_name].map(lambda x: x.get_data().shape[1:]))
+    else:
+        all_shapes = set(df[dv_name].map(lambda x: x.get_data().shape))
+    if len(all_shapes) > 1:
+        raise ValueError(
+            f"{prologue}consistent shape, but {len(all_shapes)} different "
+            f"shapes were found: {'; '.join(str(shape) for shape in all_shapes)}."
+        )
+    obj_type = all_types.pop()
+    is_epo = GetEpochsMixin in obj_type.__mro__
+    is_tfr = BaseTFR in obj_type.__mro__
+    is_arr = np.ndarray in obj_type.__mro__
+    return is_epo, is_tfr, is_arr
+
+
+@verbose
+def cluster_test(
+    df: DataFrame,
+    formula: str,
+    *,  # all parameters below are keyword-only
+    within_id: str | None = None,
+    stat_fun: callable | None = None,
+    tail: Literal[-1, 0, 1] = 0,
+    threshold=None,
+    n_permutations: str | int = 1024,
+    adjacency: sparse.spmatrix | None | Literal[False] = None,
+    max_step: int = 1,  # TODO may need to provide `max_step_time` and `max_step_freq`
+    exclude: list | None = None,  # TODO needs rethink because user passes MNE objects
+    step_down_p: float = 0.0,
+    t_power: float = 1.0,
+    check_disjoint: bool = False,
+    out_type: Literal["indices", "mask"] = "indices",
+    seed: None | int | np.random.RandomState = None,
+    buffer_size: int | None = None,
+    n_jobs: int = 1,
+    verbose=None,
+):
+    """Run a cluster permutation test from a DataFrame and a formula.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing one column each for the dependent and independent
+        variables, with one observation per row.
+    formula : str
+        Wilkinson-notation formula for the design matrix. The names of the dependent
+        and independent variables must match column names in ``df``.
+    within_id : None | str
+        Name of column in ``df`` to use in identifying within-group contrasts. If
+        ``None``, will perform a between-group test. Ignored if the number of groups
+        (unique values in the independent variable column of ``df``) is greater than 2.
+    %(stat_fun_clust_both)s
+    %(tail_clust)s
+    %(threshold_clust_both)s
+    %(n_permutations_clust_all)s
+    %(adjacency_clust_both)s
+    max_step : int
+        Maximum distance between samples (time points). Default is 1.
+    exclude : array-like of bool | None
+        Mask to apply to the data to exclude certain points from clustering
+        (e.g., medial wall vertices). Should be the same shape as the channels/vertices
+        dimension of the data objects. If ``None``, no points are excluded.
+    %(step_down_p_clust)s
+    %(t_power_clust)s
+    check_disjoint : bool
+        Whether to check if the ``adjacency`` matrix can be separated into disjoint
+        sets before clustering. This may lead to faster clustering, especially if
+        the "time" and/or "frequency" dimensions are large.
+    %(out_type_clust)s
+    %(seed)s
+    buffer_size : int | None
+        Block size to use when computing test statistics. This can significantly
+        reduce memory usage when ``n_jobs > 1`` and memory sharing between
+        processes is enabled (see :func:`mne.set_cache_dir`), because the data will be
+        shared between processes and each process only needs to allocate space for
+        a small block of locations at a time.
+    %(n_jobs)s
+    %(verbose)s
+
+    Returns
+    -------
+    ClusterResult
+        Object containing the results of the cluster permutation test.
+
+    Notes
+    -----
+    %(threshold_clust_t_or_f_notes)s
+    """
+    # parse formula
+    formulaic = _soft_import("formulaic", purpose="parse formula for clustering")
+    parser = formulaic.parser.DefaultFormulaParser(include_intercept=False)
+    formula = formulaic.Formula(formula, _parser=parser)
+    # extract the dependent and independent variable names
+    dv_name = str(np.array(formula.lhs.root).item())
+    iv_name = str(np.array(formula.rhs.root).item())
+
+    # validate the input dataframe and return the type of the data column entries
+    is_epo, is_tfr, is_arr = _validate_cluster_df(df, dv_name, iv_name)
+
+    # for within-subject designs, check that each subject has 2 observations
+    _validate_type(within_id, (str, None), "within_id")
+    if within_id:
+        df = df.copy(deep=False)  # Don't mutate input dataframe row order!
+        df.sort_values([iv_name, within_id], inplace=True)
+        counts = df[within_id].value_counts()
+        if any(counts != 2):
+            raise ValueError(
+                "For a paired test, each subject must have exactly 2 observations."
+            )
+
+    # extract the data from the dataframe
+    outer_func = np.concatenate if is_epo else np.array
+    axes = (-3, -1) if is_tfr else (-2, -1)
+
+    def func_arr(series):
+        return np.concatenate(series.values)
+
+    def func_mne(series):
+        return outer_func(
+            series.map(lambda inst: inst.get_data().swapaxes(*axes)).to_list()
+        )
+
+    func = func_arr if is_arr else func_mne
+
+    # convert to a list-like X for clustering
+    X = df.groupby(iv_name).agg({dv_name: func})[dv_name].to_list()
+
+    # determine test type
+    if len(X) == 1:
+        kind = "within"  # data already subtracted
+    elif len(X) > 2:
+        kind = "between"
+    elif len({x.shape for x in X}) > 1:
+        # unequal numbers of observations in each group → unpaired test
+        kind = "between"
+    # by now we know there are exactly 2 elements in X, and their shapes match
+    elif within_id in df:
+        kind = "within"
+        X = X[0] - X[1]
+    else:  # 2 elements in X but no within_id provided → unpaired test
+        kind = "between"
+
+    # define stat function and threshold
+    stat_fun, threshold = _check_fun(
+        X=X, stat_fun=stat_fun, threshold=threshold, tail=tail, kind=kind
+    )
+
+    # _check_fun doesn't work with list input
+    if kind == "within":  # TODO will this create an issue for already subtracted data?
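+        # `_permutation_cluster_test` iterates over a sequence of arrays; for a
+        # paired (within) design, X is at this point a single array of a-minus-b
+        # differences, so wrap it in a list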
+        X = [X]
+
+    # Run the cluster-based permutation test
+    stat_obs, clusters, cluster_p_values, H0 = _permutation_cluster_test(
+        X,
+        n_permutations=n_permutations,
+        threshold=threshold,
+        stat_fun=stat_fun,
+        tail=tail,
+        n_jobs=n_jobs,
+        adjacency=adjacency,
+        max_step=max_step,  # maximum distance between samples (time points)
+        exclude=exclude,  # exclude no time points or channels
+        step_down_p=step_down_p,  # step down in jumps test
+        t_power=t_power,  # weigh each location by its stats score
+        out_type=out_type,
+        check_disjoint=check_disjoint,
+        buffer_size=buffer_size,  # block size for chunking the data
+        seed=seed,
+    )
+
+    return ClusterResult(stat_obs, clusters, cluster_p_values, H0, stat_fun)
+
+
+class ClusterResult:
+    """
+    Object containing the results of the cluster permutation test.
+
+    Parameters
+    ----------
+    stat_obs : np.ndarray
+        The observed test statistic.
+    clusters : list
+        List of clusters.
+    cluster_p_values : np.ndarray
+        P-values for each cluster.
+    H0 : np.ndarray
+        Max cluster level stats observed under permutation.
+    stat_fun : callable
+        The function used to compute the test statistic.
+    """
+
+    def __init__(
+        self,
+        stat_obs: np.typing.NDArray,
+        clusters: list,
+        cluster_p_values: np.typing.NDArray,
+        H0: np.typing.NDArray,
+        stat_fun: callable,
+    ):
+        self.stat_obs = stat_obs
+        self.clusters = clusters
+        self.cluster_p_values = cluster_p_values
+        self.H0 = H0
+        self.stat_fun = stat_fun
+
+        # an unpaired t-test is equivalent to f_oneway with 2 groups
+        if stat_fun is f_oneway:
+            self.stat_name = "F-statistic"
+        elif stat_fun is ttest_1samp_no_p:
+            self.stat_name = "paired T-statistic"
+        else:
+            self.stat_name = "test statistic"
+
+    def plot_cluster_time_sensor(
+        self,
+        condition_labels: dict,
+        colors: list | dict | None = None,
+        linestyles: list | dict | None = None,
+        cmap_evokeds: None | str | tuple = None,
+        cmap_topo: None | str | tuple = None,
+        ci: float | bool | callable | None = None,
+    ):
+        """
+        Plot the cluster with the lowest p-value.
+
+        The cluster is plotted as a topomap (spatial extent) on the left and as
+        evoked signals (temporal extent) on the right. Timepoints that are part
+        of the cluster are shaded in grey on the evoked signals.
+
+        Parameters
+        ----------
+        condition_labels : dict
+            Dictionary with condition labels as keys and lists of evoked objects
+            as values.
+        colors : list | dict | None
+            Colors to use when plotting the ERP lines and confidence bands.
+        linestyles : list | dict | None
+            Styles to use when plotting the ERP lines.
+        cmap_evokeds : None | str | tuple
+            Colormap from which to draw color values when plotting the ERP lines.
+        cmap_topo : None | str | tuple
+            Colormap to use for the topomap.
+        ci : float | bool | callable | None
+            Confidence band around each ERP time series.
+ """ + # extract condition labels from the dictionary + cond_keys = list(condition_labels.keys()) + # extract the evokeds from the dictionary + cond_values = list(condition_labels.values()) + + linestyles = {cond_keys[0]: "-", cond_keys[1]: "--"} + + lowest_p_cluster = np.argmin(self.cluster_p_values) + + # plot the cluster with the lowest p-value + time_inds, space_inds = np.squeeze(self.clusters[lowest_p_cluster]) + ch_inds = np.unique(space_inds) + time_inds = np.unique(time_inds) + + # get topography for t stat + t_map = self.stat_obs[time_inds, ...].mean(axis=0).astype(int) + + # get signals at the sensors contributing to the cluster + sig_times = cond_values[0][0].times[time_inds] + + # create spatial mask + mask = np.zeros((t_map.shape[0], 1), dtype=bool) + mask[ch_inds, :] = True + + # initialize figure + fig, ax_topo = plt.subplots(1, 1, figsize=(10, 3), layout="constrained") + + # plot average test statistic and mark significant sensors + t_evoked = EvokedArray(t_map[:, np.newaxis], cond_values[0][0].info, tmin=0) + t_evoked.plot_topomap( + times=0, + mask=mask, + axes=ax_topo, + cmap=cmap_topo, + show=False, + colorbar=False, + mask_params=dict(markersize=10), + scalings=1.00, + ) + image = ax_topo.images[0] + + # remove the title that would otherwise say "0.000 s" + ax_topo.set_title( + "Spatial cluster extent:\n averaged from {:0.3f} to {:0.3f} s".format( + *sig_times[[0, -1]] + ) + ) + + # create additional axes (for ERF and colorbar) + divider = make_axes_locatable(ax_topo) + + # add axes for colorbar + ax_colorbar = divider.append_axes("right", size="5%", pad=0.1) + cbar = plt.colorbar(image, cax=ax_colorbar) + cbar.set_label(self.stat_name) + + # add new axis for time courses and plot time courses + ax_signals = divider.append_axes("right", size="300%", pad=1.3) + title = ( + f"Temporal cluster extent:\nSignal averaged over {len(ch_inds)} sensor(s)" + ) + plot_compare_evokeds( + condition_labels, + title=title, + picks=ch_inds, + axes=ax_signals, + colors=colors, + linestyles=linestyles, + cmap=cmap_evokeds, + show=False, + split_legend=True, + truncate_yaxis="auto", + truncate_xaxis=False, + ci=ci, + ) + plt.legend(frameon=False, loc="upper left") + + # plot temporal cluster extent + ymin, ymax = ax_signals.get_ylim() + ax_signals.fill_betweenx( + (ymin, ymax), sig_times[0], sig_times[-1], color="grey", alpha=0.3 + ) + + plt.show() diff --git a/mne/stats/tests/test_cluster_level.py b/mne/stats/tests/test_cluster_level.py index e319d018328..06a87a07477 100644 --- a/mne/stats/tests/test_cluster_level.py +++ b/mne/stats/tests/test_cluster_level.py @@ -15,10 +15,19 @@ ) from scipy import linalg, sparse, stats -from mne import MixedSourceEstimate, SourceEstimate, SourceSpaces, VolSourceEstimate +from mne import ( + EpochsArray, + EvokedArray, + MixedSourceEstimate, + SourceEstimate, + SourceSpaces, + VolSourceEstimate, + create_info, +) from mne.fixes import _eye_array from mne.stats import combine_adjacency, ttest_ind_no_p from mne.stats.cluster_level import ( + cluster_test, f_oneway, permutation_cluster_1samp_test, permutation_cluster_test, @@ -27,7 +36,8 @@ summarize_clusters_stc, ttest_1samp_no_p, ) -from mne.utils import _record_warnings, catch_logging +from mne.time_frequency import AverageTFRArray, BaseTFR, EpochsTFRArray +from mne.utils import GetEpochsMixin, _record_warnings, catch_logging n_space = 50 @@ -867,3 +877,109 @@ def test_output_equiv(shape, out_type, adjacency, threshold): assert out_type == "indices" got_mask[np.ix_(*clu)] = n 
     assert_array_equal(got_mask, want_mask)
+
+
+def test_compare_old_and_new_cluster_api():
+    """Test for same results from old and new APIs."""
+    pd = pytest.importorskip("pandas")
+    condition1_1d, condition2_1d, condition1_2d, condition2_2d = _get_conditions()
+    df_1d = pd.DataFrame(
+        dict(
+            data=[condition1_1d, condition2_1d],
+            condition=["a", "b"],
+        )
+    )
+    kwargs = dict(n_permutations=100, tail=1, seed=1, buffer_size=None, out_type="mask")
+    F_obs, clusters, cluster_pvals, H0 = permutation_cluster_test(
+        [condition1_1d, condition2_1d], **kwargs
+    )
+    formula = "data ~ condition"
+    cluster_result = cluster_test(df_1d, formula, **kwargs)
+    assert_array_equal(cluster_result.H0, H0)
+    assert_array_equal(cluster_result.stat_obs, F_obs)
+    assert_array_equal(cluster_result.cluster_p_values, cluster_pvals)
+    # `==` on lists of arrays is ambiguous, so compare cluster by cluster
+    assert len(cluster_result.clusters) == len(clusters)
+    for clu1, clu2 in zip(cluster_result.clusters, clusters):
+        assert_array_equal(clu1, clu2)
+
+
+@pytest.mark.parametrize(
+    "Inst", (EpochsArray, EvokedArray, EpochsTFRArray, AverageTFRArray)
+)
+@pytest.mark.filterwarnings('ignore:Ignoring argument "tail":RuntimeWarning')
+def test_new_cluster_api(Inst):
+    """Test handling different MNE objects in the cluster API."""
+    pd = pytest.importorskip("pandas")
+
+    rng = np.random.default_rng(seed=8675309)
+    is_epo = GetEpochsMixin in Inst.__mro__
+    is_tfr = BaseTFR in Inst.__mro__
+
+    n_epo, n_chan, n_freq, n_times = 6, 3, 4, 5
+
+    # prepare the dimensions of the simulated data, then simulate
+    size = (n_chan,)
+    if is_epo:
+        size = (n_epo, *size)
+    if is_tfr:
+        size = (*size, n_freq)
+    size = (*size, n_times)
+    data = rng.normal(size=size)
+
+    # construct the instance
+    info = create_info(ch_names=n_chan, sfreq=1000, ch_types="eeg")
+    kw = dict(times=np.arange(n_times), freqs=np.arange(n_freq)) if is_tfr else dict()
+    cond_a = Inst(data=data, info=info, **kw)
+    cond_b = cond_a.copy()
+    # introduce a significant difference in a specific region, time, and frequency
+    ch_start, ch_end = 0, 2  # 2 channels
+    t_start, t_end = 2, 4  # 2 times
+    f_start, f_end = 2, 4  # 2 freqs
+    if is_tfr:
+        cond_b._data[..., ch_start:ch_end, f_start:f_end, t_start:t_end] += 2
+    else:
+        cond_b._data[..., ch_start:ch_end, t_start:t_end] += 2
+    # for Evokeds/AverageTFRs, we create fake "subjects" as our observations within
+    # each condition, adding a bit of noise as we do so
+    if not is_epo:
+        insts = list()
+        for cond in cond_a, cond_b:
+            for _n in range(n_epo):
+                if not _n:
+                    insts.append(cond)
+                    continue
+                _cond = cond.copy()
+                _cond.data += rng.normal(scale=0.1, size=_cond.data.shape)
+                insts.append(_cond)
+        conds = np.repeat(["a", "b"], n_epo).tolist()
+    else:
+        # For Epochs(TFR)Array, each epoch is an observation and they're already
+        # noisy/non-identical, so no duplication / noise-addition is necessary.
+ insts = [cond_a, cond_b] + conds = ["a", "b"] + + # run new clustering API + df = pd.DataFrame(dict(data=insts, condition=conds)) + kwargs = dict( + n_permutations=100, seed=42, tail=1, buffer_size=None, out_type="mask" + ) + result_new_api = cluster_test(df, "data~condition", **kwargs) + + # make sure channels are last dimension for old API + if is_epo: + axes = (0, 3, 2, 1) if is_tfr else (0, 2, 1) + X = [cond_a.get_data().transpose(*axes), cond_b.get_data().transpose(*axes)] + else: + axes = (2, 1, 0) if is_tfr else (1, 0) + Xa = list() + Xb = list() + for inst, cond in zip(insts, conds): + container = Xa if cond == "a" else Xb + container.append(inst.get_data().transpose(*axes)) + X = [np.stack(Xa), np.stack(Xb)] + + F_obs, clusters, cluster_pvals, H0 = permutation_cluster_test(X, **kwargs) + assert_array_almost_equal(result_new_api.H0, H0) + assert_array_almost_equal(result_new_api.stat_obs, F_obs) + assert_array_almost_equal(result_new_api.cluster_p_values, cluster_pvals) + assert len(result_new_api.clusters) == len(clusters) + for clu1, clu2 in zip(result_new_api.clusters, clusters): + assert_array_equal(clu1, clu2) diff --git a/mne/utils/docs.py b/mne/utils/docs.py index 3e59d751d93..2c84f95f0e0 100644 --- a/mne/utils/docs.py +++ b/mne/utils/docs.py @@ -144,61 +144,54 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): formatting. This can add overhead so is meant only for debugging. """ -docdict["adjacency_clust"] = """ -adjacency : scipy.sparse.spmatrix | None | False +_adjacency_clust_template = """ +adjacency : scipy.sparse.spmatrix | {param_none}False Defines adjacency between locations in the data, where "locations" can be spatial vertices, frequency bins, time points, etc. For spatial vertices (i.e. sensor space data), see :func:`mne.channels.find_ch_adjacency` or :func:`mne.spatial_inter_hemi_adjacency`. For source space data, see - :func:`mne.spatial_src_adjacency` or - :func:`mne.spatio_temporal_src_adjacency`. If ``False``, assumes - no adjacency (each location is treated as independent and unconnected). - If ``None``, a regular lattice adjacency is assumed, connecting - each {sp} location to its neighbor(s) along the last dimension - of {{eachgrp}} ``{{x}}``{lastdim}. + :func:`mne.spatial_src_adjacency` or :func:`mne.spatio_temporal_src_adjacency`. + If ``False``, assumes no adjacency (each location is treated as independent and + unconnected).{if_none} If ``adjacency`` is a matrix, it is assumed to be symmetric (only the upper triangular half is used) and must be square with dimension equal to - ``{{x}}.shape[-1]`` {parone} or ``{{x}}.shape[-1] * {{x}}.shape[-2]`` - {partwo} or (optionally) - ``{{x}}.shape[-1] * {{x}}.shape[-2] * {{x}}.shape[-3]`` - {parthree}.{memory} + the product of the last 1, 2, or 3 data dimensions (e.g., for time-frequency data: + n_channels, n_channels * n_freqs, or n_channels * n_freqs * n_times).{memory} +""" +_if_none = """ If ``None``, a regular lattice adjacency is assumed, connecting + each {spatial}location to its neighbor(s) along the last dimension + of {the_data}. """ - -mem = ( - " If spatial adjacency is uniform in time, it is recommended to use " - "a square matrix with dimension ``{x}.shape[-1]`` (n_vertices) to save " - "memory and computation, and to use ``max_step`` to define the extent " - "of temporal adjacency to consider when clustering." -) -comb = " The function `mne.stats.combine_adjacency` may be useful for 4D data." 
st = dict( - sp="spatial", - lastdim="", - parone="(n_vertices)", - partwo="(n_times * n_vertices)", - parthree="(n_times * n_freqs * n_vertices)", - memory=mem, + param_none="None | ", + if_none=_if_none.format(spatial="spatial ", the_data="{eachgrp} ``{x}``"), + memory=""" + If spatial adjacency is uniform in time, it is recommended to use a square matrix + with dimension ``{x}.shape[-1]`` (n_vertices) to save memory and computation, + and to use ``max_step`` to define the extent of temporal adjacency to consider when + clustering. +""", ) tf = dict( - sp="", - lastdim=" (or the last two dimensions if ``{x}`` is 2D)", - parone="(for 2D data)", - partwo="(for 3D data)", - parthree="(for 4D data)", - memory=comb, + param_none="None | ", + if_none=_if_none.format( + spatial="", + the_data="{eachgrp} ``{x}`` (or the last two dimensions if ``{x}`` is 2D)", + ), + memory=""" + The function `mne.stats.combine_adjacency` may be useful for 4D data. +""", ) -nogroups = dict(eachgrp="", x="X") +nogrps = dict(eachgrp="", x="X") groups = dict(eachgrp="each group ", x="X[k]") -docdict["adjacency_clust_1"] = ( - docdict["adjacency_clust"].format(**tf).format(**nogroups) -) -docdict["adjacency_clust_n"] = docdict["adjacency_clust"].format(**tf).format(**groups) -docdict["adjacency_clust_st1"] = ( - docdict["adjacency_clust"].format(**st).format(**nogroups) -) -docdict["adjacency_clust_stn"] = ( - docdict["adjacency_clust"].format(**st).format(**groups) + +docdict["adjacency_clust_1"] = _adjacency_clust_template.format(**tf).format(**nogrps) +docdict["adjacency_clust_both"] = _adjacency_clust_template.format( + param_none="", if_none="", memory="" ) +docdict["adjacency_clust_n"] = _adjacency_clust_template.format(**tf).format(**groups) +docdict["adjacency_clust_st1"] = _adjacency_clust_template.format(**st).format(**nogrps) +docdict["adjacency_clust_stn"] = _adjacency_clust_template.format(**st).format(**groups) docdict["adjust_dig_chpi"] = """ adjust_dig : bool @@ -708,7 +701,7 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): docdict["check_disjoint_clust"] = """ check_disjoint : bool - Whether to check if the connectivity matrix can be separated into disjoint + Whether to check if the ``adjacency`` matrix can be separated into disjoint sets before clustering. This may lead to faster clustering, especially if the second dimension of ``X`` (usually the "time" dimension) is large. """ @@ -1416,7 +1409,7 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): """ docdict["exclude_clust"] = """ -exclude : bool array or None +exclude : array-like of bool | None Mask to apply to the data to exclude certain points from clustering (e.g., medial wall vertices). Should be the same shape as ``X``. If ``None``, no points are excluded. @@ -3962,7 +3955,7 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): seed : None | int | instance of ~numpy.random.RandomState A seed for the NumPy random number generator (RNG). If ``None`` (default), the seed will be obtained from the operating system - (see :class:`~numpy.random.RandomState` for details), meaning it will most + (see :class:`~numpy.random.RandomState` for details), meaning it will most likely produce different output every time this function or method is run. To achieve reproducible results, pass a value here to explicitly initialize the RNG with a defined state. 
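The two-stage formatting above works because ``str.format`` does not re-scan
substituted values: the first ``.format()`` call fills ``param_none``, ``if_none``,
and ``memory``, and any ``{eachgrp}``/``{x}`` placeholders arriving *inside those
values* survive to be filled by the second ``.format()`` call. A minimal runnable
sketch of the pattern (the names here are abbreviated illustrations, not the
actual docdict entries):

    template = "adjacency : spmatrix | {param_none}False ...{if_none}"
    if_none = " If None, a lattice adjacency is assumed for {the_data}."
    stage1 = template.format(
        param_none="None | ",
        # the substituted value still contains {eachgrp} and {x} afterwards
        if_none=if_none.format(the_data="{eachgrp}``{x}``"),
    )
    final = stage1.format(eachgrp="each group ", x="X[k]")  # second pass fills them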
@@ -4266,16 +4259,23 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): channel names in the file will be used when possible. """ -_stat_fun_clust_base = """ +_stat_fun_template = """ stat_fun : callable | None Function called to calculate the test statistic. Must accept 1D-array as - input and return a 1D array. If ``None`` (the default), uses - `mne.stats.{}`. + input and return a 1D array. If ``None`` (the default), uses {}. """ -docdict["stat_fun_clust_f"] = _stat_fun_clust_base.format("f_oneway") +docdict["stat_fun_clust_both"] = _stat_fun_template.format( + """:func:`mne.stats.ttest_1samp_no_p` + for paired tests and :func:`mne.stats.f_oneway` for unpaired tests or tests of + more than 2 groups.""" +) + +docdict["stat_fun_clust_f"] = _stat_fun_template.format(":func:`mne.stats.f_oneway`") -docdict["stat_fun_clust_t"] = _stat_fun_clust_base.format("ttest_1samp_no_p") +docdict["stat_fun_clust_t"] = _stat_fun_template.format( + ":func:`mne.stats.ttest_1samp_no_p`" +) docdict["static"] = """ static : instance of SpatialImage @@ -4486,10 +4486,10 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): threshold : float | dict | None The so-called "cluster forming threshold" in the form of a test statistic (note: this is not an alpha level / "p-value"). - If numeric, vertices with data values more extreme than ``threshold`` will - be used to form clusters. If ``None``, {} will be chosen + If numeric, vertices with stat values more extreme than ``threshold`` will + be used to form clusters. If ``None``, {which_thresh} will be chosen automatically that corresponds to a p-value of 0.05 for the given number of - observations (only valid when using {}). If ``threshold`` is a + observations (only valid when using {which_stat}). If ``threshold`` is a :class:`dict` (with keys ``'start'`` and ``'step'``) then threshold-free cluster enhancement (TFCE) will be used (see the :ref:`TFCE example ` and :footcite:`SmithNichols2009`). @@ -4497,8 +4497,14 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): a particular p-value for one-tailed or two-tailed tests. """ -f_test = ("an F-threshold", "an F-statistic") -docdict["threshold_clust_f"] = _threshold_clust_base.format(*f_test) +docdict["threshold_clust_both"] = _threshold_clust_base.format( + which_thresh="a t- or F-threshold", + which_stat="``stat_fun=None``, i.e., a paired t-test or one-way F-test", +) + +docdict["threshold_clust_f"] = _threshold_clust_base.format( + which_thresh="an F-threshold", which_stat="an F-statistic" +) docdict["threshold_clust_f_notes"] = """ For computing a ``threshold`` based on a p-value, use the conversion @@ -4510,8 +4516,9 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): thresh = scipy.stats.f.ppf(1 - pval, dfn=dfn, dfd=dfd) # F distribution """ -t_test = ("a t-threshold", "a t-statistic") -docdict["threshold_clust_t"] = _threshold_clust_base.format(*t_test) +docdict["threshold_clust_t"] = _threshold_clust_base.format( + which_thresh="a t-threshold", which_stat="a t-statistic" +) docdict["threshold_clust_t_notes"] = """ For computing a ``threshold`` based on a p-value, use the conversion @@ -4525,6 +4532,23 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): For testing the lower tail (``tail=-1``), don't subtract ``pval`` from 1. 
""" +docdict["threshold_clust_t_or_f_notes"] = """ +For computing a ``threshold`` based on a p-value, use the conversion +from :meth:`scipy.stats.rv_continuous.ppf`:: + + pval = 0.001 # arbitrary + # for t-statistic + df = n_observations - 1 # degrees of freedom for the t-test + thresh = scipy.stats.t.ppf(1 - pval / 2, df) # two-tailed, t distribution + # for f-statistic + dfn = n_conditions - 1 # degrees of freedom numerator + dfd = n_observations - n_conditions # degrees of freedom denominator + thresh = scipy.stats.f.ppf(1 - pval, dfn=dfn, dfd=dfd) # F distribution + +For a one-tailed test (``tail=1``), don't divide the p-value by 2. +For testing the lower tail (``tail=-1``), don't subtract ``pval`` from 1. +""" + docdict["time_bandwidth_tfr"] = """ time_bandwidth : float ``≥ 2.0`` Product between the temporal window length (in seconds) and the *full* diff --git a/pyproject.toml b/pyproject.toml index e82012b8d6d..0c36566021d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,7 +106,8 @@ full-no-qt = [ "snirf", "defusedxml", "neo", - "antio>=0.4.0", + "formulaic", + "antio>=0.4.0" ] full = ["mne[full-no-qt]", "PyQt6!=6.6.0", "PyQt6-Qt6!=6.6.0,!=6.7.0"] full-pyqt6 = ["mne[full]"] @@ -157,6 +158,7 @@ doc = [ "sphinxcontrib-towncrier", "memory_profiler", "neo", + "formulaic", "seaborn!=0.11.2", "sphinx_copybutton", "sphinx-design", diff --git a/tools/vulture_allowlist.py b/tools/vulture_allowlist.py index ce5e95124d4..3515c04917a 100644 --- a/tools/vulture_allowlist.py +++ b/tools/vulture_allowlist.py @@ -154,3 +154,6 @@ _qt_raise_window _qt_disable_paint _qt_get_stylesheet + +# used in tutorial, not sure why shows up +plot_cluster_time_sensor diff --git a/tutorials/stats-sensor-space/76_new_cluster_test_api.py b/tutorials/stats-sensor-space/76_new_cluster_test_api.py new file mode 100644 index 00000000000..0e5aee91432 --- /dev/null +++ b/tutorials/stats-sensor-space/76_new_cluster_test_api.py @@ -0,0 +1,164 @@ +""" +.. _tut-new-cluster-test-api: + +=============================================================== +New cluster test API that allows for Wilkinson style formulas +=============================================================== + +This tutorial shows how to use the new API for cluster testing. +The new API allows for Wilkinson style formulas and allows for more flexibility in +the design of the test. Here we will demonstrate how to use the new API for +a standard paired t-test on evoked data from multiple subjects. +It uses a non-parametric statistical procedure based on permutations and +cluster level statistics. + +The procedure consists of: + + - loading evoked data from multiple subjects + - construct a dataframe that contains the difference between conditions + - run the new cluster test function with formula in Wilkinson notation + - plot the results with the new ClusterResults API + +Here, the unit of observation are evokeds from multiple subjects (2nd level analysis). + +For more information on cluster-based permutation testing in MNE-Python, +see also: :ref:`tut-cluster-one-samp-tfr`. +""" +# Authors: Carina Forster +# +# License: BSD-3-Clause +# Copyright the MNE-Python contributors. 
+
+# %% Load the required packages
+
+from pathlib import Path
+
+import pandas as pd
+
+import mne
+
+# %% Load the P3 dataset
+
+# Set parameters
+# --------------
+# Define the path to the P3 dataset
+path_to_p3 = mne.datasets.misc.data_path() / "ERP_CORE" / "P3"
+
+# Define the range of participant IDs (we only have 5 participants in the dataset)
+participant_ids = range(15, 20)  # This will cover participants 15 to 19
+
+# store the evoked data of all subjects
+evokeds_allsubs = []
+
+# Loop over each participant ID and generate the corresponding filename
+# to load the evoked data
+for pid in participant_ids:
+    # Create the filename using an f-string; the ID is zero-padded to 3 digits
+    filename_p3 = f"sub-{pid:03d}_ses-P3_task-P3_ave.fif"
+
+    # Create the full path to the file
+    p3_file_path = Path(path_to_p3) / filename_p3
+
+    # load the evoked data
+    evokeds = mne.read_evokeds(p3_file_path)
+
+    # add the single subject's evoked data to the list
+    evokeds_allsubs.append(evokeds)
+
+# the P3b dataset is part of the freely available ERP CORE dataset
+# participants were presented with a visual oddball task
+# and the P3b component was analyzed
+# the conditions of interest are the target (rare visual stimuli)
+# and non-target stimuli (frequent visual stimuli)
+
+# %% visually inspect the evoked data for each condition
+
+# let's extract the target and non-target evokeds
+target_only = [evoked[0] for evoked in evokeds_allsubs]
+non_target_only = [evoked[1] for evoked in evokeds_allsubs]
+
+# let's first have a look at the data
+
+# create the contrast target - non-target
+diff_evoked = [
+    mne.combine_evoked([evokeds_a, evokeds_b], weights=[1, -1])
+    for evokeds_a, evokeds_b in zip(target_only, non_target_only)
+]
+
+# plot the grand average of the difference signal
+mne.grand_average(diff_evoked).plot()
+# plot the topography of the difference signal
+mne.grand_average(diff_evoked).plot_topomap()
+
+# we can see that the strongest difference is around 400 ms in
+# central-parietal channels, with a stronger evoked signal for target stimuli
+
+# %% Prepare the dataframe for the new cluster test API
+
+# the dataframe should contain the evoked data for both conditions and the
+# subject index; each row in the dataframe represents one observation
+# (one evoked object)
+
+# save the evoked data for both conditions in one list
+evokeds_conditions = target_only + non_target_only
+
+# create a list that defines the condition for each evoked object;
+# this will become the condition column in the dataframe
+conditions = ["target"] * len(target_only) + ["non-target"] * len(non_target_only)
+
+# finally, add a column that defines the subject index;
+# we repeat the participant IDs once per condition
+subject_index = list(participant_ids) * 2
+
+# create the dataframe containing the evoked data, the condition and the subject index
+df = pd.DataFrame(
+    {
+        "evoked": evokeds_conditions,
+        "condition": conditions,
+        "subject_index": subject_index,
+    }
+)
+
+# %% run the cluster test function with formulaic input
+
+# we will use the new API that accepts Wilkinson-style formulas;
+# the formula should be a string in Wilkinson notation
+
+# we want to test whether there is a significant difference between
+# target and non-target stimuli in the post-stimulus window,
+# using a cluster-based permutation paired t-test
+
+# let's define the formula in Wilkinson notation:
+# we want to predict the evoked
+# signal from the condition (target vs. non-target);
+# for the paired test, the within_id column identifies which observations
+# belong to the same subject, and the test permutes condition labels
+# within each subject
+formula = "evoked ~ condition"
+
+# run the new cluster test API, which returns a ClusterResult object
+cluster_result = mne.stats.cluster_level.cluster_test(
+    df=df, formula=formula, within_id="subject_index"
+)
+# TODO: add n_permutations to cluster_result
+
+# print the lowest cluster p-value
+print(f"The lowest cluster p-value is: {cluster_result.cluster_p_values.min()}")
+
+# note that an exact test was run due to the small sample size
+# (with only 5 subjects there are few possible permutations)
+
+# %% plot the results
+
+# set up the conditions dictionary for the cluster plots;
+# this is needed for plotting the evoked data and the cluster result on top
+conditions_dict = {"target": target_only, "non-target": non_target_only}
+
+# finally, let's plot the results using the ClusterResult class
+
+# we plot the cluster with the lowest p-value
+cluster_result.plot_cluster_time_sensor(condition_labels=conditions_dict, ci=True)
+# we can see that there is something going on around 400 ms,
+# with a stronger signal for target trials in right central-parietal channels
+
+# however, the cluster is not significant, which is unsurprising
+# given the small sample size (only 5 subjects)
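+
+# %% Optional: cross-check against the classic API
+
+# a minimal sketch (mirroring the unit tests in this PR) showing that the same
+# paired contrast can be run through the long-standing one-sample cluster test
+# on the subject-wise difference waves; exact agreement depends on using the
+# same tail, threshold, and permutation settings in both calls
+import numpy as np
+
+# observations x times x channels, since the classic API expects channels last
+X = np.stack(
+    [
+        (a.get_data() - b.get_data()).T
+        for a, b in zip(target_only, non_target_only)
+    ]
+)
+t_obs, clusters, cluster_pv, H0 = mne.stats.permutation_cluster_1samp_test(X, seed=1)
+print(f"lowest cluster p-value from the classic API: {cluster_pv.min()}")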