From 2f49da8b280bbc531b10557528c782de176def8a Mon Sep 17 00:00:00 2001
From: Bru <a.bruno@aluno.ufabc.edu.br>
Date: Thu, 24 Oct 2024 10:48:35 +0200
Subject: [PATCH 1/3] [MNT] Solving moabb and braindecode compatibility (#669)

* first try

* done

* done

* whats new file

* whats new file

* whats new file

* Easy fix with Sosulski2019 ;)

* [FIX] Applying suggestions from the revision

* [FIX] fixing Ofner2017 too!

* [FIX] fixing GrosseWentrup2009 too!

* [FIX] Solving Liu dataset!
---
 docs/source/whats_new.rst      |  4 ++-
 moabb/datasets/Zhou2016.py     |  6 +++-
 moabb/datasets/liu2024.py      | 11 ++++----
 moabb/datasets/mpi_mi.py       |  6 ++--
 moabb/datasets/physionet_mi.py | 11 ++++++--
 moabb/datasets/sosulski2019.py |  6 ++--
 moabb/datasets/upper_limb.py   |  7 +++--
 moabb/datasets/utils.py        | 50 ++++++++++++++++++++++++++++++++++
 8 files changed, 84 insertions(+), 17 deletions(-)

diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst
index d7f1398c2..134c5415b 100644
--- a/docs/source/whats_new.rst
+++ b/docs/source/whats_new.rst
@@ -24,7 +24,9 @@ Bugs
 - Fix Stieger2021 dataset bugs (:gh:`651` by `Martin Wimpff`_)
 - Unpinning major version Scikit-learn and numpy (:gh:`652` by `Bruno Aristimunha`_)
 - Replacing the func:`numpy.string_` to func:`numpy.bytes_` (:gh:`665` by `Bruno Aristimunha`_)
--  Fixing the set_download_dir that was not working when we tried to set the dir more than 10 times at the same time (:gh:`668` by `Bruno Aristimunha`_)
+- Fixing the set_download_dir that was not working when we tried to set the dir more than 10 times at the same time (:gh:`668` by `Bruno Aristimunha`_)
+- Creating stimulus channels in :class:`moabb.datasets.Zhou2016` and :class:`moabb.datasets.PhysionetMI` to allow braindecode compatibility (:gh:`669` by `Bruno Aristimunha`_)
+
 
 API changes
 ~~~~~~~~~~~
diff --git a/moabb/datasets/Zhou2016.py b/moabb/datasets/Zhou2016.py
index c4037cea9..fb7f07b04 100644
--- a/moabb/datasets/Zhou2016.py
+++ b/moabb/datasets/Zhou2016.py
@@ -14,6 +14,7 @@
 
 from .base import BaseDataset
 from .download import get_dataset_path
+from .utils import stim_channels_with_selected_ids
 
 
 DATA_PATH = "https://ndownloader.figshare.com/files/3662952"
@@ -88,6 +89,7 @@ def __init__(self):
             paradigm="imagery",
             doi="10.1371/journal.pone.0162657",
         )
+        self.events = dict(left_hand=1, right_hand=2, feet=3)
 
     def _get_single_subject_data(self, subject):
         """Return data for a single subject."""
@@ -105,7 +107,9 @@ def _get_single_subject_data(self, subject):
                 stim[stim == "2"] = "right_hand"
                 stim[stim == "3"] = "feet"
                 raw.annotations.description = stim
-                out[sess_key][run_key] = raw
+                out[sess_key][run_key] = stim_channels_with_selected_ids(
+                    raw, desired_event_id=self.events
+                )
                 out[sess_key][run_key].set_montage(make_standard_montage("standard_1005"))
         return out
 
diff --git a/moabb/datasets/liu2024.py b/moabb/datasets/liu2024.py
index 82dbf4af6..07e0738d8 100644
--- a/moabb/datasets/liu2024.py
+++ b/moabb/datasets/liu2024.py
@@ -14,6 +14,7 @@
 
 from moabb.datasets import download as dl
 from moabb.datasets.base import BaseDataset
+from moabb.datasets.utils import stim_channels_with_selected_ids
 
 
 # Link to the raw data
@@ -77,15 +78,15 @@ class Liu2024(BaseDataset):
     def __init__(self, break_events=False, instr_events=False):
         self.break_events = break_events
         self.instr_events = instr_events
-        events = {"left_hand": 1, "right_hand": 2}
+        self.events = {"left_hand": 1, "right_hand": 2}
         if break_events:
-            events["instr"] = 3
+            self.events["instr"] = 3
         if instr_events:
-            events["break"] = 4
+            self.events["break"] = 4
         super().__init__(
             subjects=list(range(1, 50 + 1)),
             sessions_per_subject=1,
-            events=events,
+            events=self.events,
             code="Liu2024",
             interval=(2, 6),
             paradigm="imagery",
@@ -277,7 +278,7 @@ def _get_single_subject_data(self, subject):
         # Loading dataset
         raw = raw.load_data(verbose=False)
         # There is only one session
-        sessions = {"0": {"0": raw}}
+        sessions = {"0": {"0": stim_channels_with_selected_ids(raw, self.event_id)}}
 
         return sessions
 
diff --git a/moabb/datasets/mpi_mi.py b/moabb/datasets/mpi_mi.py
index 06e8bd366..9abff905b 100644
--- a/moabb/datasets/mpi_mi.py
+++ b/moabb/datasets/mpi_mi.py
@@ -5,6 +5,7 @@
 
 from moabb.datasets import download as dl
 from moabb.datasets.base import BaseDataset
+from moabb.datasets.utils import stim_channels_with_selected_ids
 from moabb.utils import depreciated_alias
 
 
@@ -56,10 +57,11 @@ class GrosseWentrup2009(BaseDataset):
     """
 
     def __init__(self):
+        self.events_id = dict(right_hand=2, left_hand=1)
         super().__init__(
             subjects=list(range(1, 11)),
             sessions_per_subject=1,
-            events=dict(right_hand=2, left_hand=1),
+            events=self.events_id,
             code="GrosseWentrup2009",
             interval=[0, 7],
             paradigm="imagery",
@@ -76,7 +78,7 @@ def _get_single_subject_data(self, subject):
         stim[stim == "20"] = "right_hand"
         stim[stim == "10"] = "left_hand"
         raw.annotations.description = stim
-        return {"0": {"0": raw}}
+        return {"0": {"0": stim_channels_with_selected_ids(raw, self.event_id)}}
 
     def data_path(
         self, subject, path=None, force_update=False, update_path=None, verbose=None
diff --git a/moabb/datasets/physionet_mi.py b/moabb/datasets/physionet_mi.py
index 3367c9cf3..8f99dfad2 100644
--- a/moabb/datasets/physionet_mi.py
+++ b/moabb/datasets/physionet_mi.py
@@ -6,6 +6,7 @@
 
 from moabb.datasets.base import BaseDataset
 from moabb.datasets.download import data_dl, get_dataset_path
+from moabb.datasets.utils import stim_channels_with_selected_ids
 
 
 BASE_URL = "https://physionet.org/files/eegmmidb/1.0.0/"
@@ -79,7 +80,7 @@ def __init__(self, imagined=True, executed=False):
             paradigm="imagery",
             doi="10.1109/TBME.2004.827072",
         )
-
+        self.events = dict(left_hand=2, right_hand=3, feet=5, hands=4, rest=1)
         self.imagined = imagined
         self.executed = executed
         self.feet_runs = []
@@ -123,7 +124,9 @@ def _get_single_subject_data(self, subject):
             stim[stim == "T1"] = "left_hand"
             stim[stim == "T2"] = "right_hand"
             raw.annotations.description = stim
-            data[str(idx)] = raw
+            data[str(idx)] = stim_channels_with_selected_ids(
+                raw, desired_event_id=self.events
+            )
             idx += 1
 
         # feet runs
@@ -136,7 +139,9 @@ def _get_single_subject_data(self, subject):
             stim[stim == "T1"] = "hands"
             stim[stim == "T2"] = "feet"
             raw.annotations.description = stim
-            data[str(idx)] = raw
+            data[str(idx)] = stim_channels_with_selected_ids(
+                raw, desired_event_id=self.events
+            )
             idx += 1
 
         return {"0": data}
diff --git a/moabb/datasets/sosulski2019.py b/moabb/datasets/sosulski2019.py
index ade789445..2fc20db7e 100644
--- a/moabb/datasets/sosulski2019.py
+++ b/moabb/datasets/sosulski2019.py
@@ -7,6 +7,7 @@
 
 from moabb.datasets import download as dl
 from moabb.datasets.base import BaseDataset
+from moabb.datasets.utils import stim_channels_with_selected_ids
 
 
 SPOT_PILOT_P300_URL = (
@@ -95,12 +96,13 @@ def __init__(
         self.n_channels = 31
         self.use_soas_as_sessions = use_soas_as_sessions
         self.description_map = {"Stimulus/S 21": "Target", "Stimulus/S  1": "NonTarget"}
+        self.events = dict(Target=21, NonTarget=1)
         code = "Sosulski2019"
         interval = [-0.2, 1] if interval is None else interval
         super().__init__(
             subjects=list(range(1, 13 + 1)),
             sessions_per_subject=1,
-            events=dict(Target=21, NonTarget=1),
+            events=self.events,
             code=code,
             interval=interval,
             paradigm="p300",
@@ -133,7 +135,7 @@ def _get_single_run_data(self, file_path):
         if self.reject_non_iid:
             raw.set_annotations(raw.annotations[7:85])  # non-iid rejection
         raw.annotations.rename(self.description_map)
-        return raw
+        return stim_channels_with_selected_ids(raw, self.events)
 
     def _get_single_subject_data(self, subject):
         """Return data for a single subject."""
diff --git a/moabb/datasets/upper_limb.py b/moabb/datasets/upper_limb.py
index f10db8ccd..e8095de81 100644
--- a/moabb/datasets/upper_limb.py
+++ b/moabb/datasets/upper_limb.py
@@ -3,6 +3,7 @@
 from mne.io import read_raw_gdf
 
 from moabb.datasets.base import BaseDataset
+from moabb.datasets.utils import stim_channels_with_selected_ids
 
 from . import download as dl
 
@@ -58,7 +59,7 @@ class Ofner2017(BaseDataset):
     def __init__(self, imagined=True, executed=False):
         self.imagined = imagined
         self.executed = executed
-        event_id = {
+        self.event_id = {
             "right_elbow_flexion": 1536,
             "right_elbow_extension": 1537,
             "right_supination": 1538,
@@ -72,7 +73,7 @@ def __init__(self, imagined=True, executed=False):
         super().__init__(
             subjects=list(range(1, 16)),
             sessions_per_subject=n_sessions,
-            events=event_id,
+            events=self.event_id,
             code="Ofner2017",
             interval=[0, 3],  # according to paper 2-5
             paradigm="imagery",
@@ -114,7 +115,7 @@ def _get_single_subject_data(self, subject):
                 stim[stim == "1541"] = "right_hand_open"
                 stim[stim == "1542"] = "rest"
                 raw.annotations.description = stim
-                data[str(ii)] = raw
+                data[str(ii)] = stim_channels_with_selected_ids(raw, self.event_id)
 
             out[session_name] = data
         return out
diff --git a/moabb/datasets/utils.py b/moabb/datasets/utils.py
index f0e280f94..c0f198537 100644
--- a/moabb/datasets/utils.py
+++ b/moabb/datasets/utils.py
@@ -2,6 +2,7 @@
 
 import inspect
 
+import mne
 import numpy as np
 from mne import create_info
 from mne.io import RawArray
@@ -273,3 +274,52 @@ def add_stim_channel_epoch(
     )
     raw = raw.add_channels([RawArray(data=stim_chan, info=info, verbose=False)])
     return raw
+
+
+def stim_channels_with_selected_ids(
+    raw: mne.io.BaseRaw, desired_event_id: dict, stim_channel_name="STIM"
+):
+    """Add a stimulus channel from annotations selected by an event mapping.
+
+    ``raw`` annotations are replaced in place; the channel is added to a copy.
+
+    Parameters
+    ----------
+    raw: mne.io.BaseRaw
+        The raw object to add the stimulus channel to.
+    desired_event_id: dict
+        Mapping from annotation description to integer event code.
+    stim_channel_name: str
+        Name of the stimulus channel to create.
+
+    Returns
+    -------
+    raw_with_stim: mne.io.BaseRaw
+        Copy of ``raw`` with the stimulus channel appended.
+    """
+    # Get events using the consistent event_id mapping
+    events, _ = mne.events_from_annotations(raw, event_id=desired_event_id)
+
+    # Filter the events array to include only desired events
+    filtered_events = events[np.isin(events[:, 2], list(desired_event_id.values()))]
+
+    # Create annotations from filtered events using the inverted mapping
+    event_desc = {v: k for k, v in desired_event_id.items()}
+    annot_from_events = mne.annotations_from_events(
+        events=filtered_events,
+        event_desc=event_desc,
+        sfreq=raw.info["sfreq"],
+        orig_time=raw.info["meas_date"],
+    )
+    raw.set_annotations(annot_from_events)
+
+    # Event samples count from raw.first_samp, the data array starts at 0
+    stim_channs = np.zeros((1, raw.n_times))
+    for sample_index, _, event_code in filtered_events:
+        stim_channs[0, sample_index - raw.first_samp] = event_code
+
+    stim_info = mne.create_info(
+        [stim_channel_name], sfreq=raw.info["sfreq"], ch_types=["stim"]
+    )
+    stim_raw = mne.io.RawArray(stim_channs, stim_info, verbose=False)
+    return raw.copy().add_channels([stim_raw], force_update_info=True)

From d2edb29a22c8621fa5e4643b682835cb44432b1d Mon Sep 17 00:00:00 2001
From: gcattan <gcattan@hotmail.fr>
Date: Wed, 6 Nov 2024 15:02:19 +0100
Subject: [PATCH 2/3] Update version of pyRiemann to 0.7 (#671)

* Update version of pyRiemann to 0.7

Signed-off-by: gcattan <gcattan@hotmail.fr>

* Update whats_new.rst

---------

Signed-off-by: gcattan <gcattan@hotmail.fr>
---
 docs/source/whats_new.rst | 2 ++
 pyproject.toml            | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst
index 134c5415b..4d4d3d957 100644
--- a/docs/source/whats_new.rst
+++ b/docs/source/whats_new.rst
@@ -18,6 +18,8 @@ Develop branch
 Enhancements
 ~~~~~~~~~~~~
 
+- Update version of pyRiemann to 0.7 (:gh:`671` by `Gregoire Cattan`_)
+
 Bugs
 ~~~~
 
diff --git a/pyproject.toml b/pyproject.toml
index 61a9b4595..07da17eb0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ h5py = "^3.10.0"
 scikit-learn = ">=1.4.2"
 matplotlib = "^3.6.2"
 seaborn = "^0.12.1"
-pyriemann = "^0.6"
+pyriemann = "^0.7"
 PyYAML = "^6.0"
 pooch = "^1.6.0"
 requests = "^2.28.1"

From e52b71d54c6f4ea3daebdb219c037634de692761 Mon Sep 17 00:00:00 2001
From: Pierre Guetschel <25532709+PierreGtch@users.noreply.github.com>
Date: Tue, 12 Nov 2024 10:23:40 +0100
Subject: [PATCH 3/3] Add columns definitions in the datasets doc (#672)

* Update datasets summary

* Rename #Trials column in MI table for consistency

* Update whatsnew
---
 docs/source/dataset_summary.rst    | 40 ++++++++++++++++++++++++++++++
 docs/source/whats_new.rst          |  1 +
 moabb/datasets/summary_imagery.csv |  2 +-
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/docs/source/dataset_summary.rst b/docs/source/dataset_summary.rst
index 8b2908573..96f748d57 100644
--- a/docs/source/dataset_summary.rst
+++ b/docs/source/dataset_summary.rst
@@ -17,10 +17,29 @@ there is a tutorial explaining how to do so, and we welcome warmly any new contr
 
 See also `Datasets-Support <https://github.com/NeuroTechX/moabb/wiki/Datasets-Support>`__ for supplementary
 detail on datasets (class name, size, licence, etc.)
+
+Column definitions:
+
+* **Dataset** is the name of the dataset.
+* **#Subj** is the number of subjects.
+* **#Chan** is the number of EEG channels.
+* **#Trials / class** is the number of repetitions performed by one subject for each class. This number is computed using only the first subject of each dataset. *The definitions of a class and of a trial depend on the paradigm used (see sections below).*
+* **Trial length** is the duration of a trial in seconds.
+* **Total_trials** is the total number of trials in the dataset (all subjects and classes together).
+* **Freq** is the sampling frequency of the raw data.
+* **#Session** is the number of sessions per subject. Different sessions are often recorded on different days.
+* **#Runs** is the number of runs per session. A run is a continuous recording of the EEG data. Often, the different runs of a given session are recorded without removing the EEG cap in between.
+* **PapersWithCode leaderboard** is the link to the dataset on the PapersWithCode leaderboard.
 
 Motor Imagery
 ======================
 
+Motor Imagery is a BCI paradigm where the subject imagines performing movements. Each movement is associated with a different command to build an application.
+
+Motor Imagery-specific definitions:
+    * **#Classes** is the number of different imagery tasks.
+    * **Trial** is one repetition of the imagery task.
+
 .. csv-table::
    :file: ../build/summary_imagery.csv
    :header-rows: 1
@@ -30,6 +49,12 @@ Motor Imagery
 P300/ERP
 ======================
 
+ERP (Event-Related Potential) is a BCI paradigm where the subject is presented with a stimulus and the EEG response is recorded. The P300 is a positive peak in the EEG signal that occurs around 300 ms after the stimulus.
+
+P300-specific definitions:
+    * **A trial** is one flash.
+    * **The classes** are binary: a trial is **target** if the key on which the subject focuses is flashed and **non-target** otherwise.
+
 .. csv-table::
    :file: ../build/summary_p300.csv
    :header-rows: 1
@@ -39,6 +64,13 @@ P300/ERP
 SSVEP
 ======================
 
+SSVEP (Steady-State Visually Evoked Potential) is a BCI paradigm where the subject is presented with flickering stimuli. The EEG signal is modulated at the same frequency as the stimulus. Each stimulus is flickering at a different frequency.
+
+SSVEP-specific definitions:
+    * **#Classes** is the number of different stimulation frequencies.
+    * **A trial** is one symbol selection. This includes multiple flashes.
+
+
 .. csv-table::
    :file: ../build/summary_ssvep.csv
    :header-rows: 1
@@ -58,6 +90,14 @@ Hornero, R. (2021). Brain–computer interfaces based on code-modulated visual e
 potentials (c-VEP): A literature review. Journal of Neural Engineering, 18(6), 061002.
 DOI: https://doi.org/10.1088/1741-2552/ac38cf
 
+c-VEP-specific definitions:
+    * **A trial** is one symbol selection. This includes multiple flashes.
+    * **#Trial classes** is the number of different symbols.
+    * **#Epoch classes** is the number of possible intensities for the flashes (for a visual cVEP paradigm). Typically, there are only two intensities: on and off.
+    * **#Epochs / class** is the number of flashes per intensity in each session.
+    * **Codes** is the type of code used in the experiment.
+    * **Presentation rate** is the rate at which the codes are presented.
+
 .. csv-table::
    :file: ../build/summary_cvep.csv
    :header-rows: 1
diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst
index 4d4d3d957..b907ce8fd 100644
--- a/docs/source/whats_new.rst
+++ b/docs/source/whats_new.rst
@@ -19,6 +19,7 @@ Enhancements
 ~~~~~~~~~~~~
 
 - Update version of pyRiemann to 0.7 (:gh:`671` by `Gregoire Cattan`_)
+- Add columns definitions in the datasets doc (:gh:`672` by `Pierre Guetschel`_)
 
 Bugs
 ~~~~
diff --git a/moabb/datasets/summary_imagery.csv b/moabb/datasets/summary_imagery.csv
index 389615cc4..9e923e15d 100644
--- a/moabb/datasets/summary_imagery.csv
+++ b/moabb/datasets/summary_imagery.csv
@@ -1,4 +1,4 @@
-Dataset, #Subj, #Chan, #Classes, #Trials, Trial length, Freq, #Session, #Runs, Total_trials, PapersWithCode leaderboard
+Dataset, #Subj, #Chan, #Classes, #Trials / class, Trial length, Freq, #Session, #Runs, Total_trials, PapersWithCode leaderboard
 AlexMI,8,16,3,20,3s,512Hz,1,1,480,https://paperswithcode.com/dataset/alexandremotorimagery-moabb
 BNCI2014_001,9,22,4,144,4s,250Hz,2,6,62208,https://paperswithcode.com/dataset/bnci2014-001-moabb-1
 BNCI2014_002,14,15,2,80,5s,512Hz,1,8,17920,https://paperswithcode.com/dataset/bnci2014-002-moabb-1