1
0
Files
Mwata-Velu_et_al_2023/config.py
2026-04-09 08:21:30 -07:00

145 lines
6.5 KiB
Python

"""
Configuration for reproducing Mwata-Velu et al. (2023)
"EEG-BCI Features Discrimination between Executed and Imagined Movements
Based on FastICA, Hjorth Parameters, and SVM"
Mathematics 2023, 11, 4409. DOI: 10.3390/math11214409
Dataset: PhysioNet EEG Motor Movement/Imagery Dataset (curated CSV format)
"""
from pathlib import Path
# =============================================================================
# Paths
# =============================================================================
# Root directory of the curated CSV copy of the dataset, given relative to the
# current working directory. The path is assembled from components instead of
# a hand-written backslash literal: a backslash followed by "e" is not a valid
# string escape (newer Python versions warn about it), and a backslash is not
# a path separator outside Windows.
DATA_DIR = Path("..") / "eegmmidb"
# =============================================================================
# Dataset parameters
# =============================================================================
SAMPLING_RATE = 160  # Hz
N_CHANNELS = 64
# All 64 electrode labels (Sharbrough system) in PhysioNet channel order.
# One electrode family per row; the trailing count makes it easy to check
# that the rows still add up to N_CHANNELS after an edit.
CHANNEL_NAMES = [
    "FC5", "FC3", "FC1", "FCz", "FC2", "FC4", "FC6",  # FC: 7
    "C5", "C3", "C1", "Cz", "C2", "C4", "C6",  # C: 7
    "CP5", "CP3", "CP1", "CPz", "CP2", "CP4", "CP6",  # CP: 7
    "Fp1", "Fpz", "Fp2",  # Fp: 3
    "AF7", "AF3", "AFz", "AF4", "AF8",  # AF: 5
    "F7", "F5", "F3", "F1", "Fz", "F2", "F4", "F6", "F8",  # F: 9
    "FT7", "FT8",  # FT: 2
    "T7", "T8", "T9", "T10",  # T: 4
    "TP7", "TP8",  # TP: 2
    "P7", "P5", "P3", "P1", "Pz", "P2", "P4", "P6", "P8",  # P: 9
    "PO7", "PO3", "POz", "PO4", "PO8",  # PO: 5
    "O1", "Oz", "O2",  # O: 3
    "Iz",  # Iz: 1
]
# =============================================================================
# Channel selections (Section 3.2)
# =============================================================================
# 19 channels from the 10-20 system used for ICA decomposition (Section 3.2)
ICA_CHANNELS = [
    "Fp1", "Fp2",
    "F7", "F3", "Fz", "F4", "F8",
    "T7", "C3", "Cz", "C4", "T8",
    "P7", "P3", "Pz", "P4", "P8",
    "O1", "O2",
]
# The nine "Selected_channels" of Algorithm 1, Step 7: the ICA energy
# concentration criterion is evaluated against these sensorimotor, frontal
# and parietal sites.
SELECTED_CHANNELS = [
    "C3", "Cz", "C4",
    "F3", "Fz", "F4",
    "P3", "Pz", "P4",
]
# Channels from which Hjorth features are extracted (Section 3.5, Table 5).
# C3, Cz, C4 is the set behind the paper's best results (Set 3).
TARGET_CHANNELS = ["C3", "Cz", "C4"]
# =============================================================================
# Task / run definitions
# =============================================================================
# NOTE ON NUMBERING: The curated CSV dataset uses a different run numbering
# scheme than the original PhysioNet EDF files. The mapping is:
#
# Curated CSV PhysioNet EDF Task
# ----------- ------------- ----
# Run 01 R03 Execute open/close left or right fist
# Run 02 R04 Imagine open/close left or right fist
# Run 03 R05 Execute open/close both fists or both feet
# Run 04 R06 Imagine open/close both fists or both feet
# Run 05 R07 Execute open/close left or right fist
# Run 06 R08 Imagine open/close left or right fist
# Run 07 R09 Execute open/close both fists or both feet
# Run 08 R10 Imagine open/close both fists or both feet
# Run 09 R11 Execute open/close left or right fist
# Run 10 R12 Imagine open/close left or right fist
# Run 11 R13 Execute open/close both fists or both feet
# Run 12 R14 Imagine open/close both fists or both feet
#
# The paper's Section 4 states twice that results correspond to R03, R04, R07,
# R08, R11, R12 (left/right fist only). This agrees with another statement
# that says they only use 6 of the 14 runs per subject. However, the sample
# counts (8652 total) require including all 12 task runs. The paper also
# says "samples of the first 10 runs constituted the training set;
# those of the 11th and 12th, and 13th and 14th runs were used as the testing
# and validation sets, respectively". These statements contradict each other.
# We use the 6 runs that are listed twice: R03, R04, R07, R08, R11, R12.
# Curated-CSV run numbers; the trailing comment on each entry gives the
# corresponding PhysioNet EDF run.
EXECUTION_RUNS = [
    1,  # R03
    5,  # R07
    9,  # R11
]
IMAGERY_RUNS = [
    2,  # R04
    6,  # R08
    10,  # R12
]
# Execution runs first, then imagery runs.
TARGET_RUNS = [*EXECUTION_RUNS, *IMAGERY_RUNS]
# Annotation codes, taken from the curated CSV annotation files, that mark
# T1/T2 events (active task periods). Rest (T0) codes are left out.
ACTIVE_EVENT_LABELS = [
    2, 3,
    5, 6,
    8, 9,
    11, 12,
]
# =============================================================================
# Sub-band definitions (Section 3.3)
# =============================================================================
# Each entry is (band name, lower edge, upper edge); the edges are presumably
# in Hz. Adjacent bands share an edge value.
SUB_BANDS = [
    ("theta", 4.0, 8.0),
    ("alpha", 8.0, 13.0),
    ("beta", 13.0, 30.0),
]
# =============================================================================
# ICA parameters (Section 3.4, Algorithm 1)
# =============================================================================
# Number of independent components to extract.
ICA_N_COMPONENTS = 19
# Iteration cap and convergence tolerance; presumably passed straight to the
# FastICA solver (verify against the code that consumes them).
ICA_MAX_ITER = 500
ICA_TOL = 1e-4
# Cut-off used by the energy concentration criterion of Algorithm 1.
ICA_ENERGY_THRESHOLD = 0.35
# =============================================================================
# SVM parameters (Section 3.6, Figure 6)
# =============================================================================
SVM_KERNEL = "rbf"
# C and gamma are kept as powers of two so the exponents stay visible.
SVM_C = 2**13  # = 8192
SVM_GAMMA = 2**1  # = 2
# =============================================================================
# Evaluation
# =============================================================================
# Fixed seed list. Its length defines N_RUNS, so the number of repetitions and
# the number of seeds always agree.
RANDOM_SEEDS = [
    42,
    123,
    456,
    789,
    1024,
]
# Paper: "results were averaged by running the model five times"
N_RUNS = len(RANDOM_SEEDS)  # = 5
# =============================================================================
# ICA strategy (not specified in paper — this is a reproducibility variable)
# =============================================================================
# Options:
# 'per_run' — Fit ICA independently on each ~2-minute run
# 'per_subject' — Fit ICA once on all runs concatenated per subject
# 'global' — Fit ICA once on all training subjects concatenated
# Currently selected option: a single ICA fit per subject.
ICA_STRATEGY = "per_subject"
# =============================================================================
# Method 2: Cross-Subject Split (Table 4)
# =============================================================================
# Note: The curated dataset already excludes the 6 problematic subjects
# (S088, S089, S092, S100, S104, S106), so we use consecutive IDs.
# Consecutive subject IDs 1-103, split 83 / 10 / 10 into the train, test and
# validation sets. range() stops before its second argument, hence the +1 on
# each upper bound.
TRAIN_SUBJECTS = [*range(1, 84)]  # subjects 1-83
TEST_SUBJECTS = [*range(84, 94)]  # subjects 84-93
VAL_SUBJECTS = [*range(94, 104)]  # subjects 94-103