"""
Configuration for reproducing Mwata-Velu et al. (2023)
"EEG-BCI Features Discrimination between Executed and Imagined Movements
Based on FastICA, Hjorth Parameters, and SVM"
Mathematics 2023, 11, 4409. DOI: 10.3390/math11214409

Dataset: PhysioNet EEG Motor Movement/Imagery Dataset (curated CSV format)
"""

from pathlib import Path

# =============================================================================
# Paths
# =============================================================================
# Location of the curated dataset, relative to the current working directory.
# Built from path components instead of the literal "..\eegmmidb": a backslash
# is a path separator only on Windows (elsewhere it is an ordinary filename
# character), and "\e" is an invalid string escape that newer Python versions
# warn about.
DATA_DIR = Path("..") / "eegmmidb"

# =============================================================================
# Dataset parameters
# =============================================================================
SAMPLING_RATE = 160  # Hz
N_CHANNELS = 64  # matches the number of entries in CHANNEL_NAMES

# Full 64-channel names (Sharbrough system, PhysioNet ordering).
# The position of a name in this list is its channel index, so the order of
# the entries must not be changed.
CHANNEL_NAMES = [
    'FC5', 'FC3', 'FC1', 'FCz', 'FC2', 'FC4', 'FC6',
    'C5', 'C3', 'C1', 'Cz', 'C2', 'C4', 'C6',
    'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4', 'CP6',
    'Fp1', 'Fpz', 'Fp2',
    'AF7', 'AF3', 'AFz', 'AF4', 'AF8',
    'F7', 'F5', 'F3', 'F1', 'Fz', 'F2', 'F4', 'F6', 'F8',
    'FT7', 'FT8',
    'T7', 'T8', 'T9', 'T10',
    'TP7', 'TP8',
    'P7', 'P5', 'P3', 'P1', 'Pz', 'P2', 'P4', 'P6', 'P8',
    'PO7', 'PO3', 'POz', 'PO4', 'PO8',
    'O1', 'Oz', 'O2',
    'Iz',
]

# =============================================================================
# Channel selections (Section 3.2)
# =============================================================================

# 19 channels from the 10-20 system used for ICA decomposition (Section 3.2).
# Every name here also appears in CHANNEL_NAMES.
ICA_CHANNELS = [
    'Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8',
    'T7', 'C3', 'Cz', 'C4', 'T8',
    'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'O2',
]

# 9 "Selected_channels" for ICA energy concentration criterion
# (Algorithm 1, Step 7). These are the sensorimotor + frontal + parietal
# channels the paper evaluates energy concentration against.
SELECTED_CHANNELS = ['C3', 'Cz', 'C4', 'F3', 'Fz', 'F4', 'P3', 'Pz', 'P4']

# Channels used for Hjorth feature extraction (Section 3.5, Table 5).
# The paper's best results (Set 3) use C3, Cz, C4.
TARGET_CHANNELS = ['C3', 'Cz', 'C4']

# =============================================================================
# Task / run definitions
# =============================================================================
# NOTE ON NUMBERING: The curated CSV dataset uses a different run numbering
# scheme than the original PhysioNet EDF files. The mapping is:
#
#   Curated CSV   PhysioNet EDF   Task
#   -----------   -------------   ----
#   Run 01        R03             Execute open/close left or right fist
#   Run 02        R04             Imagine open/close left or right fist
#   Run 03        R05             Execute open/close both fists or both feet
#   Run 04        R06             Imagine open/close both fists or both feet
#   Run 05        R07             Execute open/close left or right fist
#   Run 06        R08             Imagine open/close left or right fist
#   Run 07        R09             Execute open/close both fists or both feet
#   Run 08        R10             Imagine open/close both fists or both feet
#   Run 09        R11             Execute open/close left or right fist
#   Run 10        R12             Imagine open/close left or right fist
#   Run 11        R13             Execute open/close both fists or both feet
#   Run 12        R14             Imagine open/close both fists or both feet
#
# The paper's Section 4 states twice that results correspond to R03, R04, R07,
# R08, R11, R12 (left/right fist only). This agrees with another statement
# that says they only use 6 of the 14 runs per subject. However, the sample
# counts (8652 total) require including all 12 task runs. Additionally, the
# paper also says "samples of the first 10 runs constituted the training set;
# those of the 11th and 12th, and 13th and 14th runs were used as the testing
# and validation sets, respectively". These statements contradict each other.
# We use the 6 runs that are listed twice: R03, R04, R07, R08, R11, R12.

# Run numbers below follow the curated CSV numbering (left column of the
# table), not the PhysioNet EDF numbering.
EXECUTION_RUNS = [1, 5, 9]   # R03, R07, R11
IMAGERY_RUNS = [2, 6, 10]    # R04, R08, R12
# Execution runs first, then imagery runs.
TARGET_RUNS = EXECUTION_RUNS + IMAGERY_RUNS

# Annotation labels that correspond to T1/T2 events (active task periods).
# T0 (rest) is excluded. These codes come from the curated CSV annotation
# files.
ACTIVE_EVENT_LABELS = [2, 3, 5, 6, 8, 9, 11, 12]

# =============================================================================
# Sub-band definitions (Section 3.3)
# =============================================================================
# Each entry is (band name, lower edge, upper edge); the edges are presumably
# frequencies in Hz. Adjacent bands share their boundary value.
SUB_BANDS = [
    ('theta', 4.0, 8.0),
    ('alpha', 8.0, 13.0),
    ('beta', 13.0, 30.0),
]

# =============================================================================
# ICA parameters (Section 3.4, Algorithm 1)
# =============================================================================
ICA_N_COMPONENTS = 19        # equals the number of entries in ICA_CHANNELS
ICA_ENERGY_THRESHOLD = 0.35
ICA_MAX_ITER = 500
ICA_TOL = 1e-4

# =============================================================================
# SVM parameters (Section 3.6, Figure 6)
# =============================================================================
# C and gamma are written as powers of two so the exponent stays visible.
SVM_C = 2 ** 13      # 8192
SVM_GAMMA = 2 ** 1   # 2
SVM_KERNEL = 'rbf'

# =============================================================================
# Evaluation
# =============================================================================
# Paper: "results were averaged by running the model five times"
N_RUNS = 5
# One entry per repetition (len(RANDOM_SEEDS) == N_RUNS); presumably each
# repetition is seeded with the corresponding value.
RANDOM_SEEDS = [42, 123, 456, 789, 1024]

# =============================================================================
# ICA strategy (not specified in paper — this is a reproducibility variable)
# =============================================================================
# Options:
#   'per_run'     — Fit ICA independently on each ~2-minute run
#   'per_subject' — Fit ICA once on all runs concatenated per subject
#   'global'      — Fit ICA once on all training subjects concatenated
ICA_STRATEGY = 'per_subject'

# =============================================================================
# Method 2: Cross-Subject Split (Table 4)
# =============================================================================
# Note: The curated dataset already excludes the 6 problematic subjects
# (S088, S089, S092, S100, S104, S106), so we use consecutive IDs.
# The three ranges are disjoint and together cover IDs 1-103.
TRAIN_SUBJECTS = list(range(1, 84))    # Subjects 1-83
TEST_SUBJECTS = list(range(84, 94))    # Subjects 84-93
VAL_SUBJECTS = list(range(94, 104))    # Subjects 94-103