diff --git a/p2/quest/biber_kernelpca_affil.png b/p2/quest/affil_biber_kernelpca_affil.png
similarity index 100%
rename from p2/quest/biber_kernelpca_affil.png
rename to p2/quest/affil_biber_kernelpca_affil.png
diff --git a/p2/quest/neurobiber-pca.log b/p2/quest/neurobiber-pca.log
index d8a9800..0bc7040 100644
--- a/p2/quest/neurobiber-pca.log
+++ b/p2/quest/neurobiber-pca.log
@@ -1,5 +1,9 @@
-starting the job at: Wed Sep 3 18:53:34 CDT 2025
+starting the job at: Thu Sep 4 10:09:58 CDT 2025
 setting up the environment
 running the neurobiber labeling script
+Number of PCs explaining 90% variance: 18
+Variance of each PCA component: [88.92832185 39.46471687 32.34601523 20.19544345 14.0083261  11.5837521
+  7.82584723  6.89064989  6.07988254  5.80726367  5.49782354  4.50587747
+  4.31482409  2.81997326  2.62989708  2.27205352  2.09396341  2.00076119]
 job finished, cleaning up
-job pau at: Wed Sep 3 18:53:58 CDT 2025
+job pau at: Thu Sep 4 10:10:21 CDT 2025
diff --git a/p2/quest/parallel-mw-olmo-info-cat.log b/p2/quest/parallel-mw-olmo-info-cat.log
index 3884ca9..3fd3851 100644
--- a/p2/quest/parallel-mw-olmo-info-cat.log
+++ b/p2/quest/parallel-mw-olmo-info-cat.log
@@ -1,8 +1,8 @@
-setting up the environment by loading in conda environment at Wed Sep 3 19:04:03 CDT 2025
-running the bertopic job at Wed Sep 3 19:04:03 CDT 2025
+setting up the environment by loading in conda environment at Thu Sep 4 10:04:55 CDT 2025
+running the bertopic job at Thu Sep 4 10:04:55 CDT 2025
 ----------------------------------------
-srun job start: Wed Sep 3 19:04:03 CDT 2025
-Job ID: 3220869
+srun job start: Thu Sep 4 10:04:55 CDT 2025
+Job ID: 3272179
 Username: nws8519
 Queue: gengpu
 Account: p32852
@@ -11,16 +11,230 @@ The following variables are not guaranteed to be the same in the prologue and the job run script
 ----------------------------------------
-PATH (in prologue) : /home/nws8519/.conda/envs/olmo/bin:/software/miniconda3/4.12.0/condabin:/home/nws8519/.local/bin:/home/nws8519/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/lpp/mmfs/bin:/hpc/usertools
+PATH (in prologue) : /home/nws8519/.conda/envs/olmo/bin:/software/miniconda3/4.12.0/condabin:/home/nws8519/.local/bin:/home/nws8519/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/lpp/mmfs/bin:/hpc/usertools
 WORKDIR is: /home/nws8519
 ----------------------------------------
-/home/nws8519/.conda/envs/olmo/bin/python3.11: can't open file '/gpfs/home/nws8519/git/mw-lifecycle-analysis/p2/quest/nnodes': [Errno 2] No such file or directory
-/home/nws8519/.conda/envs/olmo/bin/python3.11: can't open file '/gpfs/home/nws8519/git/mw-lifecycle-analysis/p2/quest/nnodes': [Errno 2] No such file or directory
-Traceback (most recent call last):
-  File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in
-    sys.exit(main())
-    ^^^^^^
-  File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
+W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766]
+W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
+W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
+[... the same OMP_NUM_THREADS warning block from the other three torchrun agents (PIDs 1845276, 1400307, 1400308) elided ...]
+/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
+  df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
+/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
+  df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
+[rank0]: Traceback (most recent call last):
+[rank0]:   File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 178, in
+[rank0]:     main()
+[rank0]:   File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 122, in main
+[rank0]:     dataset = SentenceDataset(comment_texts, comment_types, priming, typology, instructions)
+[rank0]:               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+[rank0]:   File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 76, in __init__
+[rank0]:     sentences = split_to_sentences(cleaned_comment)
+[rank0]:                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+[rank0]:   File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 106, in split_to_sentences
+[rank0]:     return nltk.sent_tokenize(text)
+[rank0]:            ^^^^^^^^^^^^^^^^^^^^^^^^
+[rank0]:   File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
+[rank0]:     tokenizer = _get_punkt_tokenizer(language)
+[rank0]:                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+[rank0]:   File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
+[rank0]:     return PunktTokenizer(language)
+[rank0]:            ^^^^^^^^^^^^^^^^^^^^^^^^
+[rank0]:   File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
+[rank0]:     self.load_lang(lang)
+[rank0]:   File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
+[rank0]:     lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
+[rank0]:                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+[rank0]:   File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/data.py", line 579, in find
+[rank0]:     raise LookupError(resource_not_found)
+[rank0]: LookupError:
+[rank0]: **********************************************************************
+[rank0]:   Resource punkt_tab not found.
+[rank0]:   Please use the NLTK Downloader to obtain the resource:
+
+[rank0]:   >>> import nltk
+[rank0]:   >>> nltk.download('punkt_tab')
+[rank0]:
+[rank0]:   For more information see: https://www.nltk.org/data.html
+
+[rank0]:   Attempted to load tokenizers/punkt_tab/english/
+
+[rank0]:   Searched in:
+[rank0]:     - '/home/nws8519/nltk_data'
+[rank0]:     - '/home/nws8519/.conda/envs/olmo/nltk_data'
+[rank0]:     - '/home/nws8519/.conda/envs/olmo/share/nltk_data'
+[rank0]:     - '/home/nws8519/.conda/envs/olmo/lib/nltk_data'
+[rank0]:     - '/usr/share/nltk_data'
+[rank0]:     - '/usr/local/share/nltk_data'
+[rank0]:     - '/usr/lib/nltk_data'
+[rank0]:     - '/usr/local/lib/nltk_data'
+[rank0]: **********************************************************************
+
+/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
+  df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
+/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
+  df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
+[... identical "Resource punkt_tab not found" LookupError tracebacks from rank1, rank2, and rank3 elided ...]
+[rank2]:[W904 10:05:56.100290280 ProcessGroupNCCL.cpp:1476] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
+[rank0]:[W904 10:05:56.107999460 ProcessGroupNCCL.cpp:1476] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
+W0904 10:05:57.705000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:900] Sending process 1400332 closing signal SIGTERM
+W0904 10:05:57.720000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:900] Sending process 1400334 closing signal SIGTERM
+E0904 10:05:57.770000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 1) local_rank: 0 (pid: 1400331) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11
 Traceback (most recent call last):
   File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in
     sys.exit(main())
@@ -29,93 +243,6 @@ Traceback (most recent call last):
     return f(*args, **kwargs)
            ^^^^^^^^^^^^^^^^^^
   File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
"/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper - ^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper - result = f(*args, **kwargs) - result = f(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 711, in run - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 711, in run - result = self._invoke_run(role) - result = self._invoke_run(role) - ^^^^^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 864, in _invoke_run - ^^^^^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 864, in _invoke_run - self._initialize_workers(self._worker_group) - self._initialize_workers(self._worker_group) - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper - result = f(*args, **kwargs) - result = f(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 683, in _initialize_workers - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 683, in _initialize_workers - self._rendezvous(worker_group) - self._rendezvous(worker_group) - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper - result = f(*args, **kwargs) - result = f(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 500, in _rendezvous - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 500, in _rendezvous - rdzv_info = spec.rdzv_handler.next_rendezvous() - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/rendezvous/static_tcp_rendezvous.py", line 67, in next_rendezvous - self._store = TCPStore( # type: ignore[call-arg] - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -torch.distributed.DistNetworkError: The server socket has failed to listen on any local network address. port: 29500, useIpv6: false, code: -98, name: EADDRINUSE, message: address already in use - rdzv_info = spec.rdzv_handler.next_rendezvous() - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/rendezvous/static_tcp_rendezvous.py", line 67, in next_rendezvous - self._store = TCPStore( # type: ignore[call-arg] - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -torch.distributed.DistNetworkError: The server socket has failed to listen on any local network address. 
port: 29500, useIpv6: false, code: -98, name: EADDRINUSE, message: address already in use -E0903 19:04:19.236000 1488504 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 2) local_rank: 0 (pid: 1488524) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11 -E0903 19:04:19.236000 2554912 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 2) local_rank: 0 (pid: 2554950) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11 -Traceback (most recent call last): - File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in - sys.exit(main()) - ^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper -Traceback (most recent call last): - File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in - return f(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main - sys.exit(main()) - ^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper - return f(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^ - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main - run(args) - File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run run(args) File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run elastic_launch( @@ -126,20 +253,31 @@ Traceback (most recent call last): raise ChildFailedError( torch.distributed.elastic.multiprocessing.errors.ChildFailedError: ============================================================ -nnodes FAILED +/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py FAILED ------------------------------------------------------------ Failures: ------------------------------------------------------------ Root Cause (first observed failure): [0]: - time : 2025-09-03_19:04:19 - host : qgpu2013 + time : 2025-09-04_10:05:57 + host : qgpu0203 rank : 0 (local_rank: 0) - exitcode : 2 (pid: 1488524) + exitcode : 1 (pid: 1400331) error_file: traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html ============================================================ +E0904 10:05:57.885000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 1) local_rank: 0 (pid: 1400333) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11 +Traceback (most recent call last): + File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in + sys.exit(main()) + ^^^^^^ + File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper + return f(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^ + File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main + run(args) + File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run elastic_launch( File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 139, in __call__ return 
@@ -148,20 +286,56 @@ Root Cause (first observed failure):
     raise ChildFailedError(
 torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
 ============================================================
-nnodes FAILED
+/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py FAILED
 ------------------------------------------------------------
 Failures:
 ------------------------------------------------------------
 Root Cause (first observed failure):
 [0]:
-  time      : 2025-09-03_19:04:19
-  host      : qgpu2014
-  rank      : 0 (local_rank: 0)
-  exitcode  : 2 (pid: 2554950)
+  time      : 2025-09-04_10:05:57
+  host      : qgpu0203
+  rank      : 2 (local_rank: 0)
+  exitcode  : 1 (pid: 1400333)
   error_file:
   traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
 ============================================================
-srun: error: qgpu2013: tasks 0-1: Exited with exit code 1
-srun: error: qgpu2014: tasks 2-3: Exited with exit code 1
-unsupervised olmo categorization pau at Wed Sep 3 19:04:19 CDT 2025
+Traceback (most recent call last):
+  File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in
+    sys.exit(main())
+    ^^^^^^
+  File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
+    return f(*args, **kwargs)
+           ^^^^^^^^^^^^^^^^^^
+  File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
+    run(args)
+  File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
+    elastic_launch(
+  File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 139, in __call__
+    return launch_agent(self._config, self._entrypoint, list(args))
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 265, in launch_agent
+    if result.is_failed():
+       ^^^^^^^^^^^^^^^^
+AttributeError: 'NoneType' object has no attribute 'is_failed'
+[... identical torchrun traceback from the second agent, ending in the same AttributeError, elided ...]
+srun: error: qgpu0203: tasks 2-3: Exited with exit code 1
+srun: error: qgpu0202: tasks 0-1: Exited with exit code 1
+unsupervised olmo categorization pau at Thu Sep 4 10:05:58 CDT 2025
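Note on the punkt_tab LookupError above, which is what killed every rank in this run: nltk.sent_tokenize() looks for the resource on the paths listed in the traceback, so fetching it once before the distributed launch should be enough. A minimal sketch, using the first directory from that search list; the setup step itself is an assumption, not code from the repo:

import os
import nltk

# Assumed one-time setup, run on the login node or at the top of the job script,
# so the four workers never race to download the same files.
NLTK_DIR = "/home/nws8519/nltk_data"  # first entry in the search paths printed above
os.makedirs(NLTK_DIR, exist_ok=True)
nltk.download("punkt_tab", download_dir=NLTK_DIR)

Because ~/nltk_data is already on NLTK's default search path (it is the first entry in the log's "Searched in" list), the workers should then find the tokenizer without any further changes to olmo_parallel_cat.py.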
diff --git a/p2/quest/python_scripts/__pycache__/olmo_parallel_cat.cpython-311.pyc b/p2/quest/python_scripts/__pycache__/olmo_parallel_cat.cpython-311.pyc
new file mode 100644
index 0000000..c001f79
Binary files /dev/null and b/p2/quest/python_scripts/__pycache__/olmo_parallel_cat.cpython-311.pyc differ
diff --git a/p2/quest/python_scripts/neurobiber_PCA.py b/p2/quest/python_scripts/neurobiber_PCA.py
index 90978ae..9c9b5ec 100644
--- a/p2/quest/python_scripts/neurobiber_PCA.py
+++ b/p2/quest/python_scripts/neurobiber_PCA.py
@@ -20,12 +20,26 @@ if __name__ == "__main__":
     biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] == 'task_description']
     biber_vecs = format_df_data(biber_vec_df)
     #handoff to PCA model
-    pca = KernelPCA(n_components=2, kernel="rbf")
-    biber_vecs_pca = pca.fit_transform(biber_vecs)
-
+    '''
+    pca_trial = PCA()
+    biber_vecs_pca_trial = pca_trial.fit_transform(biber_vecs)
+
+    explained_variance = pca_trial.explained_variance_ratio_
+    cumulative_variance = np.cumsum(explained_variance)
+
+    n_components = np.argmax(cumulative_variance >= 0.90) + 1
+    print(f"Number of PCs explaining 90% variance: {n_components}")
+    '''
+    pca = PCA(n_components=18)
+    biber_vecs_pca = pca.fit_transform(biber_vecs)
+    selected_axis = "source"
+
+    component_variances = np.var(biber_vecs_pca, axis=0)
+    print("Variance of each PCA component:", component_variances)
+
     #first looking at comment_type
     le = LabelEncoder()
-    colors = le.fit_transform(biber_vec_df['AuthorWMFAffil'])
+    colors = le.fit_transform(biber_vec_df[selected_axis])
     plt.scatter(biber_vecs_pca[:, 0], biber_vecs_pca[:, 1], c=colors, edgecolor='none', alpha=0.5, cmap="viridis")
@@ -38,16 +52,16 @@ if __name__ == "__main__":
     plot_df = pd.DataFrame({
         "PC1": biber_vecs_pca[:, 0],
         "PC2": biber_vecs_pca[:, 1],
-        "AuthorWMFAffil": biber_vec_df["AuthorWMFAffil"].astype(str)
+        selected_axis: biber_vec_df[selected_axis].astype(str)
     })
     plt.figure(figsize=(8,6))
     sns.scatterplot(
-        data=plot_df, x="PC1", y="PC2", hue="AuthorWMFAffil",
+        data=plot_df, x="PC1", y="PC2", hue="source",
         palette="tab10", s=40, alpha=0.7, edgecolor=None
     )
     plt.xlabel('component 1')
     plt.ylabel('component 2')
-    plt.legend(title='AuthorWMFAffil', bbox_to_anchor=(1.05, 1), loc=2)
+    plt.legend(title=selected_axis, bbox_to_anchor=(1.05, 1), loc=2)
     plt.tight_layout()
-    plt.savefig("biber_kernelpca_affil.png", dpi=300)
+    plt.savefig(f"{selected_axis}_090425_biber_kernelpca_affil.png", dpi=300)
     plt.show()
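For reference, the commented-out pca_trial block above is the step that produced the "Number of PCs explaining 90% variance: 18" line in neurobiber-pca.log. A small self-contained sketch of the same selection rule; the helper name and the stand-in matrix are illustrative only, while the 0.90 threshold and the PCA calls mirror the script:

import numpy as np
from sklearn.decomposition import PCA

def n_components_for_variance(X, threshold=0.90):
    # Fit a full PCA, then take the smallest k whose cumulative
    # explained-variance ratio reaches the threshold.
    pca = PCA().fit(X)
    cumulative = np.cumsum(pca.explained_variance_ratio_)
    return int(np.argmax(cumulative >= threshold)) + 1

# e.g. k = n_components_for_variance(biber_vecs)
#      biber_vecs_pca = PCA(n_components=k).fit_transform(biber_vecs)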
diff --git a/p2/quest/python_scripts/olmo_parallel_cat.py b/p2/quest/python_scripts/olmo_parallel_cat.py
index 5e95867..f20c6ce 100644
--- a/p2/quest/python_scripts/olmo_parallel_cat.py
+++ b/p2/quest/python_scripts/olmo_parallel_cat.py
@@ -1,7 +1,7 @@
 import torch
 import torch.nn.functional as F
 from torch.utils.data import Dataset, DataLoader
-from datautils import MyTrainDataset
+#from utils import MyTrainDataset
 import torch.multiprocessing as mp
 import torch.distributed as dist
@@ -84,7 +84,7 @@ class SentenceDataset(Dataset):
         return self.samples[idx]
 # ----------------- data handling functions
-7 def preprocess_comment(raw_text):
+def preprocess_comment(raw_text):
     # 1. replace code with CODE
     comment_text = re.sub(r'`[^`]+`', 'CODE', raw_text)  # Inline code
     comment_text = re.sub(r'```[\s\S]+?```', 'CODE', comment_text)  # Block code
diff --git a/p2/quest/slurm_jobs/parallel_olmo_categorization.sh b/p2/quest/slurm_jobs/parallel_olmo_categorization.sh
index 8b47c46..f39aba9 100644
--- a/p2/quest/slurm_jobs/parallel_olmo_categorization.sh
+++ b/p2/quest/slurm_jobs/parallel_olmo_categorization.sh
@@ -24,10 +24,10 @@ echo "running the bertopic job at $(date)"
 srun torchrun \
     --nnodes 2 \
-    --nproc 2 \
+    --nproc-per-node 2 \
     --rdzv_id $RANDOM \
     --rdzv_backend c10d \
     --rdzv_endpoint "$SLURMD_NODENAME:29502" \
-    /home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/info_labeling.py 10000 100
+    /home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py 10000 100
 echo "unsupervised olmo categorization pau at $(date)"
diff --git a/p2/quest/source_090425_biber_kernelpca_affil.png b/p2/quest/source_090425_biber_kernelpca_affil.png
new file mode 100644
index 0000000..d4a67cc
Binary files /dev/null and b/p2/quest/source_090425_biber_kernelpca_affil.png differ
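One loose end from the log above: every worker prints a pandas DtypeWarning when olmo_parallel_cat.py line 117 reads 072525_pp_biberplus_labels.csv. Following the suggestion in the warning itself, a minimal sketch of the quieter read; the call is otherwise unchanged, and whether the mixed-type column matters downstream is untested:

import pandas as pd

# low_memory=False makes pandas infer dtypes from the whole file instead of
# chunk by chunk, which is what triggers the mixed-type warning on column 21.
df = pd.read_csv(
    "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv",
    low_memory=False,
)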