updating with new pca results
This commit is contained in:
parent
a3c1a48dc7
commit
809e858bbf
Before Width: | Height: | Size: 1.4 MiB After Width: | Height: | Size: 1.4 MiB |
@ -1,5 +1,9 @@
|
||||
starting the job at: Wed Sep 3 18:53:34 CDT 2025
|
||||
starting the job at: Thu Sep 4 10:09:58 CDT 2025
|
||||
setting up the environment
|
||||
running the neurobiber labeling script
|
||||
Number of PCs explaining 90% variance: 18
|
||||
Variance of each PCA component: [88.92832185 39.46471687 32.34601523 20.19544345 14.0083261 11.5837521
|
||||
7.82584723 6.89064989 6.07988254 5.80726367 5.49782354 4.50587747
|
||||
4.31482409 2.81997326 2.62989708 2.27205352 2.09396341 2.00076119]
|
||||
job finished, cleaning up
|
||||
job pau at: Wed Sep 3 18:53:58 CDT 2025
|
||||
job pau at: Thu Sep 4 10:10:21 CDT 2025
|
||||
|
@ -1,8 +1,8 @@
|
||||
setting up the environment by loading in conda environment at Wed Sep 3 19:04:03 CDT 2025
|
||||
running the bertopic job at Wed Sep 3 19:04:03 CDT 2025
|
||||
setting up the environment by loading in conda environment at Thu Sep 4 10:04:55 CDT 2025
|
||||
running the bertopic job at Thu Sep 4 10:04:55 CDT 2025
|
||||
----------------------------------------
|
||||
srun job start: Wed Sep 3 19:04:03 CDT 2025
|
||||
Job ID: 3220869
|
||||
srun job start: Thu Sep 4 10:04:55 CDT 2025
|
||||
Job ID: 3272179
|
||||
Username: nws8519
|
||||
Queue: gengpu
|
||||
Account: p32852
|
||||
@ -11,16 +11,230 @@ The following variables are not
|
||||
guaranteed to be the same in the
|
||||
prologue and the job run script
|
||||
----------------------------------------
|
||||
PATH (in prologue) : /home/nws8519/.conda/envs/olmo/bin:/software/miniconda3/4.12.0/condabin:/home/nws8519/.local/bin:/home/nws8519/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/usr/lpp/mmfs/bin:/hpc/usertools
|
||||
PATH (in prologue) : /home/nws8519/.conda/envs/olmo/bin:/software/miniconda3/4.12.0/condabin:/home/nws8519/.local/bin:/home/nws8519/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/usr/lpp/mmfs/bin:/hpc/usertools
|
||||
WORKDIR is: /home/nws8519
|
||||
----------------------------------------
|
||||
/home/nws8519/.conda/envs/olmo/bin/python3.11: can't open file '/gpfs/home/nws8519/git/mw-lifecycle-analysis/p2/quest/nnodes': [Errno 2] No such file or directory
|
||||
/home/nws8519/.conda/envs/olmo/bin/python3.11: can't open file '/gpfs/home/nws8519/git/mw-lifecycle-analysis/p2/quest/nnodes': [Errno 2] No such file or directory
|
||||
Traceback (most recent call last):
|
||||
File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in <module>
|
||||
sys.exit(main())
|
||||
^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
||||
W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766]
|
||||
W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
||||
W0904 10:05:10.900000 1845275 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
W0904 10:05:10.900000 1845276 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766]
|
||||
W0904 10:05:10.900000 1845276 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
W0904 10:05:10.900000 1845276 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
||||
W0904 10:05:10.900000 1845276 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
W0904 10:05:10.906000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766]
|
||||
W0904 10:05:10.906000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
W0904 10:05:10.906000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
||||
W0904 10:05:10.906000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
W0904 10:05:10.907000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766]
|
||||
W0904 10:05:10.907000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
W0904 10:05:10.907000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
||||
W0904 10:05:10.907000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py:766] *****************************************
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
|
||||
df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
|
||||
df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
|
||||
[rank0]: Traceback (most recent call last):
|
||||
[rank0]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 178, in <module>
|
||||
[rank0]: main()
|
||||
[rank0]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 122, in main
|
||||
[rank0]: dataset = SentenceDataset(comment_texts, comment_types, priming, typology, instructions)
|
||||
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank0]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 76, in __init__
|
||||
[rank0]: sentences = split_to_sentences(cleaned_comment)
|
||||
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank0]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 106, in split_to_sentences
|
||||
[rank0]: return nltk.sent_tokenize(text)
|
||||
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank0]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
|
||||
[rank0]: tokenizer = _get_punkt_tokenizer(language)
|
||||
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank0]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
|
||||
[rank0]: return PunktTokenizer(language)
|
||||
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank0]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
|
||||
[rank0]: self.load_lang(lang)
|
||||
[rank0]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
|
||||
[rank0]: lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
|
||||
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank0]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/data.py", line 579, in find
|
||||
[rank0]: raise LookupError(resource_not_found)
|
||||
[rank0]: LookupError:
|
||||
[rank0]: **********************************************************************
|
||||
[rank0]: Resource [93mpunkt_tab[0m not found.
|
||||
[rank0]: Please use the NLTK Downloader to obtain the resource:
|
||||
|
||||
[rank0]: [31m>>> import nltk
|
||||
[rank0]: >>> nltk.download('punkt_tab')
|
||||
[rank0]: [0m
|
||||
[rank0]: For more information see: https://www.nltk.org/data.html
|
||||
|
||||
[rank0]: Attempted to load [93mtokenizers/punkt_tab/english/[0m
|
||||
|
||||
[rank0]: Searched in:
|
||||
[rank0]: - '/home/nws8519/nltk_data'
|
||||
[rank0]: - '/home/nws8519/.conda/envs/olmo/nltk_data'
|
||||
[rank0]: - '/home/nws8519/.conda/envs/olmo/share/nltk_data'
|
||||
[rank0]: - '/home/nws8519/.conda/envs/olmo/lib/nltk_data'
|
||||
[rank0]: - '/usr/share/nltk_data'
|
||||
[rank0]: - '/usr/local/share/nltk_data'
|
||||
[rank0]: - '/usr/lib/nltk_data'
|
||||
[rank0]: - '/usr/local/lib/nltk_data'
|
||||
[rank0]: **********************************************************************
|
||||
|
||||
[rank2]: Traceback (most recent call last):
|
||||
[rank2]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 178, in <module>
|
||||
[rank2]: main()
|
||||
[rank2]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 122, in main
|
||||
[rank2]: dataset = SentenceDataset(comment_texts, comment_types, priming, typology, instructions)
|
||||
[rank2]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank2]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 76, in __init__
|
||||
[rank2]: sentences = split_to_sentences(cleaned_comment)
|
||||
[rank2]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank2]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 106, in split_to_sentences
|
||||
[rank2]: return nltk.sent_tokenize(text)
|
||||
[rank2]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank2]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
|
||||
[rank2]: tokenizer = _get_punkt_tokenizer(language)
|
||||
[rank2]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank2]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
|
||||
[rank2]: return PunktTokenizer(language)
|
||||
[rank2]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank2]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
|
||||
[rank2]: self.load_lang(lang)
|
||||
[rank2]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
|
||||
[rank2]: lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
|
||||
[rank2]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank2]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/data.py", line 579, in find
|
||||
[rank2]: raise LookupError(resource_not_found)
|
||||
[rank2]: LookupError:
|
||||
[rank2]: **********************************************************************
|
||||
[rank2]: Resource [93mpunkt_tab[0m not found.
|
||||
[rank2]: Please use the NLTK Downloader to obtain the resource:
|
||||
|
||||
[rank2]: [31m>>> import nltk
|
||||
[rank2]: >>> nltk.download('punkt_tab')
|
||||
[rank2]: [0m
|
||||
[rank2]: For more information see: https://www.nltk.org/data.html
|
||||
|
||||
[rank2]: Attempted to load [93mtokenizers/punkt_tab/english/[0m
|
||||
|
||||
[rank2]: Searched in:
|
||||
[rank2]: - '/home/nws8519/nltk_data'
|
||||
[rank2]: - '/home/nws8519/.conda/envs/olmo/nltk_data'
|
||||
[rank2]: - '/home/nws8519/.conda/envs/olmo/share/nltk_data'
|
||||
[rank2]: - '/home/nws8519/.conda/envs/olmo/lib/nltk_data'
|
||||
[rank2]: - '/usr/share/nltk_data'
|
||||
[rank2]: - '/usr/local/share/nltk_data'
|
||||
[rank2]: - '/usr/lib/nltk_data'
|
||||
[rank2]: - '/usr/local/lib/nltk_data'
|
||||
[rank2]: **********************************************************************
|
||||
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
|
||||
df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py:117: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.
|
||||
df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv")
|
||||
[rank1]: Traceback (most recent call last):
|
||||
[rank1]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 178, in <module>
|
||||
[rank1]: main()
|
||||
[rank1]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 122, in main
|
||||
[rank1]: dataset = SentenceDataset(comment_texts, comment_types, priming, typology, instructions)
|
||||
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank1]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 76, in __init__
|
||||
[rank1]: sentences = split_to_sentences(cleaned_comment)
|
||||
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank1]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 106, in split_to_sentences
|
||||
[rank1]: return nltk.sent_tokenize(text)
|
||||
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank1]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
|
||||
[rank1]: tokenizer = _get_punkt_tokenizer(language)
|
||||
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank1]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
|
||||
[rank1]: return PunktTokenizer(language)
|
||||
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank1]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
|
||||
[rank1]: self.load_lang(lang)
|
||||
[rank1]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
|
||||
[rank1]: lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
|
||||
[rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank1]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/data.py", line 579, in find
|
||||
[rank1]: raise LookupError(resource_not_found)
|
||||
[rank1]: LookupError:
|
||||
[rank1]: **********************************************************************
|
||||
[rank1]: Resource [93mpunkt_tab[0m not found.
|
||||
[rank1]: Please use the NLTK Downloader to obtain the resource:
|
||||
|
||||
[rank1]: [31m>>> import nltk
|
||||
[rank1]: >>> nltk.download('punkt_tab')
|
||||
[rank1]: [0m
|
||||
[rank1]: For more information see: https://www.nltk.org/data.html
|
||||
|
||||
[rank1]: Attempted to load [93mtokenizers/punkt_tab/english/[0m
|
||||
|
||||
[rank1]: Searched in:
|
||||
[rank1]: - '/home/nws8519/nltk_data'
|
||||
[rank1]: - '/home/nws8519/.conda/envs/olmo/nltk_data'
|
||||
[rank1]: - '/home/nws8519/.conda/envs/olmo/share/nltk_data'
|
||||
[rank1]: - '/home/nws8519/.conda/envs/olmo/lib/nltk_data'
|
||||
[rank1]: - '/usr/share/nltk_data'
|
||||
[rank1]: - '/usr/local/share/nltk_data'
|
||||
[rank1]: - '/usr/lib/nltk_data'
|
||||
[rank1]: - '/usr/local/lib/nltk_data'
|
||||
[rank1]: **********************************************************************
|
||||
|
||||
[rank3]: Traceback (most recent call last):
|
||||
[rank3]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 178, in <module>
|
||||
[rank3]: main()
|
||||
[rank3]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 122, in main
|
||||
[rank3]: dataset = SentenceDataset(comment_texts, comment_types, priming, typology, instructions)
|
||||
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank3]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 76, in __init__
|
||||
[rank3]: sentences = split_to_sentences(cleaned_comment)
|
||||
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank3]: File "/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py", line 106, in split_to_sentences
|
||||
[rank3]: return nltk.sent_tokenize(text)
|
||||
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank3]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
|
||||
[rank3]: tokenizer = _get_punkt_tokenizer(language)
|
||||
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank3]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
|
||||
[rank3]: return PunktTokenizer(language)
|
||||
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank3]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
|
||||
[rank3]: self.load_lang(lang)
|
||||
[rank3]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
|
||||
[rank3]: lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
|
||||
[rank3]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
[rank3]: File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/nltk/data.py", line 579, in find
|
||||
[rank3]: raise LookupError(resource_not_found)
|
||||
[rank3]: LookupError:
|
||||
[rank3]: **********************************************************************
|
||||
[rank3]: Resource [93mpunkt_tab[0m not found.
|
||||
[rank3]: Please use the NLTK Downloader to obtain the resource:
|
||||
|
||||
[rank3]: [31m>>> import nltk
|
||||
[rank3]: >>> nltk.download('punkt_tab')
|
||||
[rank3]: [0m
|
||||
[rank3]: For more information see: https://www.nltk.org/data.html
|
||||
|
||||
[rank3]: Attempted to load [93mtokenizers/punkt_tab/english/[0m
|
||||
|
||||
[rank3]: Searched in:
|
||||
[rank3]: - '/home/nws8519/nltk_data'
|
||||
[rank3]: - '/home/nws8519/.conda/envs/olmo/nltk_data'
|
||||
[rank3]: - '/home/nws8519/.conda/envs/olmo/share/nltk_data'
|
||||
[rank3]: - '/home/nws8519/.conda/envs/olmo/lib/nltk_data'
|
||||
[rank3]: - '/usr/share/nltk_data'
|
||||
[rank3]: - '/usr/local/share/nltk_data'
|
||||
[rank3]: - '/usr/lib/nltk_data'
|
||||
[rank3]: - '/usr/local/lib/nltk_data'
|
||||
[rank3]: **********************************************************************
|
||||
|
||||
[rank2]:[W904 10:05:56.100290280 ProcessGroupNCCL.cpp:1476] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
|
||||
[rank0]:[W904 10:05:56.107999460 ProcessGroupNCCL.cpp:1476] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
|
||||
W0904 10:05:57.705000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:900] Sending process 1400332 closing signal SIGTERM
|
||||
W0904 10:05:57.720000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:900] Sending process 1400334 closing signal SIGTERM
|
||||
E0904 10:05:57.770000 1400307 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 1) local_rank: 0 (pid: 1400331) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11
|
||||
Traceback (most recent call last):
|
||||
File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in <module>
|
||||
sys.exit(main())
|
||||
@ -29,93 +243,6 @@ Traceback (most recent call last):
|
||||
return f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
|
||||
return f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
|
||||
run(args)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
|
||||
run(args)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
|
||||
elastic_launch(
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
||||
elastic_launch(
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
||||
return launch_agent(self._config, self._entrypoint, list(args))
|
||||
return launch_agent(self._config, self._entrypoint, list(args))
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 261, in launch_agent
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 261, in launch_agent
|
||||
result = agent.run()
|
||||
result = agent.run()
|
||||
^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper
|
||||
^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper
|
||||
result = f(*args, **kwargs)
|
||||
result = f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 711, in run
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 711, in run
|
||||
result = self._invoke_run(role)
|
||||
result = self._invoke_run(role)
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 864, in _invoke_run
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 864, in _invoke_run
|
||||
self._initialize_workers(self._worker_group)
|
||||
self._initialize_workers(self._worker_group)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper
|
||||
result = f(*args, **kwargs)
|
||||
result = f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 683, in _initialize_workers
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 683, in _initialize_workers
|
||||
self._rendezvous(worker_group)
|
||||
self._rendezvous(worker_group)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/metrics/api.py", line 138, in wrapper
|
||||
result = f(*args, **kwargs)
|
||||
result = f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 500, in _rendezvous
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/agent/server/api.py", line 500, in _rendezvous
|
||||
rdzv_info = spec.rdzv_handler.next_rendezvous()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/rendezvous/static_tcp_rendezvous.py", line 67, in next_rendezvous
|
||||
self._store = TCPStore( # type: ignore[call-arg]
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
torch.distributed.DistNetworkError: The server socket has failed to listen on any local network address. port: 29500, useIpv6: false, code: -98, name: EADDRINUSE, message: address already in use
|
||||
rdzv_info = spec.rdzv_handler.next_rendezvous()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/rendezvous/static_tcp_rendezvous.py", line 67, in next_rendezvous
|
||||
self._store = TCPStore( # type: ignore[call-arg]
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
torch.distributed.DistNetworkError: The server socket has failed to listen on any local network address. port: 29500, useIpv6: false, code: -98, name: EADDRINUSE, message: address already in use
|
||||
E0903 19:04:19.236000 1488504 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 2) local_rank: 0 (pid: 1488524) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11
|
||||
E0903 19:04:19.236000 2554912 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 2) local_rank: 0 (pid: 2554950) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11
|
||||
Traceback (most recent call last):
|
||||
File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in <module>
|
||||
sys.exit(main())
|
||||
^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
||||
Traceback (most recent call last):
|
||||
File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in <module>
|
||||
return f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
|
||||
sys.exit(main())
|
||||
^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
||||
return f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
|
||||
run(args)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
|
||||
run(args)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
|
||||
elastic_launch(
|
||||
@ -126,20 +253,31 @@ Traceback (most recent call last):
|
||||
raise ChildFailedError(
|
||||
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
|
||||
============================================================
|
||||
nnodes FAILED
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py FAILED
|
||||
------------------------------------------------------------
|
||||
Failures:
|
||||
<NO_OTHER_FAILURES>
|
||||
------------------------------------------------------------
|
||||
Root Cause (first observed failure):
|
||||
[0]:
|
||||
time : 2025-09-03_19:04:19
|
||||
host : qgpu2013
|
||||
time : 2025-09-04_10:05:57
|
||||
host : qgpu0203
|
||||
rank : 0 (local_rank: 0)
|
||||
exitcode : 2 (pid: 1488524)
|
||||
exitcode : 1 (pid: 1400331)
|
||||
error_file: <N/A>
|
||||
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
|
||||
============================================================
|
||||
E0904 10:05:57.885000 1400308 /gpfs/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/api.py:874] failed (exitcode: 1) local_rank: 0 (pid: 1400333) of binary: /home/nws8519/.conda/envs/olmo/bin/python3.11
|
||||
Traceback (most recent call last):
|
||||
File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in <module>
|
||||
sys.exit(main())
|
||||
^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
||||
return f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
|
||||
run(args)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
|
||||
elastic_launch(
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
||||
return launch_agent(self._config, self._entrypoint, list(args))
|
||||
@ -148,20 +286,56 @@ Root Cause (first observed failure):
|
||||
raise ChildFailedError(
|
||||
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
|
||||
============================================================
|
||||
nnodes FAILED
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py FAILED
|
||||
------------------------------------------------------------
|
||||
Failures:
|
||||
<NO_OTHER_FAILURES>
|
||||
------------------------------------------------------------
|
||||
Root Cause (first observed failure):
|
||||
[0]:
|
||||
time : 2025-09-03_19:04:19
|
||||
host : qgpu2014
|
||||
rank : 0 (local_rank: 0)
|
||||
exitcode : 2 (pid: 2554950)
|
||||
time : 2025-09-04_10:05:57
|
||||
host : qgpu0203
|
||||
rank : 2 (local_rank: 0)
|
||||
exitcode : 1 (pid: 1400333)
|
||||
error_file: <N/A>
|
||||
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
|
||||
============================================================
|
||||
srun: error: qgpu2013: tasks 0-1: Exited with exit code 1
|
||||
srun: error: qgpu2014: tasks 2-3: Exited with exit code 1
|
||||
unsupervised olmo categorization pau at Wed Sep 3 19:04:19 CDT 2025
|
||||
Traceback (most recent call last):
|
||||
File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in <module>
|
||||
sys.exit(main())
|
||||
^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
||||
return f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
|
||||
run(args)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
|
||||
elastic_launch(
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
||||
return launch_agent(self._config, self._entrypoint, list(args))
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 265, in launch_agent
|
||||
if result.is_failed():
|
||||
^^^^^^^^^^^^^^^^
|
||||
AttributeError: 'NoneType' object has no attribute 'is_failed'
|
||||
Traceback (most recent call last):
|
||||
File "/home/nws8519/.conda/envs/olmo/bin/torchrun", line 8, in <module>
|
||||
sys.exit(main())
|
||||
^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
||||
return f(*args, **kwargs)
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 892, in main
|
||||
run(args)
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/run.py", line 883, in run
|
||||
elastic_launch(
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
||||
return launch_agent(self._config, self._entrypoint, list(args))
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "/home/nws8519/.conda/envs/olmo/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 265, in launch_agent
|
||||
if result.is_failed():
|
||||
^^^^^^^^^^^^^^^^
|
||||
AttributeError: 'NoneType' object has no attribute 'is_failed'
|
||||
srun: error: qgpu0203: tasks 2-3: Exited with exit code 1
|
||||
srun: error: qgpu0202: tasks 0-1: Exited with exit code 1
|
||||
unsupervised olmo categorization pau at Thu Sep 4 10:05:58 CDT 2025
|
||||
|
Binary file not shown.
@ -20,12 +20,26 @@ if __name__ == "__main__":
|
||||
biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] == 'task_description']
|
||||
biber_vecs = format_df_data(biber_vec_df)
|
||||
#handoff to PCA model
|
||||
pca = KernelPCA(n_components=2, kernel="rbf")
|
||||
'''
|
||||
pca_trial = PCA()
|
||||
biber_vecs_pca_trial = pca_trial.fit_transform(biber_vecs)
|
||||
|
||||
explained_variance = pca_trial.explained_variance_ratio_
|
||||
cumulative_variance = np.cumsum(explained_variance)
|
||||
|
||||
n_components = np.argmax(cumulative_variance >= 0.90) + 1
|
||||
print(f"Number of PCs explaining 90% variance: {n_components}")
|
||||
'''
|
||||
pca = PCA(n_components=18)
|
||||
biber_vecs_pca = pca.fit_transform(biber_vecs)
|
||||
selected_axis = "source"
|
||||
|
||||
component_variances = np.var(biber_vecs_pca, axis=0)
|
||||
print("Variance of each PCA component:", component_variances)
|
||||
|
||||
#first looking at comment_type
|
||||
le = LabelEncoder()
|
||||
colors = le.fit_transform(biber_vec_df['AuthorWMFAffil'])
|
||||
colors = le.fit_transform(biber_vec_df[selected_axis])
|
||||
|
||||
plt.scatter(biber_vecs_pca[:, 0], biber_vecs_pca[:, 1],
|
||||
c=colors, edgecolor='none', alpha=0.5, cmap="viridis")
|
||||
@ -38,16 +52,16 @@ if __name__ == "__main__":
|
||||
plot_df = pd.DataFrame({
|
||||
"PC1": biber_vecs_pca[:, 0],
|
||||
"PC2": biber_vecs_pca[:, 1],
|
||||
"AuthorWMFAffil": biber_vec_df["AuthorWMFAffil"].astype(str)
|
||||
selected_axis: biber_vec_df[selected_axis].astype(str)
|
||||
})
|
||||
plt.figure(figsize=(8,6))
|
||||
sns.scatterplot(
|
||||
data=plot_df, x="PC1", y="PC2", hue="AuthorWMFAffil",
|
||||
data=plot_df, x="PC1", y="PC2", hue="source",
|
||||
palette="tab10", s=40, alpha=0.7, edgecolor=None
|
||||
)
|
||||
plt.xlabel('component 1')
|
||||
plt.ylabel('component 2')
|
||||
plt.legend(title='AuthorWMFAffil', bbox_to_anchor=(1.05, 1), loc=2)
|
||||
plt.legend(title=selected_axis, bbox_to_anchor=(1.05, 1), loc=2)
|
||||
plt.tight_layout()
|
||||
plt.savefig("biber_kernelpca_affil.png", dpi=300)
|
||||
plt.savefig(f"{selected_axis}_090425_biber_kernelpca_affil.png", dpi=300)
|
||||
plt.show()
|
||||
|
@ -1,7 +1,7 @@
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from datautils import MyTrainDataset
|
||||
#from utils import MyTrainDataset
|
||||
|
||||
import torch.multiprocessing as mp
|
||||
import torch.distributed as dist
|
||||
@ -84,7 +84,7 @@ class SentenceDataset(Dataset):
|
||||
return self.samples[idx]
|
||||
|
||||
# ----------------- data handling functions
|
||||
7 def preprocess_comment(raw_text):
|
||||
def preprocess_comment(raw_text):
|
||||
# 1. replace code with CODE
|
||||
comment_text = re.sub(r'`[^`]+`', 'CODE', raw_text) # Inline code
|
||||
comment_text = re.sub(r'```[\s\S]+?```', 'CODE', comment_text) # Block code
|
||||
|
@ -24,10 +24,10 @@ echo "running the bertopic job at $(date)"
|
||||
|
||||
srun torchrun \
|
||||
--nnodes 2 \
|
||||
--nproc 2 \
|
||||
--nproc-per-node 2 \
|
||||
--rdzv_id $RANDOM \
|
||||
--rdzv_backend c10d \
|
||||
--rdzv_endpoint "$SLURMD_NODENAME:29502" \
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/info_labeling.py 10000 100
|
||||
/home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/olmo_parallel_cat.py 10000 100
|
||||
|
||||
echo "unsupervised olmo categorization pau at $(date)"
|
||||
|
BIN
p2/quest/source_090425_biber_kernelpca_affil.png
Normal file
BIN
p2/quest/source_090425_biber_kernelpca_affil.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 517 KiB |
Loading…
Reference in New Issue
Block a user