first pass at implementing pca for the style vectors
This commit is contained in:
parent
b714e8dedb
commit
89105b7660
BIN
p2/quest/090225_biber_pca_plot.png
Normal file
BIN
p2/quest/090225_biber_pca_plot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 270 KiB |
5
p2/quest/neurobiber-pca.log
Normal file
5
p2/quest/neurobiber-pca.log
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
starting the job at: Tue Sep 2 15:27:43 CDT 2025
|
||||||
|
setting up the environment
|
||||||
|
running the neurobiber labeling script
|
||||||
|
job finished, cleaning up
|
||||||
|
job pau at: Tue Sep 2 15:28:28 CDT 2025
|
34
p2/quest/python_scripts/neurobiber_PCA.py
Normal file
34
p2/quest/python_scripts/neurobiber_PCA.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
from sklearn.decomposition import PCA
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
import pandas as pd
|
||||||
|
#import torch
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
def format_df_data(df):
    """Return the ``normalized_*`` columns of *df* as a 2-D float array.

    The features are stored one value per column, in columns whose names
    start with ``normalized_``; every other column is ignored.

    Args:
        df: DataFrame containing the ``normalized_*`` feature columns.

    Returns:
        A float ``numpy.ndarray`` with one row per row of *df* and one
        column per ``normalized_*`` column, in the DataFrame's column order.

    Raises:
        ValueError: If *df* has no column whose name starts with
            ``normalized_``.
    """
    # this accounts for the somewhat idiosyncratic way that I saved my data
    normalized_cols = [
        col for col in df.columns
        if isinstance(col, str) and col.startswith('normalized_')
    ]
    if not normalized_cols:
        # Fail here with a clear message rather than handing an (n, 0)
        # matrix to whatever consumes the features downstream.
        raise ValueError("no columns starting with 'normalized_' found in df")
    return df[normalized_cols].astype(float).to_numpy()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Load the labelled comments and pull out the normalized feature matrix.
    biber_vec_df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv", low_memory=False)
    biber_vecs = format_df_data(biber_vec_df)

    # handoff to PCA model: project the vectors onto the first two components
    pca = PCA(n_components=2)
    biber_vecs_pca = pca.fit_transform(biber_vecs)

    # first looking at comment_type: encode each label as an integer so it
    # can index into the colormap
    le = LabelEncoder()
    colors = le.fit_transform(biber_vec_df['comment_type'])

    fig, ax = plt.subplots()
    scatter = ax.scatter(biber_vecs_pca[:, 0], biber_vecs_pca[:, 1],
                         c=colors, edgecolor='none', alpha=0.5, cmap="viridis")
    ax.set_xlabel('component 1')
    ax.set_ylabel('component 2')

    # comment_type is categorical, so show a legend mapping each colour to
    # its label instead of a continuous colorbar over the encoded integers.
    # num=None yields one handle per unique encoded value in sorted order,
    # which is the same order as le.classes_.
    handles, _ = scatter.legend_elements(num=None)
    ax.legend(handles, [str(label) for label in le.classes_], title="comment_type")

    # Relative path: the image is written to the current working directory.
    fig.savefig("090225_biber_pca_plot.png", dpi=300)
    plt.close(fig)
|
32
p2/quest/slurm_jobs/pca_run.sh
Normal file
32
p2/quest/slurm_jobs/pca_run.sh
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#!/bin/bash
# Slurm batch job: runs neurobiber_PCA.py inside the `neurobiber` conda env.
# Output paths (the --output log and the script's PNG) are relative, so they
# land in the job's working directory.
#SBATCH -A p32852
# NOTE(review): neurobiber_PCA.py only uses scikit-learn/pandas/matplotlib
# (its torch import is commented out), so the GPU partition and a100 request
# look unnecessary -- confirm whether a CPU partition is available for this
# allocation before changing them.
#SBATCH -p gengpu
#SBATCH --gres=gpu:a100:1
# A single python process is launched without srun, so one node is enough.
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --time=24:00:00
#SBATCH --mem=64G
#SBATCH --cpus-per-task=4
#SBATCH --job-name=neurobiber-pca
#SBATCH --output=neurobiber-pca.log
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=gaughan@u.northwestern.edu

echo "starting the job at: $(date)"

echo "setting up the environment"

module purge
eval "$(conda shell.bash hook)"
conda activate neurobiber

echo "running the neurobiber PCA script"

python /home/nws8519/git/mw-lifecycle-analysis/p2/quest/python_scripts/neurobiber_PCA.py
# Remember the script's exit status: without this the job's status would be
# that of the final echo, so a crash would be reported as a success and the
# FAIL mail would never be sent.
status=$?

echo "job finished, cleaning up"

conda deactivate

echo "job pau at: $(date)"

exit "$status"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user