1
0
mw-lifecycle-analysis/p2/quest/python_scripts/neurobiber_PCA.py
2025-09-02 15:50:47 -05:00

36 lines
1.2 KiB
Python

from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
import pandas as pd
#import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def format_df_data(df: pd.DataFrame, prefix: str = "normalized_") -> np.ndarray:
    """Return the feature columns of *df* as a 2-D float array.

    Per the original author's note, this accounts for the way the data was
    saved: each feature lives in its own column whose name starts with
    ``normalized_``, so the feature matrix is rebuilt by collecting those
    columns.

    Args:
        df: Frame holding one feature per prefixed column.
        prefix: Column-name prefix that marks a feature column. Defaults to
            ``"normalized_"``, the prefix the script has always used.

    Returns:
        Array with one row per row of *df* and one column per matching
        column, in the order the columns appear in *df*. When no column
        matches, the array has zero columns.

    Raises:
        ValueError: Propagated from ``astype(float)`` when a selected value
            cannot be converted to a float.
    """
    # Column labels are not guaranteed to be strings (e.g. integer labels on
    # a frame built in memory), so check the type before calling startswith.
    feature_cols = [
        col
        for col in df.columns
        if isinstance(col, str) and col.startswith(prefix)
    ]
    return df[feature_cols].astype(float).to_numpy()
def main():
    """Project the task-description feature vectors onto 2 PCA components.

    Reads the labelled CSV, keeps only rows whose ``comment_type`` is
    ``task_description``, fits a 2-component PCA on the ``normalized_*``
    feature columns, and saves a scatter plot of the projection coloured by
    the ``source`` column to ``090225_biber_pca_plot.png``.

    Raises:
        ValueError: If the filtered data has fewer than two rows or fewer
            than two feature columns, since a 2-component PCA cannot be fit.
    """
    biber_vec_df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv", low_memory=False)
    biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] == 'task_description']
    biber_vecs = format_df_data(biber_vec_df)

    # Fail with a clear message instead of an opaque error from inside PCA
    # when the filter matched (almost) nothing or no feature columns exist.
    if min(biber_vecs.shape) < 2:
        raise ValueError(
            "need at least 2 rows and 2 feature columns for a 2-component "
            f"PCA, got an array of shape {biber_vecs.shape}"
        )

    # handoff to PCA model
    pca = PCA(2)
    biber_vecs_pca = pca.fit_transform(biber_vecs)

    # colour the points by their 'source' value; LabelEncoder maps each
    # distinct source to an integer code 0..n_classes-1
    le = LabelEncoder()
    colors = le.fit_transform(biber_vec_df['source'])

    fig, ax = plt.subplots()
    points = ax.scatter(
        biber_vecs_pca[:, 0],
        biber_vecs_pca[:, 1],
        c=colors,
        edgecolor='none',
        alpha=0.5,
        cmap="viridis",
    )
    ax.set_xlabel('component 1')
    ax.set_ylabel('component 2')

    # Label the colorbar with the original source names; the bare encoded
    # integers carry no meaning for someone reading the figure.
    cbar = fig.colorbar(points, ax=ax)
    cbar.set_ticks(list(range(len(le.classes_))))
    cbar.set_ticklabels([str(label) for label in le.classes_])

    fig.savefig("090225_biber_pca_plot.png", dpi=300)
    plt.close(fig)  # release the figure once it is on disk


if __name__ == "__main__":
    main()