diff --git a/p2/quest/090225_biber_pca_plot.png b/p2/quest/090225_biber_pca_plot.png index db77ca8..b0fac34 100644 Binary files a/p2/quest/090225_biber_pca_plot.png and b/p2/quest/090225_biber_pca_plot.png differ diff --git a/p2/quest/neurobiber-pca.log b/p2/quest/neurobiber-pca.log index 332705c..537ccbb 100644 --- a/p2/quest/neurobiber-pca.log +++ b/p2/quest/neurobiber-pca.log @@ -1,5 +1,5 @@ -starting the job at: Tue Sep 2 15:34:49 CDT 2025 +starting the job at: Tue Sep 2 15:49:08 CDT 2025 setting up the environment running the neurobiber labeling script job finished, cleaning up -job pau at: Tue Sep 2 15:35:35 CDT 2025 +job pau at: Tue Sep 2 15:49:52 CDT 2025 diff --git a/p2/quest/python_scripts/neurobiber_PCA.py b/p2/quest/python_scripts/neurobiber_PCA.py index b0804ca..ce427fa 100644 --- a/p2/quest/python_scripts/neurobiber_PCA.py +++ b/p2/quest/python_scripts/neurobiber_PCA.py @@ -16,6 +16,7 @@ def format_df_data(df): if __name__ == "__main__": biber_vec_df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/072525_pp_biberplus_labels.csv", low_memory=False) + biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] == 'task_description'] biber_vecs = format_df_data(biber_vec_df) #handoff to PCA model pca = PCA(2) @@ -23,7 +24,7 @@ if __name__ == "__main__": #first looking at comment_type le = LabelEncoder() - colors = le.fit_transform(biber_vec_df['phase']) + colors = le.fit_transform(biber_vec_df['source']) plt.scatter(biber_vecs_pca[:, 0], biber_vecs_pca[:, 1], c=colors, edgecolor='none', alpha=0.5, cmap="viridis")