looking for new phase pca
This commit is contained in:
parent
809e858bbf
commit
ccf434db38
@ -1,9 +1,8 @@
|
|||||||
starting the job at: Thu Sep 4 10:09:58 CDT 2025
|
starting the job at: Thu Sep 4 10:23:23 CDT 2025
|
||||||
setting up the environment
|
setting up the environment
|
||||||
running the neurobiber labeling script
|
running the neurobiber labeling script
|
||||||
Number of PCs explaining 90% variance: 18
|
|
||||||
Variance of each PCA component: [88.92832185 39.46471687 32.34601523 20.19544345 14.0083261 11.5837521
|
Variance of each PCA component: [88.92832185 39.46471687 32.34601523 20.19544345 14.0083261 11.5837521
|
||||||
7.82584723 6.89064989 6.07988254 5.80726367 5.49782354 4.50587747
|
7.82584723 6.89064989 6.07988254 5.80726367 5.49782354 4.50587747
|
||||||
4.31482409 2.81997326 2.62989708 2.27205352 2.09396341 2.00076119]
|
4.31482409 2.81997326 2.62989708 2.27205352 2.09396341 2.00076119]
|
||||||
job finished, cleaning up
|
job finished, cleaning up
|
||||||
job pau at: Thu Sep 4 10:10:21 CDT 2025
|
job pau at: Thu Sep 4 10:23:47 CDT 2025
|
||||||
|
BIN
p2/quest/phase_090425_biber_kernelpca_affil.png
Normal file
BIN
p2/quest/phase_090425_biber_kernelpca_affil.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 797 KiB |
@ -32,7 +32,7 @@ if __name__ == "__main__":
|
|||||||
'''
|
'''
|
||||||
pca = PCA(n_components=18)
|
pca = PCA(n_components=18)
|
||||||
biber_vecs_pca = pca.fit_transform(biber_vecs)
|
biber_vecs_pca = pca.fit_transform(biber_vecs)
|
||||||
selected_axis = "source"
|
selected_axis = "phase"
|
||||||
|
|
||||||
component_variances = np.var(biber_vecs_pca, axis=0)
|
component_variances = np.var(biber_vecs_pca, axis=0)
|
||||||
print("Variance of each PCA component:", component_variances)
|
print("Variance of each PCA component:", component_variances)
|
||||||
@ -41,14 +41,23 @@ if __name__ == "__main__":
|
|||||||
le = LabelEncoder()
|
le = LabelEncoder()
|
||||||
colors = le.fit_transform(biber_vec_df[selected_axis])
|
colors = le.fit_transform(biber_vec_df[selected_axis])
|
||||||
|
|
||||||
plt.scatter(biber_vecs_pca[:, 0], biber_vecs_pca[:, 1],
|
plot_df = pd.DataFrame({
|
||||||
c=colors, edgecolor='none', alpha=0.5, cmap="viridis")
|
"PC1": biber_vecs_pca[:, 0],
|
||||||
plt.xlabel('component 1')
|
"PC2": biber_vecs_pca[:, 1],
|
||||||
plt.ylabel('component 2')
|
selected_axis: biber_vec_df[selected_axis].astype(str),
|
||||||
plt.colorbar()
|
"source":biber_vec_df['source'].astype(str)
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
g = sns.FacetGrid(plot_df, col="source", col_wrap=4, hue=selected_axis, palette="tab10", height=4, sharex=False, sharey=False)
|
||||||
|
g.map_dataframe(sns.scatterplot, x="PC1", y="PC2", alpha=0.7, s=40)
|
||||||
|
g.add_legend(title=selected_axis)
|
||||||
|
g.set_axis_labels("PC1", "PC2")
|
||||||
|
g.fig.subplots_adjust(top=0.9)
|
||||||
|
g.fig.suptitle(f"PCA by {selected_axis}, faceted by source")
|
||||||
|
|
||||||
#plt.savefig("090225_biber_pca_plot.png", dpi=300)
|
#plt.savefig("090225_biber_pca_plot.png", dpi=300)
|
||||||
|
'''
|
||||||
plot_df = pd.DataFrame({
|
plot_df = pd.DataFrame({
|
||||||
"PC1": biber_vecs_pca[:, 0],
|
"PC1": biber_vecs_pca[:, 0],
|
||||||
"PC2": biber_vecs_pca[:, 1],
|
"PC2": biber_vecs_pca[:, 1],
|
||||||
@ -62,6 +71,7 @@ if __name__ == "__main__":
|
|||||||
plt.xlabel('component 1')
|
plt.xlabel('component 1')
|
||||||
plt.ylabel('component 2')
|
plt.ylabel('component 2')
|
||||||
plt.legend(title=selected_axis, bbox_to_anchor=(1.05, 1), loc=2)
|
plt.legend(title=selected_axis, bbox_to_anchor=(1.05, 1), loc=2)
|
||||||
plt.tight_layout()
|
'''
|
||||||
plt.savefig(f"{selected_axis}_090425_biber_kernelpca_affil.png", dpi=300)
|
g.fig.tight_layout()
|
||||||
|
g.savefig(f"{selected_axis}_090425_biber_kernelpca_affil.png", dpi=300)
|
||||||
plt.show()
|
plt.show()
|
||||||
|
@ -14,7 +14,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM
|
|||||||
import csv
|
import csv
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import nltk
|
import nltk
|
||||||
|
nltk.download('punkt')
|
||||||
# ----------------- prompts for LLM
|
# ----------------- prompts for LLM
|
||||||
priming = "For the **GIVEN SENTENCE**, please categorize it into one of the defined [[CATEGORIES]]. Each [[CATEGORY]] is described in the TYPOLOGY for reference. Your task is to match the**GIVEN SENTENCE** to the **[[CATEGORY]]** that most accurately describes the content of the comment. Only provide the category as your output. Do not provide any text beyond the category name."
|
priming = "For the **GIVEN SENTENCE**, please categorize it into one of the defined [[CATEGORIES]]. Each [[CATEGORY]] is described in the TYPOLOGY for reference. Your task is to match the**GIVEN SENTENCE** to the **[[CATEGORY]]** that most accurately describes the content of the comment. Only provide the category as your output. Do not provide any text beyond the category name."
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user