updated PCA results with dropped rows
This commit is contained in:
parent
e61d3b6599
commit
f636969541
37338
p2/quest/100125_description_PCA_df.csv
Normal file
37338
p2/quest/100125_description_PCA_df.csv
Normal file
File diff suppressed because one or more lines are too long
299
p2/quest/100125_description_neurobiber-pca.log
Normal file
299
p2/quest/100125_description_neurobiber-pca.log
Normal file
@ -0,0 +1,299 @@
|
||||
starting the job at: Wed Oct 1 20:55:40 CDT 2025
|
||||
setting up the environment
|
||||
running the neurobiber labeling script
|
||||
Number of PCs explaining 90% variance: 21
|
||||
Variance of each PCA component: [44.14465236 25.51079987 20.02977026 11.84052754 8.73144858 8.38589906
|
||||
6.95245699 5.64852989 5.25245119 4.98015739 4.87640589 3.84009303
|
||||
3.46134099 2.49633957 2.31075199 2.07408882 1.83990439 1.83715267
|
||||
1.69163987 1.34972345 1.21923888]
|
||||
PC1:
|
||||
BIN_CAP: 0.575
|
||||
BIN_NNP: 0.568
|
||||
BIN_DET: -0.296
|
||||
BIN_ART: -0.232
|
||||
BIN_PREP: -0.226
|
||||
BIN_PIN: -0.226
|
||||
BIN_RB: -0.126
|
||||
BIN_INF: -0.109
|
||||
BIN_PRP: -0.105
|
||||
BIN_SBJP: -0.105
|
||||
PC2:
|
||||
BIN_PREP: 0.498
|
||||
BIN_PIN: 0.498
|
||||
BIN_NN: 0.460
|
||||
BIN_CAP: 0.334
|
||||
BIN_NNP: 0.313
|
||||
BIN_DET: 0.148
|
||||
BIN_NOMZ: -0.112
|
||||
BIN_ART: 0.111
|
||||
BIN_INF: 0.097
|
||||
BIN_CONJ: 0.075
|
||||
PC3:
|
||||
BIN_NN: 0.811
|
||||
BIN_PIN: -0.235
|
||||
BIN_PREP: -0.235
|
||||
BIN_NNP: -0.223
|
||||
BIN_PRP: -0.196
|
||||
BIN_SBJP: -0.196
|
||||
BIN_RB: -0.175
|
||||
BIN_INF: -0.130
|
||||
BIN_FPP1: -0.091
|
||||
BIN_VPRT: -0.085
|
||||
PC4:
|
||||
BIN_DET: 0.587
|
||||
BIN_ART: 0.528
|
||||
BIN_PREP: -0.282
|
||||
BIN_PIN: -0.282
|
||||
BIN_CAP: 0.252
|
||||
BIN_INDA: 0.183
|
||||
BIN_VPRT: 0.178
|
||||
BIN_JJ: -0.137
|
||||
BIN_NOMZ: -0.130
|
||||
BIN_NNP: 0.123
|
||||
PC5:
|
||||
BIN_RB: 0.439
|
||||
BIN_CAP: 0.348
|
||||
BIN_PRP: 0.313
|
||||
BIN_SBJP: 0.313
|
||||
BIN_NNP: -0.285
|
||||
BIN_ART: -0.234
|
||||
BIN_VPRT: 0.231
|
||||
BIN_NN: 0.229
|
||||
BIN_DET: -0.210
|
||||
BIN_NOMZ: -0.160
|
||||
PC6:
|
||||
BIN_JJ: 0.552
|
||||
BIN_CAP: 0.454
|
||||
BIN_NNP: -0.397
|
||||
BIN_NOMZ: 0.374
|
||||
BIN_X: -0.208
|
||||
BIN_QUOT: -0.184
|
||||
BIN_NN: -0.160
|
||||
BIN_NUM: -0.146
|
||||
BIN_CONT: -0.117
|
||||
BIN_ART: 0.088
|
||||
PC7:
|
||||
BIN_JJ: 0.552
|
||||
BIN_NNP: 0.417
|
||||
BIN_VPRT: 0.374
|
||||
BIN_CAP: -0.333
|
||||
BIN_RB: 0.258
|
||||
BIN_QUOT: -0.224
|
||||
BIN_X: -0.175
|
||||
BIN_INF: -0.172
|
||||
BIN_AUXB: 0.157
|
||||
BIN_XX0: 0.091
|
||||
PC8:
|
||||
BIN_INF: 0.720
|
||||
BIN_QUOT: -0.330
|
||||
BIN_VPRT: -0.252
|
||||
BIN_RB: 0.200
|
||||
BIN_TO: 0.190
|
||||
BIN_NOMZ: 0.159
|
||||
BIN_NUM: -0.152
|
||||
BIN_NNP: 0.147
|
||||
BIN_PRP: -0.132
|
||||
BIN_SBJP: -0.132
|
||||
PC9:
|
||||
BIN_QUOT: 0.681
|
||||
BIN_JJ: 0.417
|
||||
BIN_INF: 0.317
|
||||
BIN_CONT: 0.281
|
||||
BIN_NOMZ: -0.266
|
||||
BIN_PRP: -0.139
|
||||
BIN_SBJP: -0.139
|
||||
BIN_X: 0.129
|
||||
BIN_RB: 0.084
|
||||
BIN_CAP: 0.072
|
||||
PC10:
|
||||
BIN_RB: 0.507
|
||||
BIN_PRP: -0.411
|
||||
BIN_SBJP: -0.411
|
||||
BIN_NNP: -0.204
|
||||
BIN_X: 0.202
|
||||
BIN_FPP1: -0.195
|
||||
BIN_INF: -0.193
|
||||
BIN_NOMZ: -0.158
|
||||
BIN_NUM: 0.156
|
||||
BIN_JJ: -0.154
|
||||
PC11:
|
||||
BIN_X: 0.632
|
||||
BIN_NOMZ: -0.436
|
||||
BIN_QUOT: -0.379
|
||||
BIN_JJ: 0.317
|
||||
BIN_CONT: -0.171
|
||||
BIN_NUM: 0.159
|
||||
BIN_RB: -0.149
|
||||
BIN_PRP: 0.119
|
||||
BIN_SBJP: 0.119
|
||||
BIN_INF: 0.106
|
||||
PC12:
|
||||
BIN_VPRT: 0.495
|
||||
BIN_X: 0.445
|
||||
BIN_AUXB: 0.381
|
||||
BIN_NUM: -0.346
|
||||
BIN_NOMZ: 0.291
|
||||
BIN_RB: -0.234
|
||||
BIN_PASS: 0.177
|
||||
BIN_JJ: -0.159
|
||||
BIN_VBD: -0.118
|
||||
BIN_BEMA: 0.112
|
||||
PC13:
|
||||
BIN_NUM: 0.440
|
||||
BIN_X: -0.437
|
||||
BIN_RB: -0.347
|
||||
BIN_NOMZ: -0.338
|
||||
BIN_AUXB: 0.333
|
||||
BIN_VPRT: 0.223
|
||||
BIN_INF: 0.210
|
||||
BIN_PASS: 0.141
|
||||
BIN_TO: 0.132
|
||||
BIN_BEMA: 0.127
|
||||
PC14:
|
||||
BIN_AUXB: 0.473
|
||||
BIN_VPRT: -0.443
|
||||
BIN_NUM: -0.405
|
||||
BIN_VBD: 0.282
|
||||
BIN_CONT: -0.239
|
||||
BIN_NOMZ: -0.211
|
||||
BIN_PASS: 0.209
|
||||
BIN_BEMA: 0.165
|
||||
BIN_INF: -0.156
|
||||
BIN_CCONJ: 0.149
|
||||
PC15:
|
||||
BIN_NUM: 0.581
|
||||
BIN_NOMZ: 0.428
|
||||
BIN_AUXB: 0.327
|
||||
BIN_VPRT: -0.213
|
||||
BIN_PGAS: -0.197
|
||||
BIN_X: 0.187
|
||||
BIN_RB: 0.164
|
||||
BIN_BEMA: 0.163
|
||||
BIN_QUOT: 0.143
|
||||
BIN_CCONJ: -0.143
|
||||
PC16:
|
||||
BIN_PGAS: 0.702
|
||||
BIN_CONJ: -0.428
|
||||
BIN_CCONJ: -0.371
|
||||
BIN_SCONJ: 0.217
|
||||
BIN_WH: 0.138
|
||||
BIN_WZPRES: 0.132
|
||||
BIN_TO: 0.132
|
||||
BIN_GER: 0.090
|
||||
BIN_VBD: 0.089
|
||||
BIN_NUM: 0.088
|
||||
PC17:
|
||||
BIN_CCONJ: 0.462
|
||||
BIN_PGAS: 0.459
|
||||
BIN_CONJ: 0.395
|
||||
BIN_CONT: -0.333
|
||||
BIN_QUOT: 0.184
|
||||
BIN_NUM: 0.180
|
||||
BIN_VPRT: 0.177
|
||||
BIN_VBD: -0.172
|
||||
BIN_XX0: -0.170
|
||||
BIN_SPAU: -0.139
|
||||
PC18:
|
||||
BIN_CCONJ: 0.691
|
||||
BIN_CONJ: -0.502
|
||||
BIN_CONT: 0.238
|
||||
BIN_VBD: 0.152
|
||||
BIN_NUM: 0.149
|
||||
BIN_ANDC: 0.144
|
||||
BIN_INDA: -0.122
|
||||
BIN_XX0: 0.120
|
||||
BIN_PRIV: -0.115
|
||||
BIN_PHC: 0.101
|
||||
PC19:
|
||||
BIN_CONT: 0.563
|
||||
BIN_CONJ: 0.459
|
||||
BIN_PGAS: 0.332
|
||||
BIN_SPAU: 0.255
|
||||
BIN_XX0: 0.234
|
||||
BIN_QUOT: -0.231
|
||||
BIN_RB: -0.223
|
||||
BIN_SCONJ: -0.172
|
||||
BIN_AUXB: 0.163
|
||||
BIN_PASS: 0.131
|
||||
PC20:
|
||||
BIN_INDA: 0.674
|
||||
BIN_DET: -0.416
|
||||
BIN_QUAN: -0.265
|
||||
BIN_ART: 0.217
|
||||
BIN_FPP1: -0.202
|
||||
BIN_PGAS: -0.152
|
||||
BIN_CONJ: -0.150
|
||||
BIN_SCONJ: 0.141
|
||||
BIN_CCONJ: 0.130
|
||||
BIN_DEMO: -0.128
|
||||
PC21:
|
||||
BIN_SCONJ: 0.568
|
||||
BIN_PRIV: 0.541
|
||||
BIN_TO: -0.332
|
||||
BIN_WH: 0.270
|
||||
BIN_RB: -0.158
|
||||
BIN_INDA: -0.139
|
||||
BIN_COND: 0.134
|
||||
BIN_VPRT: -0.130
|
||||
BIN_CCONJ: 0.129
|
||||
BIN_CONJ: 0.102
|
||||
Top 10 PC1 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
19173 40.268860 26.736392 ... PHID-USER-doeppszazlm3r7xah4il 1416964345
|
||||
23127 34.022257 7.573103 ... PHID-USER-myidf5vlkwvrgp2iwn76 1433839792
|
||||
23533 33.055352 7.623438 ... PHID-USER-sai77mtxmpqnm6pycyvz 1424498718
|
||||
24553 33.053151 7.621628 ... PHID-USER-sai77mtxmpqnm6pycyvz 1424498559
|
||||
23532 33.050949 7.619818 ... PHID-USER-sai77mtxmpqnm6pycyvz 1424498772
|
||||
22245 31.318686 5.617453 ... PHID-USER-v7vgzvvcw7v2umf737ri 1438377936
|
||||
18500 29.657022 4.747496 ... PHID-USER-hbffue25ov3attlvclze 1387662960
|
||||
22023 29.625085 9.081212 ... PHID-USER-a6p24cvyblhfzc7we7nc 1440568477
|
||||
14809 28.210405 6.749195 ... PHID-USER-zjzhrhmn36icnzbckqy4 1379900100
|
||||
22930 27.824399 14.949181 ... PHID-USER-fo56wm4wxiwpoofn2xdu 1436249770
|
||||
|
||||
[10 rows x 28 columns]
|
||||
|
||||
Bottom 10 PC1 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
23485 -16.873824 13.740160 ... PHID-USER-u7udgblfyop6qd5wxot6 1425991276
|
||||
22060 -16.135690 12.174259 ... PHID-USER-2nnm76h4ykalvvref2ye 1440412099
|
||||
22845 -15.391146 13.319574 ... PHID-USER-2nnm76h4ykalvvref2ye 1440085454
|
||||
24795 -15.084050 14.347308 ... PHID-USER-5dwuaigmkz2vzg65lape 1419297091
|
||||
7451 -14.541432 5.740545 ... PHID-USER-ysftv67jxeaxdwcakvwo 1374347580
|
||||
23471 -13.857781 7.962597 ... PHID-USER-2nnm76h4ykalvvref2ye 1426228927
|
||||
22443 -13.803016 7.605012 ... PHID-USER-fo56wm4wxiwpoofn2xdu 1435267334
|
||||
23300 -13.605468 0.980452 ... PHID-USER-evd3wnvnlb66lrwulch4 1423322226
|
||||
11814 -13.401241 7.881186 ... PHID-USER-5pyvkdz65d5h5vxebodc 1372684440
|
||||
968 -13.313317 0.369182 ... PHID-USER-j5ma2nageni56xp567v5 1377621000
|
||||
|
||||
[10 rows x 28 columns]
|
||||
Top 10 PC2 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
24610 6.265218 29.494190 ... PHID-USER-tafngdco2cilcyr7qhhg 1422645688
|
||||
20963 27.578946 27.679075 ... PHID-USER-rooknayvbydy6sodz3lx 1436311793
|
||||
24082 -4.360480 27.219954 ... PHID-USER-jcypqodpdpbcicgwgh7j 1419534643
|
||||
19173 40.268860 26.736392 ... PHID-USER-doeppszazlm3r7xah4il 1416964345
|
||||
24824 -2.967505 23.097004 ... PHID-USER-mdihg2tyzmlvyhn3h32y 1418230141
|
||||
24818 20.182195 22.630740 ... PHID-USER-hbtlbu4zftxnz4i6f7yf 1418856731
|
||||
13345 6.075708 22.048374 ... PHID-USER-ydswvwhh5pm4lshahjje 1371860160
|
||||
21020 6.876811 21.888275 ... PHID-USER-zcsdm7lwcehnusyhh6xp 1435194938
|
||||
20973 -7.021508 20.911008 ... PHID-USER-hxwwywcyzpooynxuo7a2 1435878993
|
||||
22029 0.897428 20.736628 ... PHID-USER-a6p24cvyblhfzc7we7nc 1440568357
|
||||
|
||||
[10 rows x 28 columns]
|
||||
|
||||
Bottom 10 PC2 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
3134 5.691116 -12.652404 ... PHID-USER-ydswvwhh5pm4lshahjje 1374855900
|
||||
654 -0.763875 -12.369520 ... PHID-USER-hbtlbu4zftxnz4i6f7yf 1366408980
|
||||
16080 -0.816582 -12.352041 ... PHID-USER-zjzhrhmn36icnzbckqy4 1350678600
|
||||
1207 4.758836 -12.101115 ... PHID-USER-slccyo5rqasgpljxny7g 1374857700
|
||||
17982 6.571867 -11.954035 ... PHID-USER-kqibbfgfpgocyzwe32lv 1412196840
|
||||
1885 15.905505 -11.884510 ... PHID-USER-hyfm4swq76s4j642w46x 1372088340
|
||||
2934 0.131925 -11.738040 ... PHID-USER-it53o2f2kyryqyj33uzt 1375529520
|
||||
2109 -2.111122 -11.398959 ... PHID-USER-p6hvqn5njgnxuagekh4b 1367215380
|
||||
13276 15.471863 -11.316666 ... PHID-USER-z6nzrwuaij3spgyg23jt 1373035320
|
||||
24126 -1.622360 -11.265986 ... PHID-USER-lhtlnmkdbzlz6pbxaqdd 1430156915
|
||||
|
||||
[10 rows x 28 columns]
|
||||
job finished, cleaning up
|
||||
job pau at: Wed Oct 1 20:56:13 CDT 2025
|
||||
BIN
p2/quest/100125_description_pca.pkl
Normal file
BIN
p2/quest/100125_description_pca.pkl
Normal file
Binary file not shown.
108677
p2/quest/100125_subcomment_PCA_df.csv
Normal file
108677
p2/quest/100125_subcomment_PCA_df.csv
Normal file
File diff suppressed because one or more lines are too long
344
p2/quest/100125_subcomment_neurobiber-pca.log
Normal file
344
p2/quest/100125_subcomment_neurobiber-pca.log
Normal file
@ -0,0 +1,344 @@
|
||||
starting the job at: Wed Oct 1 21:22:40 CDT 2025
|
||||
setting up the environment
|
||||
running the neurobiber labeling script
|
||||
Number of PCs explaining 90% variance: 25
|
||||
Variance of each PCA component: [261.88760369 82.85870809 65.99452385 61.23806692 38.87318255
|
||||
32.55896743 26.32760005 21.94878602 18.68985685 16.2308729
|
||||
13.53042996 11.45815987 10.63830359 9.15456628 8.85934109
|
||||
8.30582956 8.03192941 7.15336822 6.76087663 6.48544413
|
||||
5.93048664 5.76580965 5.61052561 5.26965951 4.93708453]
|
||||
PC1:
|
||||
BIN_CAP: 0.683
|
||||
BIN_NNP: 0.645
|
||||
BIN_DET: -0.151
|
||||
BIN_PIN: -0.126
|
||||
BIN_PREP: -0.126
|
||||
BIN_VPRT: -0.091
|
||||
BIN_ART: -0.090
|
||||
BIN_RB: -0.085
|
||||
BIN_PRP: -0.076
|
||||
BIN_SBJP: -0.076
|
||||
PC2:
|
||||
BIN_NN: 0.750
|
||||
BIN_NNP: -0.310
|
||||
BIN_RB: -0.259
|
||||
BIN_PRP: -0.227
|
||||
BIN_SBJP: -0.227
|
||||
BIN_CAP: 0.227
|
||||
BIN_VPRT: -0.164
|
||||
BIN_FPP1: -0.114
|
||||
BIN_NUM: 0.104
|
||||
BIN_INF: -0.096
|
||||
PC3:
|
||||
BIN_CAP: 0.660
|
||||
BIN_NNP: -0.479
|
||||
BIN_RB: 0.251
|
||||
BIN_PRP: 0.220
|
||||
BIN_SBJP: 0.220
|
||||
BIN_PREP: 0.165
|
||||
BIN_PIN: 0.165
|
||||
BIN_X: -0.139
|
||||
BIN_VPRT: 0.133
|
||||
BIN_FPP1: 0.123
|
||||
PC4:
|
||||
BIN_PIN: 0.641
|
||||
BIN_PREP: 0.641
|
||||
BIN_NNP: 0.280
|
||||
BIN_RB: -0.173
|
||||
BIN_CONJ: 0.159
|
||||
BIN_NN: -0.077
|
||||
BIN_TO: 0.076
|
||||
BIN_X: -0.074
|
||||
BIN_VPRT: -0.066
|
||||
BIN_CAP: -0.059
|
||||
PC5:
|
||||
BIN_DET: 0.620
|
||||
BIN_ART: 0.381
|
||||
BIN_X: -0.276
|
||||
BIN_NN: 0.265
|
||||
BIN_VPRT: 0.262
|
||||
BIN_NNP: 0.247
|
||||
BIN_AUXB: 0.220
|
||||
BIN_NUM: -0.186
|
||||
BIN_INF: -0.166
|
||||
BIN_INDA: 0.157
|
||||
PC6:
|
||||
BIN_NN: 0.481
|
||||
BIN_PRP: 0.463
|
||||
BIN_SBJP: 0.463
|
||||
BIN_NNP: 0.243
|
||||
BIN_FPP1: 0.239
|
||||
BIN_DET: -0.184
|
||||
BIN_AUXB: -0.175
|
||||
BIN_PASS: -0.145
|
||||
BIN_CAP: -0.142
|
||||
BIN_PIT: 0.127
|
||||
PC7:
|
||||
BIN_RB: 0.780
|
||||
BIN_NN: 0.265
|
||||
BIN_DET: -0.189
|
||||
BIN_PRP: -0.187
|
||||
BIN_SBJP: -0.187
|
||||
BIN_JJ: -0.169
|
||||
BIN_NNP: 0.154
|
||||
BIN_X: -0.154
|
||||
BIN_TIME: 0.139
|
||||
BIN_ART: -0.137
|
||||
PC8:
|
||||
BIN_JJ: 0.676
|
||||
BIN_INF: 0.339
|
||||
BIN_VPRT: -0.318
|
||||
BIN_ART: 0.226
|
||||
BIN_PASS: -0.221
|
||||
BIN_AUXB: -0.218
|
||||
BIN_NUM: -0.215
|
||||
BIN_CONJ: -0.149
|
||||
BIN_RB: 0.146
|
||||
BIN_PEAS: -0.120
|
||||
PC9:
|
||||
BIN_INF: 0.655
|
||||
BIN_JJ: -0.538
|
||||
BIN_VPRT: -0.301
|
||||
BIN_DET: 0.247
|
||||
BIN_TO: 0.132
|
||||
BIN_ART: 0.129
|
||||
BIN_PRIV: 0.108
|
||||
BIN_NUM: 0.083
|
||||
BIN_RB: -0.079
|
||||
BIN_POMD: 0.072
|
||||
PC10:
|
||||
BIN_INF: 0.423
|
||||
BIN_AUXB: 0.375
|
||||
BIN_VPRT: 0.375
|
||||
BIN_JJ: 0.261
|
||||
BIN_ART: -0.261
|
||||
BIN_RB: -0.250
|
||||
BIN_VBD: -0.246
|
||||
BIN_X: -0.229
|
||||
BIN_DET: -0.214
|
||||
BIN_PASS: 0.171
|
||||
PC11:
|
||||
BIN_X: 0.786
|
||||
BIN_PUBV: -0.276
|
||||
BIN_VPRT: 0.266
|
||||
BIN_VBD: -0.264
|
||||
BIN_NUM: -0.197
|
||||
BIN_CONJ: -0.151
|
||||
BIN_JJ: -0.137
|
||||
BIN_INF: 0.099
|
||||
BIN_UH: -0.098
|
||||
BIN_NOMZ: -0.080
|
||||
PC12:
|
||||
BIN_NUM: 0.765
|
||||
BIN_VBD: -0.265
|
||||
BIN_VPRT: 0.211
|
||||
BIN_UH: -0.173
|
||||
BIN_RB: 0.163
|
||||
BIN_INDA: 0.145
|
||||
BIN_PGAS: -0.144
|
||||
BIN_JJ: 0.140
|
||||
BIN_QUOT: -0.140
|
||||
BIN_ART: 0.138
|
||||
PC13:
|
||||
BIN_VBD: 0.484
|
||||
BIN_QUOT: -0.401
|
||||
BIN_AUXB: 0.355
|
||||
BIN_CONT: -0.283
|
||||
BIN_PASS: 0.249
|
||||
BIN_X: 0.241
|
||||
BIN_UH: -0.200
|
||||
BIN_VPRT: -0.182
|
||||
BIN_NUM: 0.162
|
||||
BIN_PGAS: -0.136
|
||||
PC14:
|
||||
BIN_PUBV: 0.472
|
||||
BIN_CONJ: -0.395
|
||||
BIN_UH: -0.342
|
||||
BIN_VBD: 0.322
|
||||
BIN_QUOT: 0.291
|
||||
BIN_VPRT: 0.251
|
||||
BIN_CONT: 0.221
|
||||
BIN_NUM: 0.150
|
||||
BIN_PASS: -0.133
|
||||
BIN_TO: 0.128
|
||||
PC15:
|
||||
BIN_QUOT: 0.522
|
||||
BIN_CONT: 0.421
|
||||
BIN_PUBV: -0.315
|
||||
BIN_PGAS: -0.292
|
||||
BIN_CONJ: 0.238
|
||||
BIN_UH: -0.231
|
||||
BIN_NOMZ: -0.198
|
||||
BIN_PASS: 0.193
|
||||
BIN_VBD: 0.191
|
||||
BIN_AUXB: 0.169
|
||||
PC16:
|
||||
BIN_CONJ: 0.633
|
||||
BIN_PUBV: 0.516
|
||||
BIN_NUM: -0.248
|
||||
BIN_PGAS: -0.195
|
||||
BIN_UH: -0.170
|
||||
BIN_X: 0.160
|
||||
BIN_VPRT: 0.158
|
||||
BIN_ART: 0.157
|
||||
BIN_DEMP: -0.128
|
||||
BIN_TIME: -0.105
|
||||
PC17:
|
||||
BIN_UH: 0.685
|
||||
BIN_PGAS: -0.508
|
||||
BIN_VBD: 0.218
|
||||
BIN_CCONJ: -0.206
|
||||
BIN_VPRT: 0.160
|
||||
BIN_NOMZ: -0.147
|
||||
BIN_CONJ: -0.129
|
||||
BIN_PUBV: 0.117
|
||||
BIN_ART: 0.101
|
||||
BIN_INDA: 0.099
|
||||
PC18:
|
||||
BIN_ART: 0.458
|
||||
BIN_DET: -0.343
|
||||
BIN_DEMO: -0.305
|
||||
BIN_DEMP: -0.285
|
||||
BIN_INDA: 0.275
|
||||
BIN_CCONJ: 0.237
|
||||
BIN_AUXB: 0.216
|
||||
BIN_PIT: 0.216
|
||||
BIN_CONJ: -0.212
|
||||
BIN_FPP1: -0.211
|
||||
PC19:
|
||||
BIN_PGAS: 0.633
|
||||
BIN_CCONJ: -0.324
|
||||
BIN_UH: 0.322
|
||||
BIN_AUXB: 0.268
|
||||
BIN_CONJ: 0.251
|
||||
BIN_PRIV: 0.213
|
||||
BIN_BEMA: 0.153
|
||||
BIN_TIME: -0.129
|
||||
BIN_PROD: -0.128
|
||||
BIN_NUM: 0.119
|
||||
PC20:
|
||||
BIN_PRIV: 0.446
|
||||
BIN_QUES: -0.421
|
||||
BIN_CCONJ: 0.397
|
||||
BIN_VPRT: 0.238
|
||||
BIN_FPP1: 0.228
|
||||
BIN_AUXB: -0.206
|
||||
BIN_VBD: 0.196
|
||||
BIN_BEMA: -0.176
|
||||
BIN_PIT: -0.157
|
||||
BIN_SPP2: -0.149
|
||||
PC21:
|
||||
BIN_NOMZ: 0.493
|
||||
BIN_PRIV: 0.470
|
||||
BIN_CCONJ: -0.319
|
||||
BIN_PUBV: -0.287
|
||||
BIN_VBD: 0.191
|
||||
BIN_SCONJ: 0.180
|
||||
BIN_NUM: -0.174
|
||||
BIN_PGAS: -0.161
|
||||
BIN_UH: -0.157
|
||||
BIN_DEMP: -0.150
|
||||
PC22:
|
||||
BIN_CCONJ: 0.511
|
||||
BIN_QUES: 0.398
|
||||
BIN_CONJ: 0.252
|
||||
BIN_PASS: -0.240
|
||||
BIN_BEMA: 0.221
|
||||
BIN_WH: 0.204
|
||||
BIN_DEMO: -0.183
|
||||
BIN_PEAS: -0.172
|
||||
BIN_VBD: 0.167
|
||||
BIN_SCONJ: 0.161
|
||||
PC23:
|
||||
BIN_NOMZ: 0.623
|
||||
BIN_VBD: -0.245
|
||||
BIN_AUXB: 0.197
|
||||
BIN_QUES: -0.193
|
||||
BIN_INDA: -0.191
|
||||
BIN_PGAS: -0.186
|
||||
BIN_SPAU: 0.185
|
||||
BIN_VPRT: -0.180
|
||||
BIN_FPP1: 0.171
|
||||
BIN_DEMP: 0.161
|
||||
PC24:
|
||||
BIN_DEMO: 0.550
|
||||
BIN_TIME: -0.411
|
||||
BIN_DEMP: -0.407
|
||||
BIN_CCONJ: 0.320
|
||||
BIN_XX0: 0.232
|
||||
BIN_QUOT: -0.170
|
||||
BIN_CONT: 0.169
|
||||
BIN_PROD: -0.111
|
||||
BIN_PEAS: 0.108
|
||||
BIN_BEMA: 0.106
|
||||
PC25:
|
||||
BIN_TIME: 0.451
|
||||
BIN_XX0: -0.339
|
||||
BIN_SPAU: -0.327
|
||||
BIN_QUOT: 0.318
|
||||
BIN_DEMO: 0.293
|
||||
BIN_DEMP: -0.288
|
||||
BIN_PRIV: 0.214
|
||||
BIN_BEMA: 0.171
|
||||
BIN_CONT: -0.152
|
||||
BIN_FPP1: 0.150
|
||||
Top 10 PC1 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
23531 124.439666 -17.084926 ... PHID-USER-arjqb24x4oae7awzpfp6 1424754141
|
||||
707 124.420877 -17.096490 ... PHID-USER-pun3sjvg3cemjzbgyo2t 1363132183
|
||||
744 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1353551242
|
||||
749 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1353384355
|
||||
2243 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1356175107
|
||||
5921 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1353366778
|
||||
5933 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1353123761
|
||||
5935 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1353386649
|
||||
10080 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1366298361
|
||||
10418 124.420877 -17.096490 ... PHID-USER-fovtl67ew4l4cc3oeypc 1355363288
|
||||
|
||||
[10 rows x 28 columns]
|
||||
|
||||
Bottom 10 PC1 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
13752 -24.770207 2.744701 ... PHID-USER-43lnvui4hacyjrc2lflj 1384635692
|
||||
14250 -24.494552 0.056822 ... PHID-USER-fo56wm4wxiwpoofn2xdu 1383955246
|
||||
24560 -23.004747 -10.092946 ... PHID-USER-lzhljhpbm3qfphvqyill 1439545382
|
||||
22484 -22.583854 -1.262632 ... PHID-USER-fo56wm4wxiwpoofn2xdu 1440034588
|
||||
13907 -22.517418 10.551266 ... PHID-USER-kqibbfgfpgocyzwe32lv 1372691031
|
||||
14784 -22.018336 -17.579192 ... PHID-USER-xezsyhikbr7hjrig2ofp 1644598152
|
||||
23443 -21.721400 -10.425522 ... PHID-USER-ppytiem7rcsbnstfsrvq 1502483520
|
||||
19108 -21.707016 26.037928 ... PHID-USER-xy6c3ul27f336aaedx2d 1417798935
|
||||
19140 -21.620289 -0.871317 ... PHID-USER-fo56wm4wxiwpoofn2xdu 1432914688
|
||||
21658 -21.107540 10.458097 ... PHID-USER-x7ti5ksby4ubsabntlxa 1482280859
|
||||
|
||||
[10 rows x 28 columns]
|
||||
Top 10 PC2 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
117 53.656282 89.055606 ... PHID-USER-7ey733eainlhx5xqp4d3 1375331403
|
||||
2447 53.318077 88.847447 ... PHID-USER-dw53c5cb2qfhyemej57o 1456539439
|
||||
2471 53.318077 88.847447 ... PHID-USER-r7wrkcx7j2vutqs6hr3g 1384994015
|
||||
21231 53.299288 88.835882 ... PHID-USER-lhtlnmkdbzlz6pbxaqdd 1442087854
|
||||
2728 19.161791 77.490370 ... PHID-USER-it53o2f2kyryqyj33uzt 1377545735
|
||||
5024 19.161791 77.490370 ... PHID-USER-it53o2f2kyryqyj33uzt 1377544792
|
||||
5135 19.161791 77.490370 ... PHID-USER-it53o2f2kyryqyj33uzt 1377544788
|
||||
17487 -14.919339 66.179552 ... PHID-USER-zjzhrhmn36icnzbckqy4 1327978205
|
||||
17377 -14.938128 66.167987 ... PHID-USER-wrimmmr5w2zt7nk2t753 1317586881
|
||||
23739 -14.994495 66.133294 ... PHID-USER-ydswvwhh5pm4lshahjje 1462375135
|
||||
|
||||
[10 rows x 28 columns]
|
||||
|
||||
Bottom 10 PC2 values:
|
||||
PC1 PC2 ... AuthorPHID date_created
|
||||
14532 55.995569 -39.880029 ... PHID-USER-z3kqk2bjnqneldcznht6 1384007851
|
||||
6321 56.014358 -39.868465 ... PHID-USER-my5s6nat437le6q5fq7d 1555396632
|
||||
6322 56.014358 -39.868465 ... PHID-USER-my5s6nat437le6q5fq7d 1457021044
|
||||
6770 56.014358 -39.868465 ... PHID-USER-qduasitr62ffvc5eiivd 1445420618
|
||||
6771 56.014358 -39.868465 ... PHID-USER-qduasitr62ffvc5eiivd 1445420596
|
||||
10442 56.014358 -39.868465 ... PHID-USER-unpoeiyj52rmcfqi5rbw 1604534063
|
||||
10443 56.014358 -39.868465 ... PHID-USER-unpoeiyj52rmcfqi5rbw 1604532057
|
||||
10528 56.014358 -39.868465 ... PHID-USER-wkpnidxoctuhawexig5p 1429475491
|
||||
10529 56.014358 -39.868465 ... PHID-USER-wkpnidxoctuhawexig5p 1429475153
|
||||
11837 56.014358 -39.868465 ... PHID-USER-25bxvdt2svnidzfzjpk7 1453561129
|
||||
|
||||
[10 rows x 28 columns]
|
||||
job finished, cleaning up
|
||||
job pau at: Wed Oct 1 21:23:06 CDT 2025
|
||||
BIN
p2/quest/100125_subcomment_pca.pkl
Normal file
BIN
p2/quest/100125_subcomment_pca.pkl
Normal file
Binary file not shown.
BIN
p2/quest/description_closed_relevance_100125_biber_pca_final.png
Normal file
BIN
p2/quest/description_closed_relevance_100125_biber_pca_final.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.6 MiB |
@ -38,25 +38,24 @@ def format_df_data(df):
|
||||
return x
|
||||
|
||||
if __name__ == "__main__":
|
||||
biber_vec_df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/p2/quest/092325_biberplus_complete_labels.csv", low_memory=False)
|
||||
biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] == 'task_description']
|
||||
biber_vec_df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/analysis_data/092925_unified_phab.csv", low_memory=False)
|
||||
biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] != 'task_description']
|
||||
#biber_vec_df = biber_vec_df[biber_vec_df['AuthorPHID'] != "PHID-USER-idceizaw6elwiwm5xshb"]
|
||||
#biber_vec_df = biber_vec_df[biber_vec_df['comment_text'] != 'nan']
|
||||
biber_vecs = format_df_data(biber_vec_df)
|
||||
#handoff to PCA model
|
||||
'''
|
||||
pca_trial = PCA()
|
||||
biber_vecs_pca_trial = pca_trial.fit_transform(biber_vecs)
|
||||
|
||||
explained_variance = pca_trial.explained_variance_ratio_
|
||||
cumulative_variance = np.cumsum(explained_variance)
|
||||
|
||||
n_components = np.argmax(cumulative_variance >= 0.90) + 1
|
||||
print(f"Number of PCs explaining 90% variance: {n_components}")
|
||||
'''
|
||||
pca = PCA(n_components=18)
|
||||
argmax_components = np.argmax(cumulative_variance >= 0.90) + 1
|
||||
print(f"Number of PCs explaining 90% variance: {argmax_components}")
|
||||
|
||||
pca = PCA(n_components=argmax_components)
|
||||
biber_vecs_pca = pca.fit_transform(biber_vecs)
|
||||
with open('092525_description_pca.pkl', 'wb') as f:
|
||||
with open('100125_subcomment_pca.pkl', 'wb') as f:
|
||||
pickle.dump(pca, f)
|
||||
selected_axis = "closed_relevance"
|
||||
|
||||
@ -82,9 +81,13 @@ if __name__ == "__main__":
|
||||
pc_dict['week_index'] = biber_vec_df['week_index']
|
||||
pc_dict['priority'] = biber_vec_df['priority']
|
||||
pc_dict['closed_relevance'] = biber_vec_df['closed_relevance']
|
||||
pc_dict['TaskPHID'] = biber_vec_df['TaskPHID']
|
||||
pc_dict['AuthorPHID'] = biber_vec_df['AuthorPHID']
|
||||
pc_dict['date_created'] = biber_vec_df['date_created']
|
||||
|
||||
|
||||
plot_df = pd.DataFrame(pc_dict)
|
||||
#plot_df.to_csv("092325_subcomment_PCA_df.csv", index=False)
|
||||
plot_df.to_csv("100125_subcomment_PCA_df.csv", index=False)
|
||||
|
||||
print("Top 10 PC1 values:")
|
||||
print(plot_df.nlargest(10, "PC1"))
|
||||
@ -97,12 +100,12 @@ if __name__ == "__main__":
|
||||
print(plot_df.nsmallest(10, "PC2"))
|
||||
|
||||
|
||||
g = sns.FacetGrid(plot_df, col="source", row="phase", hue=selected_axis, palette="tab10", height=4, sharex=False, sharey=False)
|
||||
g.map_dataframe(sns.scatterplot, x="PC1", y="PC2", alpha=0.7, s=40)
|
||||
g.add_legend(title=selected_axis)
|
||||
g.set_axis_labels("PC1", "PC2")
|
||||
g.fig.subplots_adjust(top=0.9)
|
||||
g.fig.suptitle(f"PCA by {selected_axis}, faceted by source")
|
||||
#g = sns.FacetGrid(plot_df, col="source", row="phase", hue=selected_axis, palette="tab10", height=4, sharex=False, sharey=False)
|
||||
#g.map_dataframe(sns.scatterplot, x="PC1", y="PC2", alpha=0.7, s=40)
|
||||
#g.add_legend(title=selected_axis)
|
||||
#g.set_axis_labels("PC1", "PC2")
|
||||
#g.fig.subplots_adjust(top=0.9)
|
||||
#g.fig.suptitle(f"PCA by {selected_axis}, faceted by source")
|
||||
|
||||
#plt.savefig("090225_biber_pca_plot.png", dpi=300)
|
||||
'''
|
||||
@ -120,6 +123,6 @@ if __name__ == "__main__":
|
||||
plt.ylabel('component 2')
|
||||
plt.legend(title=selected_axis, bbox_to_anchor=(1.05, 1), loc=2)
|
||||
'''
|
||||
g.fig.tight_layout()
|
||||
g.savefig(f"description_{selected_axis}_092525_biber_pca_final.png", dpi=300)
|
||||
plt.show()
|
||||
#g.fig.tight_layout()
|
||||
#g.savefig(f"subcomment_{selected_axis}_100125_biber_pca_final.png", dpi=300)
|
||||
#plt.show()
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
#SBATCH --mem=64G
|
||||
#SBATCH --cpus-per-task=4
|
||||
#SBATCH --job-name=neurobiber-pca
|
||||
#SBATCH --output=092525_neurobiber-pca.log
|
||||
#SBATCH --output=100125_subcomment_neurobiber-pca.log
|
||||
#SBATCH --mail-type=BEGIN,END,FAIL
|
||||
#SBATCH --mail-user=gaughan@u.northwestern.edu
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user