updating with new PCA run
This commit is contained in:
parent
e0cb055ff7
commit
df1dcf1224
421
p2/quest/121625_total_neurobiber-pca.log
Normal file
421
p2/quest/121625_total_neurobiber-pca.log
Normal file
@ -0,0 +1,421 @@
|
|||||||
|
starting the job at: Tue Dec 16 15:26:31 CST 2025
|
||||||
|
setting up the environment
|
||||||
|
running the neurobiber labeling script
|
||||||
|
0 [The #Cloud-Services project tag is not intend...
|
||||||
|
1 [Token is used for 2-factor auth., I'm surpris...
|
||||||
|
2 [Oh, of course it's visible since you /might/ ...
|
||||||
|
3 [Can this be closed?, We can now use the proxy...
|
||||||
|
4 [I just now tried creating a new instance, and...
|
||||||
|
...
|
||||||
|
25022 [I think this should be low priority., Only po...
|
||||||
|
25023 [Go to some long article, scroll all the way d...
|
||||||
|
25024 [In Microsoft Word, both character-level styli...
|
||||||
|
25025 [LocalSettings.php lacks wgSecureLogin, wgCook...
|
||||||
|
25026 [``CODE`CODE$wgSecureLoginCODE$wgServerCODE$wg...
|
||||||
|
Name: olmo_cleaned_sentences, Length: 21302, dtype: object
|
||||||
|
[[11. ]
|
||||||
|
[15.5]
|
||||||
|
[10.5]
|
||||||
|
...
|
||||||
|
[26. ]
|
||||||
|
[18. ]
|
||||||
|
[ 5. ]]
|
||||||
|
Number of PCs explaining 90% variance: 25
|
||||||
|
Variance of each PCA component: [227.24126404 147.97706893 75.8294832 65.33178909 59.86158651
|
||||||
|
35.58328853 32.35739864 25.76291291 21.24897975 18.6584478
|
||||||
|
16.64923169 15.07461057 11.37754832 10.83761848 9.1933966
|
||||||
|
8.88840899 8.1956979 8.05178179 7.19167725 6.9942501
|
||||||
|
6.65183757 6.39961806 6.22347534 5.83078813 5.49524439]
|
||||||
|
PC1:
|
||||||
|
normalized_CAP: 0.613
|
||||||
|
normalized_NNP: 0.566
|
||||||
|
median_sentence_length: -0.473
|
||||||
|
normalized_DET: -0.123
|
||||||
|
normalized_PIN: -0.111
|
||||||
|
normalized_PREP: -0.111
|
||||||
|
normalized_ART: -0.088
|
||||||
|
normalized_NN: -0.088
|
||||||
|
normalized_VPRT: -0.056
|
||||||
|
normalized_JJ: -0.055
|
||||||
|
PC2:
|
||||||
|
median_sentence_length: 0.860
|
||||||
|
normalized_NNP: 0.436
|
||||||
|
normalized_CAP: 0.184
|
||||||
|
normalized_DET: -0.083
|
||||||
|
normalized_NN: -0.075
|
||||||
|
normalized_VPRT: -0.058
|
||||||
|
normalized_RB: -0.050
|
||||||
|
normalized_PIN: -0.049
|
||||||
|
normalized_PREP: -0.049
|
||||||
|
normalized_AUXB: -0.048
|
||||||
|
PC3:
|
||||||
|
normalized_NN: 0.692
|
||||||
|
normalized_CAP: 0.451
|
||||||
|
normalized_NNP: -0.303
|
||||||
|
normalized_RB: -0.190
|
||||||
|
normalized_PREP: 0.184
|
||||||
|
normalized_PIN: 0.184
|
||||||
|
normalized_PRP: -0.165
|
||||||
|
normalized_SBJP: -0.165
|
||||||
|
normalized_VPRT: -0.107
|
||||||
|
median_sentence_length: 0.100
|
||||||
|
PC4:
|
||||||
|
normalized_PREP: 0.484
|
||||||
|
normalized_PIN: 0.484
|
||||||
|
normalized_CAP: 0.373
|
||||||
|
normalized_NN: -0.340
|
||||||
|
normalized_PRP: 0.244
|
||||||
|
normalized_SBJP: 0.244
|
||||||
|
normalized_RB: 0.169
|
||||||
|
normalized_INF: 0.168
|
||||||
|
normalized_FPP1: 0.135
|
||||||
|
normalized_NNP: -0.118
|
||||||
|
PC5:
|
||||||
|
normalized_NNP: 0.463
|
||||||
|
normalized_CAP: -0.426
|
||||||
|
normalized_PIN: 0.421
|
||||||
|
normalized_PREP: 0.421
|
||||||
|
normalized_RB: -0.305
|
||||||
|
normalized_SBJP: -0.178
|
||||||
|
normalized_PRP: -0.178
|
||||||
|
median_sentence_length: -0.129
|
||||||
|
normalized_CONJ: 0.112
|
||||||
|
normalized_VPRT: -0.108
|
||||||
|
PC6:
|
||||||
|
normalized_DET: 0.682
|
||||||
|
normalized_ART: 0.406
|
||||||
|
normalized_VPRT: 0.300
|
||||||
|
normalized_AUXB: 0.244
|
||||||
|
normalized_NNP: 0.191
|
||||||
|
normalized_INDA: 0.178
|
||||||
|
normalized_NUM: -0.142
|
||||||
|
normalized_DEMP: 0.124
|
||||||
|
normalized_NN: 0.114
|
||||||
|
normalized_BEMA: 0.112
|
||||||
|
PC7:
|
||||||
|
normalized_PRP: 0.482
|
||||||
|
normalized_SBJP: 0.482
|
||||||
|
normalized_NN: 0.455
|
||||||
|
normalized_FPP1: 0.253
|
||||||
|
normalized_NNP: 0.242
|
||||||
|
normalized_RB: -0.201
|
||||||
|
normalized_AUXB: -0.168
|
||||||
|
normalized_PASS: -0.144
|
||||||
|
normalized_CAP: -0.143
|
||||||
|
normalized_PIT: 0.128
|
||||||
|
PC8:
|
||||||
|
normalized_RB: 0.804
|
||||||
|
normalized_NN: 0.344
|
||||||
|
normalized_NNP: 0.219
|
||||||
|
normalized_NUM: -0.157
|
||||||
|
normalized_CAP: -0.137
|
||||||
|
normalized_DET: -0.129
|
||||||
|
normalized_TIME: 0.125
|
||||||
|
normalized_XX0: 0.110
|
||||||
|
normalized_SPAU: 0.101
|
||||||
|
normalized_PRP: -0.092
|
||||||
|
PC9:
|
||||||
|
normalized_JJ: 0.578
|
||||||
|
normalized_INF: 0.353
|
||||||
|
sentence_count: 0.314
|
||||||
|
normalized_VPRT: -0.302
|
||||||
|
normalized_NUM: -0.263
|
||||||
|
normalized_ART: 0.229
|
||||||
|
normalized_PASS: -0.218
|
||||||
|
normalized_AUXB: -0.214
|
||||||
|
normalized_CONJ: -0.139
|
||||||
|
normalized_DET: 0.125
|
||||||
|
PC10:
|
||||||
|
normalized_JJ: 0.568
|
||||||
|
normalized_INF: -0.553
|
||||||
|
normalized_VPRT: 0.390
|
||||||
|
normalized_DET: -0.252
|
||||||
|
normalized_ART: -0.140
|
||||||
|
normalized_AUXB: 0.124
|
||||||
|
normalized_NUM: -0.124
|
||||||
|
sentence_count: 0.116
|
||||||
|
normalized_TO: -0.101
|
||||||
|
normalized_BEMA: 0.093
|
||||||
|
PC11:
|
||||||
|
sentence_count: 0.617
|
||||||
|
normalized_INF: -0.440
|
||||||
|
normalized_JJ: -0.321
|
||||||
|
normalized_AUXB: -0.271
|
||||||
|
normalized_VPRT: -0.190
|
||||||
|
normalized_ART: 0.184
|
||||||
|
normalized_RB: 0.170
|
||||||
|
normalized_TO: -0.140
|
||||||
|
normalized_PASS: -0.118
|
||||||
|
normalized_DET: 0.117
|
||||||
|
PC12:
|
||||||
|
sentence_count: 0.639
|
||||||
|
normalized_VPRT: 0.347
|
||||||
|
normalized_INF: 0.325
|
||||||
|
normalized_AUXB: 0.280
|
||||||
|
normalized_VBD: -0.240
|
||||||
|
normalized_JJ: -0.223
|
||||||
|
normalized_PASS: 0.157
|
||||||
|
normalized_ART: -0.145
|
||||||
|
normalized_DET: -0.130
|
||||||
|
normalized_PUBV: -0.124
|
||||||
|
PC13:
|
||||||
|
normalized_NUM: 0.592
|
||||||
|
normalized_VBD: -0.482
|
||||||
|
normalized_AUXB: -0.289
|
||||||
|
normalized_VPRT: 0.263
|
||||||
|
normalized_PASS: -0.235
|
||||||
|
normalized_ART: 0.157
|
||||||
|
normalized_CONJ: 0.150
|
||||||
|
normalized_INDA: 0.134
|
||||||
|
normalized_TIME: -0.133
|
||||||
|
normalized_PUBV: -0.130
|
||||||
|
PC14:
|
||||||
|
normalized_NUM: 0.479
|
||||||
|
normalized_QUOT: -0.361
|
||||||
|
normalized_VBD: 0.317
|
||||||
|
normalized_JJ: 0.276
|
||||||
|
normalized_AUXB: 0.246
|
||||||
|
normalized_CONT: -0.243
|
||||||
|
sentence_count: 0.226
|
||||||
|
normalized_PUBV: 0.204
|
||||||
|
normalized_VPRT: -0.204
|
||||||
|
normalized_CONJ: 0.173
|
||||||
|
PC15:
|
||||||
|
normalized_PUBV: 0.534
|
||||||
|
normalized_CONJ: -0.332
|
||||||
|
normalized_VBD: 0.308
|
||||||
|
normalized_UH: -0.306
|
||||||
|
normalized_QUOT: 0.294
|
||||||
|
normalized_VPRT: 0.236
|
||||||
|
normalized_CONT: 0.212
|
||||||
|
normalized_NUM: 0.196
|
||||||
|
normalized_PASS: -0.162
|
||||||
|
normalized_TO: 0.135
|
||||||
|
PC16:
|
||||||
|
normalized_QUOT: 0.572
|
||||||
|
normalized_CONT: 0.438
|
||||||
|
normalized_CONJ: 0.287
|
||||||
|
normalized_PGAS: -0.274
|
||||||
|
normalized_PUBV: -0.245
|
||||||
|
normalized_NOMZ: -0.204
|
||||||
|
normalized_UH: -0.164
|
||||||
|
normalized_PASS: 0.160
|
||||||
|
normalized_VBD: 0.155
|
||||||
|
normalized_VPRT: -0.151
|
||||||
|
PC17:
|
||||||
|
normalized_PUBV: 0.491
|
||||||
|
normalized_CONJ: 0.476
|
||||||
|
normalized_PGAS: -0.306
|
||||||
|
normalized_NUM: -0.295
|
||||||
|
normalized_ART: 0.246
|
||||||
|
normalized_UH: 0.198
|
||||||
|
normalized_VPRT: 0.196
|
||||||
|
normalized_DEMP: -0.178
|
||||||
|
normalized_DET: -0.163
|
||||||
|
normalized_INDA: 0.145
|
||||||
|
PC18:
|
||||||
|
normalized_UH: 0.757
|
||||||
|
normalized_PGAS: -0.319
|
||||||
|
normalized_CONJ: -0.290
|
||||||
|
normalized_NOMZ: -0.255
|
||||||
|
normalized_VBD: 0.167
|
||||||
|
normalized_CCONJ: -0.161
|
||||||
|
normalized_NUM: 0.139
|
||||||
|
normalized_SCONJ: 0.097
|
||||||
|
normalized_JJ: 0.091
|
||||||
|
sentence_count: 0.077
|
||||||
|
PC19:
|
||||||
|
normalized_ART: 0.442
|
||||||
|
normalized_CONJ: -0.352
|
||||||
|
normalized_DEMO: -0.294
|
||||||
|
normalized_DET: -0.286
|
||||||
|
normalized_AUXB: 0.279
|
||||||
|
normalized_DEMP: -0.242
|
||||||
|
normalized_PIT: 0.234
|
||||||
|
normalized_INDA: 0.226
|
||||||
|
normalized_FPP1: -0.211
|
||||||
|
normalized_NUM: 0.172
|
||||||
|
PC20:
|
||||||
|
normalized_PGAS: 0.741
|
||||||
|
normalized_UH: 0.330
|
||||||
|
normalized_X: -0.274
|
||||||
|
normalized_CONJ: 0.238
|
||||||
|
normalized_AUXB: 0.157
|
||||||
|
normalized_CONT: 0.149
|
||||||
|
normalized_QUES: -0.136
|
||||||
|
normalized_NUM: 0.116
|
||||||
|
normalized_PUBV: 0.115
|
||||||
|
normalized_NOMZ: -0.114
|
||||||
|
PC21:
|
||||||
|
normalized_CCONJ: 0.608
|
||||||
|
normalized_QUES: -0.369
|
||||||
|
normalized_X: -0.250
|
||||||
|
normalized_AUXB: -0.218
|
||||||
|
normalized_PRIV: 0.214
|
||||||
|
normalized_VPRT: 0.208
|
||||||
|
normalized_BEMA: -0.191
|
||||||
|
normalized_TIME: 0.185
|
||||||
|
normalized_FPP1: 0.171
|
||||||
|
normalized_SCONJ: -0.135
|
||||||
|
PC22:
|
||||||
|
normalized_X: 0.585
|
||||||
|
normalized_PRIV: 0.499
|
||||||
|
normalized_QUES: -0.278
|
||||||
|
normalized_CCONJ: -0.270
|
||||||
|
normalized_VBD: 0.232
|
||||||
|
normalized_DEMO: -0.162
|
||||||
|
normalized_PUBV: -0.154
|
||||||
|
normalized_FPP1: 0.153
|
||||||
|
normalized_CONJ: 0.139
|
||||||
|
normalized_ART: 0.101
|
||||||
|
PC23:
|
||||||
|
normalized_NOMZ: 0.568
|
||||||
|
normalized_X: -0.520
|
||||||
|
normalized_CCONJ: -0.308
|
||||||
|
normalized_PRIV: 0.276
|
||||||
|
normalized_PUBV: -0.162
|
||||||
|
normalized_PASS: 0.147
|
||||||
|
normalized_FPP1: 0.128
|
||||||
|
normalized_CONJ: -0.126
|
||||||
|
normalized_PGAS: -0.125
|
||||||
|
normalized_DEMO: 0.122
|
||||||
|
PC24:
|
||||||
|
normalized_QUES: 0.430
|
||||||
|
normalized_CCONJ: 0.385
|
||||||
|
normalized_CONJ: 0.280
|
||||||
|
normalized_PRIV: 0.278
|
||||||
|
normalized_VBD: 0.259
|
||||||
|
normalized_WH: 0.242
|
||||||
|
normalized_PUBV: -0.237
|
||||||
|
normalized_SCONJ: 0.209
|
||||||
|
normalized_PASS: -0.198
|
||||||
|
normalized_TIME: -0.191
|
||||||
|
PC25:
|
||||||
|
normalized_DEMP: 0.448
|
||||||
|
normalized_NOMZ: 0.443
|
||||||
|
normalized_DEMO: -0.420
|
||||||
|
normalized_INDA: -0.209
|
||||||
|
normalized_VBD: -0.199
|
||||||
|
normalized_SPAU: 0.194
|
||||||
|
normalized_PRIV: -0.186
|
||||||
|
normalized_UH: 0.186
|
||||||
|
normalized_PEAS: -0.182
|
||||||
|
normalized_NUM: 0.157
|
||||||
|
Top 10 PC1 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
2471 118.825452 48.883675 ... 1424754141 task_subcomment
|
||||||
|
971 118.822061 48.877781 ... 1354316739 task_subcomment
|
||||||
|
984 118.822061 48.877781 ... 1359160095 task_subcomment
|
||||||
|
987 118.822061 48.877781 ... 1362102239 task_subcomment
|
||||||
|
989 118.822061 48.877781 ... 1362441994 task_subcomment
|
||||||
|
1486 118.822061 48.877781 ... 1362478487 task_subcomment
|
||||||
|
3708 118.822061 48.877781 ... 1344625237 task_subcomment
|
||||||
|
3714 118.822061 48.877781 ... 1345813989 task_subcomment
|
||||||
|
3720 118.822061 48.877781 ... 1348771229 task_subcomment
|
||||||
|
3730 118.822061 48.877781 ... 1349619536 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
|
||||||
|
Bottom 10 PC1 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
24881 -254.291673 450.247095 ... 1350678600 task_description
|
||||||
|
4413 -218.707675 406.488424 ... 1463441072 task_subcomment
|
||||||
|
4412 -218.516235 406.636956 ... 1463441050 task_subcomment
|
||||||
|
18176 -142.494770 253.636183 ... 1380947348 task_subcomment
|
||||||
|
11326 -140.543606 274.762080 ... 1354470131 task_subcomment
|
||||||
|
6778 -107.601033 180.118546 ... 1374730027 task_subcomment
|
||||||
|
13442 -100.902011 197.908102 ... 1440633395 task_subcomment
|
||||||
|
693 -98.717155 181.023968 ... 1379611711 task_subcomment
|
||||||
|
4410 -94.493897 160.641828 ... 1463439992 task_subcomment
|
||||||
|
46 -89.814365 179.163194 ... 1441031208 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
Top 10 PC2 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
24881 -254.291673 450.247095 ... 1350678600 task_description
|
||||||
|
4412 -218.516235 406.636956 ... 1463441050 task_subcomment
|
||||||
|
4413 -218.707675 406.488424 ... 1463441072 task_subcomment
|
||||||
|
11326 -140.543606 274.762080 ... 1354470131 task_subcomment
|
||||||
|
18176 -142.494770 253.636183 ... 1380947348 task_subcomment
|
||||||
|
13442 -100.902011 197.908102 ... 1440633395 task_subcomment
|
||||||
|
693 -98.717155 181.023968 ... 1379611711 task_subcomment
|
||||||
|
6778 -107.601033 180.118546 ... 1374730027 task_subcomment
|
||||||
|
46 -89.814365 179.163194 ... 1441031208 task_subcomment
|
||||||
|
4410 -94.493897 160.641828 ... 1463439992 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
|
||||||
|
Bottom 10 PC2 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
12410 -7.877613 -20.755159 ... 1422554389 task_subcomment
|
||||||
|
22835 -7.877613 -20.755159 ... 1462375135 task_subcomment
|
||||||
|
19501 -7.867440 -20.737476 ... 1317586881 task_subcomment
|
||||||
|
24870 -7.864049 -20.731582 ... 1327978205 task_subcomment
|
||||||
|
18694 -10.567395 -19.469923 ... 1377104818 task_subcomment
|
||||||
|
1330 -4.665646 -19.045749 ... 1321220595 task_subcomment
|
||||||
|
1103 -5.695317 -18.884254 ... 1428954897 task_subcomment
|
||||||
|
14631 -5.958980 -18.052657 ... 1412324629 task_subcomment
|
||||||
|
598 -15.028547 -17.737022 ... 1384635692 task_subcomment
|
||||||
|
4154 -10.280136 -17.191996 ... 1380638194 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
Top 10 PC3 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
1639 53.461173 -2.188626 ... 1375331403 task_subcomment
|
||||||
|
3549 53.400133 -2.294723 ... 1456539439 task_subcomment
|
||||||
|
15715 53.400133 -2.294723 ... 1384994015 task_subcomment
|
||||||
|
12766 53.396742 -2.300618 ... 1442087854 task_subcomment
|
||||||
|
11104 22.288240 -10.664704 ... 1377544788 task_subcomment
|
||||||
|
11105 22.288240 -10.664704 ... 1377544792 task_subcomment
|
||||||
|
11107 22.288240 -10.664704 ... 1377545735 task_subcomment
|
||||||
|
11911 57.304544 2.321048 ... 1350946140 task_subcomment
|
||||||
|
3474 14.292880 -10.641835 ... 1374012685 task_subcomment
|
||||||
|
13254 35.952938 -3.871137 ... 1434130529 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
|
||||||
|
Bottom 10 PC3 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
103 58.026764 29.547741 ... 1453561068 task_subcomment
|
||||||
|
104 58.026764 29.547741 ... 1453561129 task_subcomment
|
||||||
|
168 58.026764 29.547741 ... 1420466644 task_subcomment
|
||||||
|
169 58.026764 29.547741 ... 1420473867 task_subcomment
|
||||||
|
506 58.026764 29.547741 ... 1491557250 task_subcomment
|
||||||
|
507 58.026764 29.547741 ... 1491557269 task_subcomment
|
||||||
|
1202 58.026764 29.547741 ... 1601272820 task_subcomment
|
||||||
|
1225 58.026764 29.547741 ... 1431790268 task_subcomment
|
||||||
|
1226 58.026764 29.547741 ... 1431790446 task_subcomment
|
||||||
|
1276 58.026764 29.547741 ... 1624208185 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
Top 10 PC4 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
14207 78.893820 23.026780 ... 1676690655 task_subcomment
|
||||||
|
1109 30.520772 -5.107689 ... 1430255616 task_subcomment
|
||||||
|
13611 31.636333 -3.165316 ... 1399747303 task_subcomment
|
||||||
|
1498 14.905192 -9.401637 ... 1424206043 task_subcomment
|
||||||
|
598 -15.028547 -17.737022 ... 1384635692 task_subcomment
|
||||||
|
15423 50.507754 10.487969 ... 1355446597 task_subcomment
|
||||||
|
13536 33.130617 -3.199009 ... 1354149956 task_subcomment
|
||||||
|
20484 16.484139 -8.759647 ... 1361832639 task_subcomment
|
||||||
|
13234 16.481595 -8.764067 ... 1438011707 task_subcomment
|
||||||
|
15790 16.797678 -8.172293 ... 1436224473 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
|
||||||
|
Bottom 10 PC4 values:
|
||||||
|
PC1 PC2 ... date_created comment_type
|
||||||
|
12410 -7.877613 -20.755159 ... 1422554389 task_subcomment
|
||||||
|
22835 -7.877613 -20.755159 ... 1462375135 task_subcomment
|
||||||
|
19501 -7.867440 -20.737476 ... 1317586881 task_subcomment
|
||||||
|
24870 -7.864049 -20.731582 ... 1327978205 task_subcomment
|
||||||
|
1103 -5.695317 -18.884254 ... 1428954897 task_subcomment
|
||||||
|
1335 -5.631462 -15.155290 ... 1328300138 task_subcomment
|
||||||
|
14631 -5.958980 -18.052657 ... 1412324629 task_subcomment
|
||||||
|
14666 1.866478 -10.987911 ... 1434020520 task_subcomment
|
||||||
|
21167 8.687041 -12.537389 ... 1372042733 task_subcomment
|
||||||
|
406 10.504327 -8.538062 ... 1374557457 task_subcomment
|
||||||
|
|
||||||
|
[10 rows x 36 columns]
|
||||||
|
job finished, cleaning up
|
||||||
|
job pau at: Tue Dec 16 15:27:19 CST 2025
|
||||||
BIN
p2/quest/121625_total_pca.pkl
Normal file
BIN
p2/quest/121625_total_pca.pkl
Normal file
Binary file not shown.
130647
p2/quest/121625_total_pca_df.csv
Normal file
130647
p2/quest/121625_total_pca_df.csv
Normal file
File diff suppressed because one or more lines are too long
@ -85,7 +85,7 @@ def format_df_data(df):
|
|||||||
return x
|
return x
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
biber_vec_df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/analysis_data/120725_unified.csv", low_memory=False)
|
biber_vec_df = pd.read_csv("/home/nws8519/git/mw-lifecycle-analysis/analysis_data/forPCA_121625_unified.csv", low_memory=False)
|
||||||
#biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] != 'task_description']
|
#biber_vec_df = biber_vec_df[biber_vec_df['comment_type'] != 'task_description']
|
||||||
biber_vec_df = biber_vec_df[biber_vec_df['AuthorPHID'] != "PHID-USER-idceizaw6elwiwm5xshb"]
|
biber_vec_df = biber_vec_df[biber_vec_df['AuthorPHID'] != "PHID-USER-idceizaw6elwiwm5xshb"]
|
||||||
biber_vec_df = biber_vec_df[biber_vec_df['comment_text'] != 'nan']
|
biber_vec_df = biber_vec_df[biber_vec_df['comment_text'] != 'nan']
|
||||||
@ -103,7 +103,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
pca = PCA(n_components=argmax_components)
|
pca = PCA(n_components=argmax_components)
|
||||||
biber_vecs_pca = pca.fit_transform(biber_vecs)
|
biber_vecs_pca = pca.fit_transform(biber_vecs)
|
||||||
with open('121525_total_pca.pkl', 'wb') as f:
|
with open('121625_total_pca.pkl', 'wb') as f:
|
||||||
pickle.dump(pca, f)
|
pickle.dump(pca, f)
|
||||||
selected_axis = "AuthorWMFAffil"
|
selected_axis = "AuthorWMFAffil"
|
||||||
|
|
||||||
@ -136,7 +136,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
|
|
||||||
plot_df = pd.DataFrame(pc_dict)
|
plot_df = pd.DataFrame(pc_dict)
|
||||||
plot_df.to_csv("121525_total_pca_df.csv", index=False)
|
plot_df.to_csv("121625_total_pca_df.csv", index=False)
|
||||||
|
|
||||||
print("Top 10 PC1 values:")
|
print("Top 10 PC1 values:")
|
||||||
print(plot_df.nlargest(10, "PC1"))
|
print(plot_df.nlargest(10, "PC1"))
|
||||||
|
|||||||
@ -8,7 +8,7 @@
|
|||||||
#SBATCH --mem=64G
|
#SBATCH --mem=64G
|
||||||
#SBATCH --cpus-per-task=4
|
#SBATCH --cpus-per-task=4
|
||||||
#SBATCH --job-name=neurobiber-pca
|
#SBATCH --job-name=neurobiber-pca
|
||||||
#SBATCH --output=121525_total_neurobiber-pca.log
|
#SBATCH --output=121625_total_neurobiber-pca.log
|
||||||
#SBATCH --mail-type=BEGIN,END,FAIL
|
#SBATCH --mail-type=BEGIN,END,FAIL
|
||||||
#SBATCH --mail-user=gaughan@u.northwestern.edu
|
#SBATCH --mail-user=gaughan@u.northwestern.edu
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user