| 
						
					 | 
					
						
						
						
						
							
						
						
							2d21ff1137
							
						
					 | 
					
						
						
							
							Merge branch 'master' of code:cdsc_reddit into excise_reindex
						
						
						
						
						
					 | 
					
						2021-08-03 15:02:08 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							cf86c7492c
							
						
					 | 
					
						
						
							
							update clustering scripts
						
						
						
						
						
					 | 
					
						2021-08-03 14:55:02 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							c6122bb429
							
						
					 | 
					
						
						
							
							Merge branch 'master' of code:cdsc_reddit
						
						
						
						
						
					 | 
					
						2021-07-28 15:32:21 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							596e1ff339
							
						
					 | 
					
						
						
							
							no longer do we need to get daily dumps
						
						
						
						
						
					 | 
					
						2021-07-28 15:32:04 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							87ffaa6858
							
						
					 | 
					
						
						
							
							script for picking the best clustering given constraints
						
						
						
						
						
					 | 
					
						2021-05-14 19:10:36 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							7b14db67de
							
						
					 | 
					
						
						
							
							Merge branch 'excise_reindex' of code:cdsc_reddit into excise_reindex
						
						
						
						
						
					 | 
					
						2021-05-13 22:28:31 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							0b95bea30e
							
						
					 | 
					
						
						
							
							support isolates in visualization
						
						
						
						
						
					 | 
					
						2021-05-13 22:26:58 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							582cf263ea
							
						
					 | 
					
						
						
							
							bug fix in affinity clustering
						
						
						
						
						
					 | 
					
						2021-05-13 22:26:15 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							8a2248fae1
							
						
					 | 
					
						
						
							
							Merge remote-tracking branch 'origin/excise_reindex' into temp
						
						
						
						
						
					 | 
					
						2021-05-10 18:32:03 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							47ba04aa97
							
						
					 | 
					
						
						
							
							add script for pulling cluster timeseries
						
						
						
						
						
					 | 
					
						2021-05-10 18:24:22 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							4cb7eeec80
							
						
					 | 
					
						
						
							
							Refactor to make a decent api.
						
						
						
						
						
					 | 
					
						2021-05-10 13:46:49 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							f05cb962e0
							
						
					 | 
					
						
						
							
							refactor clustring in object oriented style
						
						
						
						
						
					 | 
					
						2021-05-07 22:33:26 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							8d1df5b26e
							
						
					 | 
					
						
						
							
							refactor clustering.py into method-specific files.
						
						
						
						
						
					 | 
					
						2021-05-03 11:28:48 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							e1c9d9af6f
							
						
					 | 
					
						
						
							
							Remove 'exclude phrases' parameter.
						
						
						
						
						
					 | 
					
						2021-05-03 10:37:09 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							7df8436067
							
						
					 | 
					
						
						
							
							Use Latent semantic indexing and hdbscan
						
						
						
						
						
					 | 
					
						2021-05-02 23:39:55 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							36b24ee933
							
						
					 | 
					
						
						
							
							reindex tfidf in memory instead of using spark
						
						
						
						
						
					 | 
					
						2021-04-30 12:48:19 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							6a3bfa26ee
							
						
					 | 
					
						
						
							
							bugfix
						
						
						
						
						
					 | 
					
						2021-04-26 22:31:05 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							3a758f1fc8
							
						
					 | 
					
						
						
							
							Merge branch 'charliepatch' of code:cdsc_reddit into charliepatch
						
						
						
						
						
					 | 
					
						2021-04-26 13:58:25 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							806cfc948f
							
						
					 | 
					
						
						
							
							support passing in list of tfidf vectors.
						
						
						
						
						
						
						
						Also lowercases included subreddits. 
						
					 | 
					
						2021-04-26 13:20:43 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							0fe120e4ab
							
						
					 | 
					
						
						
							
							support passing in list of tfidf vectors.
						
						
						
						
						
						
						
						Also lowercases included subreddits. 
						
					 | 
					
						2021-04-26 11:44:56 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							f20365c07e
							
						
					 | 
					
						
						
							
							Merge branch 'master' of code:cdsc_reddit
						
						
						
						
						
					 | 
					
						2021-04-22 10:46:26 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							34e0a0a30d
							
						
					 | 
					
						
						
							
							version of weekly_cosine_similarities.py from klone
						
						
						
						
						
					 | 
					
						2021-04-22 10:38:10 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							003a48aea5
							
						
					 | 
					
						
						
							
							bugfix in weekly similarities
						
						
						
						
						
					 | 
					
						2021-04-22 10:37:04 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							37dd0ef55f
							
						
					 | 
					
						
						
							
							bugfixes in clustering selection.
						
						
						
						
						
					 | 
					
						2021-04-21 16:56:25 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							ac06a8757a
							
						
					 | 
					
						
						
							
							calculate some user-level attributes to detect bots
						
						
						
						
						
					 | 
					
						2021-04-20 11:34:36 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							01a4c35358
							
						
					 | 
					
						
						
							
							grid sweep selection for clustering hyperparameters
						
						
						
						
						
					 | 
					
						2021-04-20 11:33:54 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							628a70734b
							
						
					 | 
					
						
						
							
							Merge branch 'master' of code:cdsc_reddit
						
						
						
						
						
					 | 
					
						2021-04-05 23:21:35 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							f0176d9f0d
							
						
					 | 
					
						
						
							
							Changes for cosine similarities on klone.
						
						
						
						
						
					 | 
					
						2021-04-05 23:21:06 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							a013f6718b
							
						
					 | 
					
						
						
							
							export timeseries functions
						
						
						
						
						
					 | 
					
						2021-03-24 17:18:30 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							36cb0a5546
							
						
					 | 
					
						
						
							
							add code for pulling activity time series from parquet.
						
						
						
						
						
					 | 
					
						2021-03-24 16:08:57 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							06430903f0
							
						
					 | 
					
						
						
							
							add included_subreddits parameter to cosine similarities.
						
						
						
						
						
					 | 
					
						2021-02-22 18:38:34 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							4dc949de5f
							
						
					 | 
					
						
						
							
							Changes from hyak.
						
						
						
						
						
					 | 
					
						2021-02-22 16:03:48 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							140d1bdd17
							
						
					 | 
					
						
						
							
							fix bug in viz.
						
						
						
						
						
					 | 
					
						2021-01-27 20:26:15 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							554660275f
							
						
					 | 
					
						
						
							
							add visualization for 10000 subreddits based on author-tf similarities.
						
						
						
						
						
					 | 
					
						2021-01-27 20:22:24 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							b4dd9acbd8
							
						
					 | 
					
						
						
							
							Merge branch 'master' of code:cdsc_reddit
						
						
						
						
						
					 | 
					
						2021-01-27 20:09:23 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
						
						
							
						
						
							dbe4c87f8b
							
						
					 | 
					
						
						
							
							add cluster selection to visualization
						
						
						
						
						
					 | 
					
						2021-01-27 20:08:07 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							3155600514
							
						
					 | 
					
						
						
							
							remove nsfw subs from topN
						
						
						
						
						
					 | 
					
						2020-12-28 21:11:44 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							4e20dce188
							
						
					 | 
					
						
						
							
							Updating to support wang-style user overlaps.
						
						
						
						
						
					 | 
					
						2020-12-24 22:38:04 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							56269deee3
							
						
					 | 
					
						
						
							
							Some improvements to run affinity clustering on larger dataset and
						
						
						
						
						
						
						
						compute density. 
						
					 | 
					
						2020-12-12 20:42:47 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							e6294b5b90
							
						
					 | 
					
						
						
							
							Refactor and reorganze.
						
						
						
						
						
					 | 
					
						2020-12-08 17:32:20 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							a60747292e
							
						
					 | 
					
						
						
							
							Add code for running tf-idf at the weekly level.
						
						
						
						
						
					 | 
					
						2020-12-01 22:54:48 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
						
						
							
						
						
							db5879d6c9
							
						
					 | 
					
						
						
							
							refactor visualization code.
						
						
						
						
						
					 | 
					
						2020-11-17 16:46:49 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
						
						
							
						
						
							13eb95b3b0
							
						
					 | 
					
						
						
							
							Merge remote-tracking branch 'refs/remotes/origin/master' into master
						
						
						
						
						
					 | 
					
						2020-11-17 16:33:14 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					| 
						
					 | 
					
						
						
						
						
							
						
						
							2cc897543a
							
						
					 | 
					
						
						
							
							git-annex in nathante@nate-x1:~/cdsc_reddit
						
						
						
						
						
					 | 
					
						2020-11-17 16:33:13 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							1bf206d219
							
						
					 | 
					
						
						
							
							git-annex in nathante@mox2.hyak.local:/gscratch/comdata/users/nathante/cdsc-reddit
						
						
						
						
						
					 | 
					
						2020-11-17 16:31:48 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							f8ff8b2d0f
							
						
					 | 
					
						
						
							
							Update code for clustering + tsne.
						
						
						
						
						
					 | 
					
						2020-11-17 15:59:20 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							82d184d9c6
							
						
					 | 
					
						
						
							
							Update code for building simlarity matrices.
						
						
						
						
						
					 | 
					
						2020-11-17 12:52:48 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							e794214653
							
						
					 | 
					
						
						
							
							bugfix in completing tfidf similarity matrices.
						
						
						
						
						
					 | 
					
						2020-11-12 11:47:53 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							220a540beb
							
						
					 | 
					
						
						
							
							increase learning rate.
						
						
						
						
						
					 | 
					
						2020-11-11 16:58:39 -08:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Nate E TeBlunthuis
							
						 
					 | 
					
						
						
						
						
							
						
						
							cd43a94865
							
						
					 | 
					
						
						
							
							increase iterations and perplectity and early_exaggeration
						
						
						
						
						
					 | 
					
						2020-11-11 16:55:39 -08:00 | 
					
					
						
						
							
							
							
						
					 |