13
0

Add visualization for 10000 subreddits based on author-tf similarities.

This commit is contained in:
Nate E TeBlunthuis 2021-01-27 20:22:24 -08:00
parent b4dd9acbd8
commit 554660275f
4 changed files with 88 additions and 7 deletions

View File

@ -0,0 +1,11 @@
# Builds the subreddit visualization pages. Each enabled rule runs
# tsne_vis.py with a --tsne_data feather file, a --clusters feather file and
# an --output HTML path; the page is rebuilt when either input file or
# tsne_vis.py itself changes.

# `all` does not name a real file. Declaring it phony keeps a stray file
# called `all` in this directory from masking the default goal.
.PHONY: all

all: subreddit_author_tf_similarities_10000.html #comment_authors_10000.html
# wang_tsne_10000.html

# Disabled rules, kept for reference.
# wang_tsne_10000.html:/gscratch/comdata/output/reddit_tsne/wang_similarity_10000.feather /gscratch/comdata/output/reddit_clustering/wang_similarity_10000.feather tsne_vis.py
# python3 tsne_vis.py --tsne_data=/gscratch/comdata/output/reddit_tsne/wang_similarity_10000.feather --clusters=/gscratch/comdata/output/reddit_clustering/wang_similarity_10000.feather --output=wang_tsne_10000.html
# NOTE(review): the rule below lists reddit_tsne/comment_authors_10000.feather
# as a prerequisite but passes reddit_similarity/comment_authors_10000.feather
# as --tsne_data; confirm which path is intended before re-enabling it.
# comment_authors_10000.html:/gscratch/comdata/output/reddit_tsne/comment_authors_10000.feather /gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather tsne_vis.py
# python3 tsne_vis.py --tsne_data=/gscratch/comdata/output/reddit_similarity/comment_authors_10000.feather --clusters=/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather --output=comment_authors_10000.html

# The recipe line must begin with a tab character, otherwise make aborts with
# "missing separator". tsne_vis.py is launched through start_spark_and_run.sh;
# the leading `1` is that wrapper's first argument (presumably a node count --
# TODO confirm against the wrapper script).
subreddit_author_tf_similarities_10000.html: /gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather /gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather tsne_vis.py
	start_spark_and_run.sh 1 tsne_vis.py --tsne_data=/gscratch/comdata/output/reddit_tsne/subreddit_author_tf_similarities_10000.feather --clusters=/gscratch/comdata/output/reddit_clustering/subreddit_author_tf_similarities_10000.feather --output=subreddit_author_tf_similarities_10000.html

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -155,13 +155,13 @@ def build_visualization(tsne_data, clusters, output):
if __name__ == "__main__": if __name__ == "__main__":
fire.Fire(build_visualization) fire.Fire(build_visualization)
commenter_data = pd.read_feather("tsne_author_fit.feather") # commenter_data = pd.read_feather("tsne_author_fit.feather")
clusters = pd.read_feather('author_3000_clusters.feather') # clusters = pd.read_feather('author_3000_clusters.feather')
commenter_data = assign_cluster_colors(commenter_data,clusters,10,8) # commenter_data = assign_cluster_colors(commenter_data,clusters,10,8)
commenter_zoom_plot = zoom_plot(commenter_data) # commenter_zoom_plot = zoom_plot(commenter_data)
commenter_viewport_plot = viewport_plot(commenter_data) # commenter_viewport_plot = viewport_plot(commenter_data)
commenter_zoom_plot.save("subreddit_commenters_tsne_3000.html") # commenter_zoom_plot.save("subreddit_commenters_tsne_3000.html")
commenter_viewport_plot.save("subreddit_commenters_tsne_3000_viewport.html") # commenter_viewport_plot.save("subreddit_commenters_tsne_3000_viewport.html")
# chart = chart.properties(width=10000,height=10000) # chart = chart.properties(width=10000,height=10000)
# chart.save("test_tsne_whole.svg") # chart.save("test_tsne_whole.svg")