We're creating a fresh archive because the history for our old chapter includes API keys, data files, and other material we can't share.
2494 lines
84 KiB
Plaintext
2494 lines
84 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Import data and get things setup"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import random\n",
|
||
"random.seed(9001)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Populating the interactive namespace from numpy and matplotlib\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/usr/lib/python3/dist-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['sin', 'pi', 'median', 'random', 'percentile', 'save', 'deprecated', 'Rectangle', 'load', 'mean', 'plot', 'cos']\n",
|
||
"`%matplotlib` prevents importing * from pylab and numpy\n",
|
||
" \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# turn on the magic so we have inline figures\n",
|
||
"%pylab inline\n",
|
||
"import matplotlib\n",
|
||
"matplotlib.style.use('ggplot')\n",
|
||
"from IPython.display import display"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# import code to write r modules and create our variable we'll write to\n",
|
||
"import rpy2.robjects as robjects\n",
|
||
"from rpy2.robjects import pandas2ri\n",
|
||
"pandas2ri.activate()\n",
|
||
"\n",
|
||
"r = {}\n",
|
||
"def remember(name, x):\n",
|
||
" r[name] = x\n",
|
||
" display(x)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# load in modules we'll need for analysis\n",
|
||
"import subprocess\n",
|
||
"import csv\n",
|
||
"from igraph import *\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import re"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# grab the largest connected compontent with a little function\n",
|
||
"def get_largest_component(g):\n",
|
||
" g_components = g.components(mode=\"WEAK\")\n",
|
||
" max_size = max(g_components.sizes())\n",
|
||
" for g_tmp in g_components.subgraphs():\n",
|
||
" if g_tmp.vcount() == max_size:\n",
|
||
" return(g_tmp)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# look the full edgelist into igraph\n",
|
||
"def edge_list_iter(df):\n",
|
||
" for i, row in df.iterrows():\n",
|
||
" yield (row['from'], row['to'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# list top 5 journals for each of the clusters\n",
|
||
"def top_journals_for_clusters(clu):\n",
|
||
" articles_tmp = pd.merge(clu, articles[['eid', 'source_title']])\n",
|
||
" \n",
|
||
" output = pd.DataFrame()\n",
|
||
" for cid in articles_tmp['cluster'].unique():\n",
|
||
" journal_counts = articles_tmp['source_title'][articles_tmp['cluster'] == cid].value_counts().head(5)\n",
|
||
" tmp = pd.DataFrame({'cluster' : cid, 'count' : journal_counts }) \n",
|
||
" output = output.append(tmp)\n",
|
||
"\n",
|
||
" output = output.reset_index()\n",
|
||
" output = output.rename(columns = {'index' : \"journal\"})\n",
|
||
" return(output)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def infomap_edgelist(g, edgelist_filename, directed=True):\n",
|
||
" nodes_tmp = pd.DataFrame([ {'node_infomap' : v.index, \n",
|
||
" 'eid' : v['name']} for v in g.vs ])\n",
|
||
"\n",
|
||
" # write out the edgelist to an external file so we can call infomap on it\n",
|
||
" with open(edgelist_filename + \".txt\", 'w') as f:\n",
|
||
" for e in g.es:\n",
|
||
" if e.source != e.target:\n",
|
||
" if 'weight' in e.attributes():\n",
|
||
" print(\"{}\\t{}\\t{}\".format(e.source, e.target, e['weight']), file=f)\n",
|
||
" else:\n",
|
||
" print(\"{}\\t{}\".format(e.source, e.target), file=f)\n",
|
||
"\n",
|
||
" \n",
|
||
" # run the external program to generate the infomap clustering\n",
|
||
" infomap_cmdline = [\"infomap/Infomap\", edgelist_filename + \".txt\", \"output_dir -z --map --clu --tree\"]\n",
|
||
" if directed:\n",
|
||
" infomap_cmdline.append(\"-d\")\n",
|
||
" subprocess.call(infomap_cmdline)\n",
|
||
"\n",
|
||
" # load up the clu data\n",
|
||
" clu = pd.read_csv(\"output_dir/\" + edgelist_filename + \".clu\",\n",
|
||
" header=None, comment=\"#\", delim_whitespace=True)\n",
|
||
" clu.columns = ['node_infomap', 'cluster', 'flow']\n",
|
||
" \n",
|
||
" return pd.merge(clu, nodes_tmp, on=\"node_infomap\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def write_graphml(g, clu, graphml_filename):\n",
|
||
" clu = clu[['node_infomap', 'cluster']].sort_values('node_infomap')\n",
|
||
" g.vs[\"cluster\"] = clu[\"cluster\"].tolist()\n",
|
||
" g.write_graphml(graphml_filename)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# load article data\n",
|
||
"articles = pd.read_csv(\"../../processed_data/abstracts.tsv\", delimiter=\"\\t\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# network for just the central \"social media\" set"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# this contains the list of all INCOMING citations to for paper in the original set\n",
|
||
"raw_edgelist = pd.read_csv(\"../../processed_data/social_media_edgelist.txt\", delimiter=\"\\t\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"g_sm_all = Graph.TupleList([i for i in edge_list_iter(raw_edgelist)], directed=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 64,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"g_sm = get_largest_component(g_sm_all)\n",
|
||
"g_sm = g_sm.simplify()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"g_sm_clu = infomap_edgelist(g_sm, \"sm_edgelist_infomap\", directed=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"2 1817\n",
|
||
"1 1748\n",
|
||
"3 1088\n",
|
||
"4 653\n",
|
||
"6 355\n",
|
||
"10 114\n",
|
||
"5 104\n",
|
||
"9 90\n",
|
||
"8 59\n",
|
||
"7 44\n",
|
||
"12 27\n",
|
||
"11 19\n",
|
||
"13 10\n",
|
||
"14 5\n",
|
||
"15 3\n",
|
||
"16 2\n",
|
||
"18 1\n",
|
||
"17 1\n",
|
||
"Name: cluster, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"g_sm_clu['cluster'].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style>\n",
|
||
" .dataframe thead tr:only-child th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>journal</th>\n",
|
||
" <th>cluster</th>\n",
|
||
" <th>count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>40</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>41</th>\n",
|
||
" <td>WSDM 2013 - Proceedings of the 6th ACM Interna...</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>42</th>\n",
|
||
" <td>Conference on Human Factors in Computing Syste...</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>43</th>\n",
|
||
" <td>WWW 2013 Companion - Proceedings of the 22nd I...</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>44</th>\n",
|
||
" <td>PLoS ONE</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" journal cluster count\n",
|
||
"40 Lecture Notes in Computer Science (including s... 9 4\n",
|
||
"41 WSDM 2013 - Proceedings of the 6th ACM Interna... 9 4\n",
|
||
"42 Conference on Human Factors in Computing Syste... 9 2\n",
|
||
"43 WWW 2013 Companion - Proceedings of the 22nd I... 9 2\n",
|
||
"44 PLoS ONE 9 2"
|
||
]
|
||
},
|
||
"execution_count": 67,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"tmp = top_journals_for_clusters(g_sm_clu)\n",
|
||
"tmp[tmp.cluster == 9]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"write_graphml(g_sm, g_sm_clu, \"g_sm.graphml\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# larger network that contains the incoming cites to citing articles"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# this contains the list of all INCOMING citations to everything in the original set\n",
|
||
"# plus every INCOMING citation to every paper that cites one of those papers\n",
|
||
"raw_edgelist_files = [\"../../processed_data/citation_edgelist.txt\",\n",
|
||
" \"../../processed_data/social_media_edgelist.txt\"]\n",
|
||
"combo_raw_edgelist = pd.concat([pd.read_csv(x, delimiter=\"\\t\") for x in raw_edgelist_files])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"g_full_all = Graph.TupleList([i for i in edge_list_iter(combo_raw_edgelist)], directed=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"g_full = get_largest_component(g_full_all)\n",
|
||
"g_full = g_full.simplify()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"g_full_clu = infomap_edgelist(g_full, \"citation_edglist_infomap\", directed=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"1 9243\n",
|
||
"2 8225\n",
|
||
"3 6826\n",
|
||
"4 3227\n",
|
||
"6 2835\n",
|
||
"5 2704\n",
|
||
"7 1911\n",
|
||
"9 810\n",
|
||
"8 803\n",
|
||
"10 589\n",
|
||
"11 520\n",
|
||
"12 491\n",
|
||
"13 336\n",
|
||
"14 219\n",
|
||
"15 175\n",
|
||
"17 162\n",
|
||
"16 153\n",
|
||
"22 139\n",
|
||
"18 135\n",
|
||
"19 118\n",
|
||
"25 117\n",
|
||
"23 106\n",
|
||
"21 93\n",
|
||
"24 88\n",
|
||
"30 84\n",
|
||
"28 79\n",
|
||
"27 78\n",
|
||
"32 76\n",
|
||
"26 73\n",
|
||
"20 71\n",
|
||
" ... \n",
|
||
"54 26\n",
|
||
"56 25\n",
|
||
"52 23\n",
|
||
"49 23\n",
|
||
"55 22\n",
|
||
"58 19\n",
|
||
"62 18\n",
|
||
"61 18\n",
|
||
"63 18\n",
|
||
"60 17\n",
|
||
"66 15\n",
|
||
"59 15\n",
|
||
"57 15\n",
|
||
"65 14\n",
|
||
"68 13\n",
|
||
"53 7\n",
|
||
"64 6\n",
|
||
"73 6\n",
|
||
"71 4\n",
|
||
"70 4\n",
|
||
"74 3\n",
|
||
"67 3\n",
|
||
"72 3\n",
|
||
"69 3\n",
|
||
"75 2\n",
|
||
"78 1\n",
|
||
"79 1\n",
|
||
"77 1\n",
|
||
"80 1\n",
|
||
"76 1\n",
|
||
"Name: cluster, Length: 80, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 73,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"g_full_clu['cluster'].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style>\n",
|
||
" .dataframe thead tr:only-child th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>journal</th>\n",
|
||
" <th>cluster</th>\n",
|
||
" <th>count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Public Relations Review</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>119</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>81</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Computers in Human Behavior</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>71</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Proceedings of the Annual Hawaii International...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>49</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Government Information Quarterly</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>40</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>Journal of Medical Internet Research</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>149</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>PLoS ONE</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>43</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>Studies in Health Technology and Informatics</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>41</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>32</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>Annals of Emergency Medicine</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>17</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>180</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>ACM International Conference Proceeding Series</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>51</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>International Conference on Information and Kn...</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>38</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>CEUR Workshop Proceedings</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>37</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>PLoS ONE</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>36</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>Information Communication and Society</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>70</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>New Media and Society</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>34</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>First Monday</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>24</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>23</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>Computers in Human Behavior</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>21</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>Computers in Human Behavior</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>42</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>Cyberpsychology, Behavior, and Social Networking</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>42</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>Personality and Individual Differences</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>Journal of Medical Internet Research</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24</th>\n",
|
||
" <td>Journal of Adolescent Health</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25</th>\n",
|
||
" <td>Computers in Human Behavior</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>38</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>24</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>27</th>\n",
|
||
" <td>Computers and Education</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>28</th>\n",
|
||
" <td>Conference on Human Factors in Computing Syste...</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>Journal of Marketing Education</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>286</th>\n",
|
||
" <td>Medical Journal of Australia</td>\n",
|
||
" <td>63</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>287</th>\n",
|
||
" <td>Nicotine and Tobacco Research</td>\n",
|
||
" <td>63</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>288</th>\n",
|
||
" <td>35th International Conference on Information S...</td>\n",
|
||
" <td>64</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>289</th>\n",
|
||
" <td>First Monday</td>\n",
|
||
" <td>64</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>290</th>\n",
|
||
" <td>Cyberpsychology, Behavior, and Social Networking</td>\n",
|
||
" <td>64</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>291</th>\n",
|
||
" <td>HT'12 - Proceedings of 23rd ACM Conference on ...</td>\n",
|
||
" <td>65</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>292</th>\n",
|
||
" <td>IEEE/ACM Transactions on Networking</td>\n",
|
||
" <td>65</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>293</th>\n",
|
||
" <td>Journal of Healthcare Engineering</td>\n",
|
||
" <td>65</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>294</th>\n",
|
||
" <td>International Journal of Information Management</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>295</th>\n",
|
||
" <td>Journal of Theoretical and Applied Electronic ...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>296</th>\n",
|
||
" <td>Journal of Experimental and Theoretical Artifi...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>297</th>\n",
|
||
" <td>McKinsey Quarterly</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>298</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>66</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>299</th>\n",
|
||
" <td>Science (New York, N.Y.)</td>\n",
|
||
" <td>67</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>300</th>\n",
|
||
" <td>International Conference on Information and Kn...</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>301</th>\n",
|
||
" <td>Lecture Notes in Computer Science (including s...</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>302</th>\n",
|
||
" <td>16th Americas Conference on Information System...</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>303</th>\n",
|
||
" <td>Procedia Engineering</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>304</th>\n",
|
||
" <td>International Journal of Virtual and Personal ...</td>\n",
|
||
" <td>68</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>305</th>\n",
|
||
" <td>Scientometrics</td>\n",
|
||
" <td>69</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>306</th>\n",
|
||
" <td>Conference on Human Factors in Computing Syste...</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>307</th>\n",
|
||
" <td>NyS</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>308</th>\n",
|
||
" <td>Aslib Proceedings: New Information Perspectives</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>309</th>\n",
|
||
" <td>WWW 2013 Companion - Proceedings of the 22nd I...</td>\n",
|
||
" <td>72</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>310</th>\n",
|
||
" <td>Cyberpsychology, Behavior, and Social Networking</td>\n",
|
||
" <td>72</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>311</th>\n",
|
||
" <td>PACIS 2011 - 15th Pacific Asia Conference on I...</td>\n",
|
||
" <td>73</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>312</th>\n",
|
||
" <td>Proceedings of the International Conference on...</td>\n",
|
||
" <td>73</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>313</th>\n",
|
||
" <td>Online (Wilton, Connecticut)</td>\n",
|
||
" <td>74</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>314</th>\n",
|
||
" <td>Catalan Journal of Communication and Cultural ...</td>\n",
|
||
" <td>75</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>315</th>\n",
|
||
" <td>Proceedings - Pacific Asia Conference on Infor...</td>\n",
|
||
" <td>75</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>316 rows × 3 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" journal cluster count\n",
|
||
"0 Public Relations Review 1 119\n",
|
||
"1 Lecture Notes in Computer Science (including s... 1 81\n",
|
||
"2 Computers in Human Behavior 1 71\n",
|
||
"3 Proceedings of the Annual Hawaii International... 1 49\n",
|
||
"4 Government Information Quarterly 1 40\n",
|
||
"5 Journal of Medical Internet Research 2 149\n",
|
||
"6 PLoS ONE 2 43\n",
|
||
"7 Studies in Health Technology and Informatics 2 41\n",
|
||
"8 Lecture Notes in Computer Science (including s... 2 32\n",
|
||
"9 Annals of Emergency Medicine 2 17\n",
|
||
"10 Lecture Notes in Computer Science (including s... 3 180\n",
|
||
"11 ACM International Conference Proceeding Series 3 51\n",
|
||
"12 International Conference on Information and Kn... 3 38\n",
|
||
"13 CEUR Workshop Proceedings 3 37\n",
|
||
"14 PLoS ONE 3 36\n",
|
||
"15 Information Communication and Society 4 70\n",
|
||
"16 New Media and Society 4 34\n",
|
||
"17 First Monday 4 24\n",
|
||
"18 Lecture Notes in Computer Science (including s... 4 23\n",
|
||
"19 Computers in Human Behavior 4 21\n",
|
||
"20 Computers in Human Behavior 5 42\n",
|
||
"21 Cyberpsychology, Behavior, and Social Networking 5 42\n",
|
||
"22 Personality and Individual Differences 5 11\n",
|
||
"23 Journal of Medical Internet Research 5 11\n",
|
||
"24 Journal of Adolescent Health 5 11\n",
|
||
"25 Computers in Human Behavior 6 38\n",
|
||
"26 Lecture Notes in Computer Science (including s... 6 24\n",
|
||
"27 Computers and Education 6 16\n",
|
||
"28 Conference on Human Factors in Computing Syste... 6 11\n",
|
||
"29 Journal of Marketing Education 6 11\n",
|
||
".. ... ... ...\n",
|
||
"286 Medical Journal of Australia 63 1\n",
|
||
"287 Nicotine and Tobacco Research 63 1\n",
|
||
"288 35th International Conference on Information S... 64 1\n",
|
||
"289 First Monday 64 1\n",
|
||
"290 Cyberpsychology, Behavior, and Social Networking 64 1\n",
|
||
"291 HT'12 - Proceedings of 23rd ACM Conference on ... 65 1\n",
|
||
"292 IEEE/ACM Transactions on Networking 65 1\n",
|
||
"293 Journal of Healthcare Engineering 65 1\n",
|
||
"294 International Journal of Information Management 66 2\n",
|
||
"295 Journal of Theoretical and Applied Electronic ... 66 1\n",
|
||
"296 Journal of Experimental and Theoretical Artifi... 66 1\n",
|
||
"297 McKinsey Quarterly 66 1\n",
|
||
"298 Lecture Notes in Computer Science (including s... 66 1\n",
|
||
"299 Science (New York, N.Y.) 67 1\n",
|
||
"300 International Conference on Information and Kn... 68 1\n",
|
||
"301 Lecture Notes in Computer Science (including s... 68 1\n",
|
||
"302 16th Americas Conference on Information System... 68 1\n",
|
||
"303 Procedia Engineering 68 1\n",
|
||
"304 International Journal of Virtual and Personal ... 68 1\n",
|
||
"305 Scientometrics 69 1\n",
|
||
"306 Conference on Human Factors in Computing Syste... 70 2\n",
|
||
"307 NyS 71 2\n",
|
||
"308 Aslib Proceedings: New Information Perspectives 71 1\n",
|
||
"309 WWW 2013 Companion - Proceedings of the 22nd I... 72 1\n",
|
||
"310 Cyberpsychology, Behavior, and Social Networking 72 1\n",
|
||
"311 PACIS 2011 - 15th Pacific Asia Conference on I... 73 1\n",
|
||
"312 Proceedings of the International Conference on... 73 1\n",
|
||
"313 Online (Wilton, Connecticut) 74 1\n",
|
||
"314 Catalan Journal of Communication and Cultural ... 75 1\n",
|
||
"315 Proceedings - Pacific Asia Conference on Infor... 75 1\n",
|
||
"\n",
|
||
"[316 rows x 3 columns]"
|
||
]
|
||
},
|
||
"execution_count": 74,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"top_journals_for_clusters(g_full_clu)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"write_graphml(g_full, g_full_clu, \"g_full.graphml\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# create the meta-network of connections between clusters"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style>\n",
|
||
" .dataframe thead tr:only-child th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>to_cluster</th>\n",
|
||
" <th>from_cluster</th>\n",
|
||
" <th>value</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>396</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>278</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>233</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>171</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>85</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>57</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>86</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>25</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>29</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>412</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>117</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>126</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>187</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>104</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>175</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>68</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>184</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>27</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>150</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>174</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>30</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>345</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>31</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>32</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>99</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>204</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>205</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>206</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>207</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>208</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>209</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>210</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>211</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>212</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>213</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>214</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>215</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>216</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>217</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>218</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>219</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>220</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>221</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>222</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>223</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>224</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>225</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>226</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>227</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>228</th>\n",
|
||
" <td>8</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>229</th>\n",
|
||
" <td>9</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>230</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>231</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>232</th>\n",
|
||
" <td>12</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>233</th>\n",
|
||
" <td>13</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>221 rows × 3 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" to_cluster from_cluster value\n",
|
||
"1 2 1 396\n",
|
||
"2 3 1 278\n",
|
||
"3 4 1 233\n",
|
||
"4 5 1 171\n",
|
||
"5 6 1 85\n",
|
||
"6 7 1 57\n",
|
||
"7 8 1 86\n",
|
||
"8 9 1 25\n",
|
||
"9 10 1 29\n",
|
||
"10 11 1 12\n",
|
||
"11 12 1 0\n",
|
||
"12 13 1 3\n",
|
||
"13 1 2 412\n",
|
||
"15 3 2 117\n",
|
||
"16 4 2 126\n",
|
||
"17 5 2 187\n",
|
||
"18 6 2 104\n",
|
||
"19 7 2 175\n",
|
||
"20 8 2 68\n",
|
||
"21 9 2 16\n",
|
||
"22 10 2 4\n",
|
||
"23 11 2 3\n",
|
||
"24 12 2 0\n",
|
||
"25 13 2 4\n",
|
||
"26 1 3 184\n",
|
||
"27 2 3 150\n",
|
||
"29 4 3 174\n",
|
||
"30 5 3 345\n",
|
||
"31 6 3 11\n",
|
||
"32 7 3 99\n",
|
||
".. ... ... ...\n",
|
||
"204 10 16 0\n",
|
||
"205 11 16 0\n",
|
||
"206 12 16 0\n",
|
||
"207 13 16 1\n",
|
||
"208 1 17 0\n",
|
||
"209 2 17 0\n",
|
||
"210 3 17 0\n",
|
||
"211 4 17 3\n",
|
||
"212 5 17 4\n",
|
||
"213 6 17 0\n",
|
||
"214 7 17 0\n",
|
||
"215 8 17 2\n",
|
||
"216 9 17 0\n",
|
||
"217 10 17 0\n",
|
||
"218 11 17 0\n",
|
||
"219 12 17 0\n",
|
||
"220 13 17 0\n",
|
||
"221 1 18 3\n",
|
||
"222 2 18 0\n",
|
||
"223 3 18 0\n",
|
||
"224 4 18 2\n",
|
||
"225 5 18 2\n",
|
||
"226 6 18 0\n",
|
||
"227 7 18 0\n",
|
||
"228 8 18 0\n",
|
||
"229 9 18 0\n",
|
||
"230 10 18 0\n",
|
||
"231 11 18 0\n",
|
||
"232 12 18 0\n",
|
||
"233 13 18 0\n",
|
||
"\n",
|
||
"[221 rows x 3 columns]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"edgelist_tmp = pd.merge(raw_edgelist, g_sm_clu[[\"eid\", \"cluster\"]], how=\"inner\", left_on=\"to\", right_on=\"eid\")\n",
|
||
"edgelist_tmp = edgelist_tmp.rename(columns={'cluster' : 'to_cluster'})\n",
|
||
"edgelist_tmp.drop('eid', 1, inplace=True)\n",
|
||
" \n",
|
||
"edgelist_tmp = pd.merge(edgelist_tmp, g_sm_clu[[\"eid\", \"cluster\"]], how=\"inner\", left_on=\"from\", right_on=\"eid\")\n",
|
||
"edgelist_tmp = edgelist_tmp.rename(columns={\"cluster\" : 'from_cluster'})\n",
|
||
"edgelist_tmp.drop('eid', 1, inplace=True)\n",
|
||
"\n",
|
||
"edgelist_tmp = edgelist_tmp[[\"to_cluster\", \"from_cluster\"]]\n",
|
||
"edgelist_tmp = edgelist_tmp[edgelist_tmp[\"to_cluster\"] != edgelist_tmp[\"from_cluster\"]]\n",
|
||
"\n",
|
||
"cluster_edgelist = pd.crosstab(edgelist_tmp[\"to_cluster\"], edgelist_tmp[\"from_cluster\"])\n",
|
||
"cluster_edgelist[\"to_cluster\"] = cluster_edgelist.index\n",
|
||
"\n",
|
||
"cluster_edgelist = pd.melt(cluster_edgelist, id_vars=[\"to_cluster\"])\n",
|
||
"cluster_edgelist = cluster_edgelist[cluster_edgelist['to_cluster'] != cluster_edgelist['from_cluster']]\n",
|
||
"\n",
|
||
"remember(\"cluster_edgelist\", cluster_edgelist)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 77,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"top_clusters = g_sm_clu[\"cluster\"].value_counts().head(6).index\n",
|
||
"\n",
|
||
"# write the edgelist for the total number of clusters (currently 1-6)\n",
|
||
"cluster_edgelist_output = cluster_edgelist[(cluster_edgelist[\"to_cluster\"].isin(top_clusters)) &\n",
|
||
" (cluster_edgelist[\"from_cluster\"].isin(top_clusters))]\n",
|
||
"\n",
|
||
"cluster_edgelist_output = cluster_edgelist_output[cluster_edgelist_output[\"value\"] > 0]\n",
|
||
"\n",
|
||
"g_cluster = Graph.TupleList([tuple(x) for x in cluster_edgelist_output[[\"from_cluster\", \"to_cluster\"]].values], directed=True)\n",
|
||
"g_cluster.es[\"weight\"] = cluster_edgelist_output[\"value\"].tolist()\n",
|
||
"\n",
|
||
"# assign the number of total articles as an attribute for each node\n",
|
||
"g_cluster.vs[\"papers\"] = g_sm_clu[\"cluster\"].value_counts()[[x[\"name\"] for x in g_cluster.vs]].tolist()\n",
|
||
"\n",
|
||
"g_cluster.write_graphml(\"clusters.graphml\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# create network stats for tables (overall and within clusters)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def create_network_stats(g):\n",
|
||
" network_stats = pd.DataFrame({'eid' : g.vs['name'],\n",
|
||
" 'eig_cent' : g.eigenvector_centrality(),\n",
|
||
" 'indegree' : g.indegree(),\n",
|
||
" 'betweenness' : g.betweenness()})\n",
|
||
"\n",
|
||
" network_stats = pd.merge(network_stats,\n",
|
||
" articles[['eid', 'title', 'source_title']],\n",
|
||
" how=\"inner\")\n",
|
||
" return network_stats"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# centrality measures for the vertices of g_full, joined to article titles and sources\n",
|
||
"network_stats = create_network_stats(g_full)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style>\n",
|
||
" .dataframe thead tr:only-child th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>betweenness</th>\n",
|
||
" <th>eid</th>\n",
|
||
" <th>eig_cent</th>\n",
|
||
" <th>indegree</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>source_title</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2275</th>\n",
|
||
" <td>6393.560498</td>\n",
|
||
" <td>2-s2.0-71149088987</td>\n",
|
||
" <td>1.000000e+00</td>\n",
|
||
" <td>1876</td>\n",
|
||
" <td>Users of the world, unite! The challenges and ...</td>\n",
|
||
" <td>Business Horizons</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>179</th>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2-s2.0-43449135033</td>\n",
|
||
" <td>6.899762e-15</td>\n",
|
||
" <td>645</td>\n",
|
||
" <td>Why we twitter: Understanding microblogging us...</td>\n",
|
||
" <td>Joint Ninth WebKDD and First SNA-KDD 2007 Work...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5120</th>\n",
|
||
" <td>669.625397</td>\n",
|
||
" <td>2-s2.0-79953711711</td>\n",
|
||
" <td>7.271520e-02</td>\n",
|
||
" <td>468</td>\n",
|
||
" <td>Social media? Get serious! Understanding the f...</td>\n",
|
||
" <td>Business Horizons</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1855</th>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2-s2.0-67349268124</td>\n",
|
||
" <td>2.974873e-01</td>\n",
|
||
" <td>450</td>\n",
|
||
" <td>Social media: The new hybrid element of the pr...</td>\n",
|
||
" <td>Business Horizons</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" betweenness eid eig_cent indegree \\\n",
|
||
"2275 6393.560498 2-s2.0-71149088987 1.000000e+00 1876 \n",
|
||
"179 0.000000 2-s2.0-43449135033 6.899762e-15 645 \n",
|
||
"5120 669.625397 2-s2.0-79953711711 7.271520e-02 468 \n",
|
||
"1855 0.000000 2-s2.0-67349268124 2.974873e-01 450 \n",
|
||
"\n",
|
||
" title \\\n",
|
||
"2275 Users of the world, unite! The challenges and ... \n",
|
||
"179 Why we twitter: Understanding microblogging us... \n",
|
||
"5120 Social media? Get serious! Understanding the f... \n",
|
||
"1855 Social media: The new hybrid element of the pr... \n",
|
||
"\n",
|
||
" source_title \n",
|
||
"2275 Business Horizons \n",
|
||
"179 Joint Ninth WebKDD and First SNA-KDD 2007 Work... \n",
|
||
"5120 Business Horizons \n",
|
||
"1855 Business Horizons "
|
||
]
|
||
},
|
||
"execution_count": 80,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# the four rows of network_stats with the highest indegree\n",
|
||
"network_stats.sort_values(\"indegree\", ascending=False).head(4)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 81,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style>\n",
|
||
" .dataframe thead tr:only-child th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>betweenness</th>\n",
|
||
" <th>eid</th>\n",
|
||
" <th>eig_cent</th>\n",
|
||
" <th>indegree</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>source_title</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2275</th>\n",
|
||
" <td>6393.560498</td>\n",
|
||
" <td>2-s2.0-71149088987</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>1876</td>\n",
|
||
" <td>Users of the world, unite! The challenges and ...</td>\n",
|
||
" <td>Business Horizons</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2259</th>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2-s2.0-70349816888</td>\n",
|
||
" <td>0.605279</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>The fairyland of Second Life: Virtual social w...</td>\n",
|
||
" <td>Business Horizons</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3612</th>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2-s2.0-77949522596</td>\n",
|
||
" <td>0.563979</td>\n",
|
||
" <td>335</td>\n",
|
||
" <td>Networked narratives: Understanding word-of-mo...</td>\n",
|
||
" <td>Journal of Marketing</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7088</th>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>2-s2.0-79551582037</td>\n",
|
||
" <td>0.432951</td>\n",
|
||
" <td>36</td>\n",
|
||
" <td>Online Personal Branding: Processes, Challenge...</td>\n",
|
||
" <td>Journal of Interactive Marketing</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" betweenness eid eig_cent indegree \\\n",
|
||
"2275 6393.560498 2-s2.0-71149088987 1.000000 1876 \n",
|
||
"2259 0.000000 2-s2.0-70349816888 0.605279 70 \n",
|
||
"3612 0.000000 2-s2.0-77949522596 0.563979 335 \n",
|
||
"7088 0.000000 2-s2.0-79551582037 0.432951 36 \n",
|
||
"\n",
|
||
" title \\\n",
|
||
"2275 Users of the world, unite! The challenges and ... \n",
|
||
"2259 The fairyland of Second Life: Virtual social w... \n",
|
||
"3612 Networked narratives: Understanding word-of-mo... \n",
|
||
"7088 Online Personal Branding: Processes, Challenge... \n",
|
||
"\n",
|
||
" source_title \n",
|
||
"2275 Business Horizons \n",
|
||
"2259 Business Horizons \n",
|
||
"3612 Journal of Marketing \n",
|
||
"7088 Journal of Interactive Marketing "
|
||
]
|
||
},
|
||
"execution_count": 81,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# the four rows of network_stats with the highest eigenvector centrality\n",
|
||
"network_stats.sort_values(\"eig_cent\", ascending=False).head(4)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 82,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style>\n",
|
||
" .dataframe thead tr:only-child th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>betweenness</th>\n",
|
||
" <th>eid</th>\n",
|
||
" <th>eig_cent</th>\n",
|
||
" <th>indegree</th>\n",
|
||
" <th>title</th>\n",
|
||
" <th>source_title</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2275</th>\n",
|
||
" <td>6393.560498</td>\n",
|
||
" <td>2-s2.0-71149088987</td>\n",
|
||
" <td>1.000000e+00</td>\n",
|
||
" <td>1876</td>\n",
|
||
" <td>Users of the world, unite! The challenges and ...</td>\n",
|
||
" <td>Business Horizons</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>401</th>\n",
|
||
" <td>6220.250000</td>\n",
|
||
" <td>2-s2.0-70350491889</td>\n",
|
||
" <td>3.749870e-16</td>\n",
|
||
" <td>103</td>\n",
|
||
" <td>Crisis in a networked world: Features of compu...</td>\n",
|
||
" <td>Social Science Computer Review</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2781</th>\n",
|
||
" <td>5131.824639</td>\n",
|
||
" <td>2-s2.0-84888047300</td>\n",
|
||
" <td>1.310283e-01</td>\n",
|
||
" <td>31</td>\n",
|
||
" <td>Social media metrics - A framework and guideli...</td>\n",
|
||
" <td>Journal of Interactive Marketing</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3821</th>\n",
|
||
" <td>4319.747561</td>\n",
|
||
" <td>2-s2.0-84910136235</td>\n",
|
||
" <td>3.045168e-18</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>What are health-related users tweeting? A qual...</td>\n",
|
||
" <td>Journal of Medical Internet Research</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" betweenness eid eig_cent indegree \\\n",
|
||
"2275 6393.560498 2-s2.0-71149088987 1.000000e+00 1876 \n",
|
||
"401 6220.250000 2-s2.0-70350491889 3.749870e-16 103 \n",
|
||
"2781 5131.824639 2-s2.0-84888047300 1.310283e-01 31 \n",
|
||
"3821 4319.747561 2-s2.0-84910136235 3.045168e-18 8 \n",
|
||
"\n",
|
||
" title \\\n",
|
||
"2275 Users of the world, unite! The challenges and ... \n",
|
||
"401 Crisis in a networked world: Features of compu... \n",
|
||
"2781 Social media metrics - A framework and guideli... \n",
|
||
"3821 What are health-related users tweeting? A qual... \n",
|
||
"\n",
|
||
" source_title \n",
|
||
"2275 Business Horizons \n",
|
||
"401 Social Science Computer Review \n",
|
||
"2781 Journal of Interactive Marketing \n",
|
||
"3821 Journal of Medical Internet Research "
|
||
]
|
||
},
|
||
"execution_count": 82,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# the four rows of network_stats with the highest betweenness\n",
|
||
"network_stats.sort_values(\"betweenness\", ascending=False).head(4)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 83,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<matplotlib.axes._subplots.AxesSubplot at 0x7f178179c908>"
|
||
]
|
||
},
|
||
"execution_count": 83,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAFKFJREFUeJzt3W9sW2fdxvHLifenJSVN7eKQLZNo1kqk2tYad0CgW/6YTlQIdRVEjBdoC6PJsjGyMbHhF9MkFinSiBwJGoEgRKNIaENKKEggJFO6ogRo4ixd1bAt6ZjUqFlM7NK667LO8XleVPPT0KS1XZ/45Ob7eVWf2T5X7sTXnJ9PznFZlmUJAGCskmIHAADYi6IHAMNR9ABgOIoeAAxH0QOA4Sh6ADAcRQ8AhqPoAcBwFD0AGI6iBwDDuYu589HRUUWjUbW2tur06dN5PYfX69Xc3FyBkxWW0zM6PZ9ExkJwej7J+Rmdlq+qqiqr+xW16AOBgAKBQDEjAIDxGN0AgOEoegAwHEUPAIaj6AHAcBQ9ABiOogcAw1H0AGC4oh5HXwiz99cVbd+lP/td0fYNANniHT0AGM6Wop+fn9fTTz+taDRqx9MDAHKQ1eimt7dXY2NjKi8vV3d3d2b7+Pi4+vv7lU6n1dTUpD179kiSDh48qM9+9rP2JAYA5CSrd/T19fUKhUKLtqXTafX19SkUCikcDmtoaEjT09N67bXXdOutt2r9+vW2BAYA5Card/S1tbWKxWKLtk1NTamyslI+n0+SVFdXp5GREc3Pz+v999/X9PS0brzxRm3fvl0lJXwUAADFkvdRN4lEQh6PJ3Pb4/FocnJS3/zmNyVJhw8f1rp165Yt+UgkokgkIknq6uqS1+vNK8dsXo8qjGwzu93uvL++leD0fBIZC8Hp+STnZ3R6vuXkXfSWZV2xzeVyZf5dX19/1ccHg0EFg8HMbSed4zlb2WZ22jms/5vT80lkLASn55Ocn9Fp+bI9H33eMxWPx6N4PJ65HY/HVVFRkdNzjI6O6qc//Wm+EQAAWci76GtqajQzM6NYLKZUKqXh4eGcLyISCATU2tqabwQAQBayGt309PRoYmJCyWRSbW1tam5uVmNjo1paWtTZ2al0Oq2GhgZVV1fntPPLLyUIALBHVkXf0dGx5Ha/3y+/35/3zrmUIADYj+MeAcBwRS16PowFAPsV9eyVjG4AwH6MbgDAcIxuAMBwjG4AwHCMbgDAcBQ9ABiOGT0AGI4ZPQAYjtENABiOogcAw1H0AGA4PowFAMPxYSwAGI7RDQAYjqIHAMNR9ABgOIoeAAzHUTcAYDiOugEAwzG6AQDDUfQAYDiKHgAMR9EDgOEoegAwHEUPAIbjOHoAMBzH0QOA4RjdAIDhKHoAMBxFDwCGo+gBwHAUPQAYjqIHAMNR9ABgOIoeAAxH0QOA4Qr+l7HT09P6wx/+oGQyqTvuuEO7du0q9C4AADnIquh7e3s1Njam8vJydXd3Z7aPj4+rv79f6XRaTU1N2rNnj2699Vbt27dP6XSa89gAgANkNbqpr69XKBRatC2dTquvr0+hUEjhcFhDQ0Oanp6WdOlkZc8++6zuuOOOwicGAOQkq6Kvra1VWVnZom1TU1OqrKyUz+eT2+1WXV2dRkZGJF06Wdnzzz+vv/71r4VPDADISd4z+kQiIY/Hk7nt8Xg0OTmpEydO6B//+IdSqZS2b9++7OMjkYgikYgkqaurS16vN68cs3k9qjCyzex2u/P++laC0/NJZCwEp+eTnJ/R6fmWk3fRW5Z1xTaXy6WtW7dq69at13x8MBhUMBjM3J6bm8s3StFkm9nr9Tr663N6PomMheD0fJLzMzotX1VVVVb3y/vwSo/Ho3g8nrkdj8dVUVGR03Nw4REAsF/eRV9TU6OZmRnFYjGlUikNDw/nfBGRQCCg1tbWfCMAALKQ1eimp6dHExMTSiaTamtrU3NzsxobG9XS0qLOzk6l02k1NDSouro6p52Pjo4qGo1S9gBgo6yKvqOjY8ntfr9ffr8/751zKUEAsB+nQAAAwxW16PkwFg
DsV/Bz3eSC0Q0A2I/RDQAYjtENABiO0Q0AGI7RDQAYjqIHAMMxowcAwzGjBwDDMboBAMNR9ABgOIoeAAzHh7EAYDg+jAUAwzG6AQDDUfQAYDiKHgAMR9EDgOE46gYADMdRNwBgOEY3AGA4ih4ADEfRA4DhKHoAMBxFDwCGo+gBwHAcRw8AhuM4egAwHKMbADAcRQ8AhqPoAcBwFD0AGI6iBwDDUfQAYDiKHgAMR9EDgOEoegAwnC1/GXv06FGNjY3p3Llzuu+++3TXXXfZsRsAQBayLvre3l6NjY2pvLxc3d3dme3j4+Pq7+9XOp1WU1OT9uzZo7vvvlt33323zp8/rwMHDlD0AFBEWY9u6uvrFQqFFm1Lp9Pq6+tTKBRSOBzW0NCQpqenM/99YGBA9913X+HSAgBylnXR19bWqqysbNG2qakpVVZWyufzye12q66uTiMjI7IsS7/61a+0bds2bdq0qeChAQDZu64ZfSKRkMfjydz2eDyanJzUH//4Rx0/flwXLlzQO++8o127dl3x2EgkokgkIknq6uqS1+vNK8NsftELItvMbrc7769vJTg9n0TGQnB6Psn5GZ2ebznXVfSWZV2xzeVyaffu3dq9e/dVHxsMBhUMBjO35+bmridKUWSb2ev1Ovrrc3o+iYyF4PR8kvMzOi1fVVVVVve7rsMrPR6P4vF45nY8HldFRUXWj+fCIwBgv+sq+pqaGs3MzCgWiymVSml4eDinC4kEAgG1trZeTwQAwDVkPbrp6enRxMSEksmk2tra1NzcrMbGRrW0tKizs1PpdFoNDQ2qrq7Oeuejo6OKRqOUPQDYKOui7+joWHK73++X3+/Pa+dcShAA7McpEADAcEUtej6MBQD72XKum2wxugEA+zG6AQDDMboBAMMxugEAwzG6AQDDUfQAYDhm9ABgOGb0AGA4RjcAYDiKHgAMR9EDgOH4MBYADMeHsQBgOEY3AGA4ih4ADEfRA4DhKHoAMBxH3QCA4TjqBgAMx+gGAAxH0QOA4Sh6ADAcRQ8AhqPoAcBwRT3qZrVb+NaXs7rfbIH3W/qz3xX4GQGYjOPoAcBwHEcPAIZjRg8AhqPoAcBwFD0AGI6iBwDDUfQAYDiKHgAMR9EDgOEoegAwHEUPAIYr+F/Gzs7OamBgQBcuXNB3v/vdQj89ACBHWb2j7+3t1cMPP3xFcY+Pj+s73/mOvv3tb+u3v/2tJMnn8+mRRx4pfFIAQF6yKvr6+nqFQqFF29LptPr6+hQKhRQOhzU0NKTp6WlbQgIA8pdV0dfW1qqsrGzRtqmpKVVWVsrn88ntdquurk4jIyO2hAQA5C/vGX0ikZDH48nc9ng8mpycVDKZ1K9//Wu9/fbbGhwc1P3337/k4yORiCKRiCSpq6tLXq83rxyFPtf7apDvWi3H7XYX/DkLjYzXz+n5JOdndHq+5eRd9JZlXbHN5XJp3bp12rdv3zUfHwwGFQwGM7fn5ubyjfI/p9Br5fV6Hb/+ZLx+Ts8nOT+j0/JVVVVldb+8D6/0eDyKx+OZ2/F4XBUVFTk9BxceAQD75V30NTU1mpmZUSwWUyqV0vDwcM4XEQkEAmptbc03AgAgC1mNbnp6ejQxMaFkMqm2tjY1NzersbFRLS0t6uzsVDqdVkNDg6qrq3Pa+ejoqKLRKGUPADbKqug7OjqW3O73++X3+/PeOZcSBAD7cQoEADBcUYueD2MBwH4FP9dNLhjdAID9GN0AgOEY3QCA4RjdAIDhGN0AgOEoegAwHDN6ADAcM3oAMByjGwAwHEUPAIaj6AHAcHwYCwCG48NYADAcoxsAMBxFDwCGo+gBwHAUPQAYrqgfxnJx8PwsfOvLBX2+2RzuW/qz3xV03wDsx1E3AGA4RjcAYDiKHgAMR9EDgOEoegAwHEUPAIaj6AHAcJy9EgAMx3H0AGA4RjcAYDiKHgAMR9EDgOEoegAwHEUPAIaj6AHAcBQ9ABiOogcAw1H0AGC4gv9l7Pz8vH
7+85/L7XZr69at2rlzZ6F3AQDIQVZF39vbq7GxMZWXl6u7uzuzfXx8XP39/Uqn02pqatKePXt09OhRfeYzn1EgEFA4HKboAaDIshrd1NfXKxQKLdqWTqfV19enUCikcDisoaEhTU9PKx6Py+v1XnryEiZDAFBsWTVxbW2tysrKFm2bmppSZWWlfD6f3G636urqNDIyIo/Ho3g8LkmyLKvwiQEAOcl7Rp9IJOTxeDK3PR6PJicn9cUvflG/+MUvNDY2pk996lPLPj4SiSgSiUiSurq6Mr8F5Go2r0chXwvf+nJR9uv+/dG8f0ZWitvtdnRGp+eTipdx9v667O5nw759g8M2POtieRf9Uu/WXS6Xbr75ZrW3t1/z8cFgUMFgMHN7bm4u3yj4H5BKpRz/M+L1eh2d0en5pNWRsdCu5+utqqrK6n55D9EvH9FIUjweV0VFRU7PwYVHAMB+eRd9TU2NZmZmFIvFlEqlNDw8nPNFRAKBgFpbW/ONAADIQlajm56eHk1MTCiZTKqtrU3Nzc1qbGxUS0uLOjs7lU6n1dDQoOrqarvzAgBylFXRd3R0LLnd7/fL7/fnvfPR0VFFo1He1QOAjbhmLAAYrqh/0cSHsQBgP97RA4DhOEcBABjOZXGeAgAw2qp/R//MM88UO8I1OT2j0/NJZCwEp+eTnJ/R6fmWs+qLHgBwdRQ9ABiu9Lnnnnuu2CGu16ZNm4od4ZqcntHp+SQyFoLT80nOz+j0fEvhw1gAMByjGwAwXFH/YOp6LXXN2pU2Nzen/fv36z//+Y9cLpeCwaB2796tl19+WX/+85/10Y9+VJL0wAMPZM4LNDg4qEOHDqmkpEQPPfSQtm3bZnvORx99VDfffLNKSkpUWlqqrq4unT9/XuFwWP/+97+1ceNGPfHEEyorK5NlWerv79err76qm266Se3t7bb+unr69GmFw+HM7VgspubmZr377rtFXcOlrpWcz5odPnxYAwMDkqS9e/eqvr7e1owHDhxQNBqV2+2Wz+dTe3u7PvKRjygWi+mJJ57InMN88+bN2rdvnyTprbfe0v79+3Xx4kVt375dDz30kFwuly358nlt2PlaXypjOBzW6dOnJUkXLlzQ2rVr9cILLxRlDQvCWqUWFhasxx57zHrnnXesDz74wHrqqaesU6dOrXiORCJhnTx50rIsy7pw4YL1+OOPW6dOnbJeeukl6+DBg1fc/9SpU9ZTTz1lXbx40ZqdnbUee+wxa2Fhwfac7e3t1tmzZxdtO3DggDU4OGhZlmUNDg5aBw4csCzLsqLRqNXZ2Wml02nrjTfesL7//e/bnu9DCwsL1sMPP2zFYrGir+GJEyeskydPWk8++WRmW65rlkwmrUcffdRKJpOL/m1nxvHxcSuVSmXyfphxdnZ20f0u98wzz1hvvPGGlU6nrc7OTmtsbMy2fLl+X+1+rS+V8XIvvvii9Zvf/MayrOKsYSGs2tHNctesXWkVFRWZd25r1qzRLbfcokQisez9R0ZGVFdXpxtuuEEf+9jHVFlZqampqZWKe0WWe++9V5J07733ZtZvdHRU99xzj1wul7Zs2aJ3331XZ86cWZFMx48fV2VlpTZu3HjV3CuxhktdKznXNRsfH9edd96psrIylZWV6c4779T4+LitGe+66y6VlpZKkrZs2XLVn0dJOnPmjN577z1t2bJFLpdL99xzT8FeS0vlW85y31e7X+tXy2hZlv72t7/pc5/73FWfw841LIRVO7pZ7pq1xRSLxfSvf/1Lt99+u15//XX96U9/0pEjR7Rp0yZ94xvfUFlZmRKJhDZv3px5zIYNG675QiyUzs5OSdIXvvAFBYNBnT17NnNVsIqKCp07d07SpbW9/LqdHo9HiUQi5yuI5WNoaGjRi8ppa5jrmv33z+lKZpWkQ4cOqa7u/6+HGovF9L3vfU9r1qzR1772NX3yk59c8rVkd8Zcv6/Feq3/85//VHl5uT7+8Y9ntjllDXOxaoveWu
aatcUyPz+v7u5uPfjgg1q7dq127dqlr3zlK5Kkl156Sb/85S/V3t6+ZO6V8IMf/EAbNmzQ2bNn9fzzz1/1WpPFWttUKqVoNKqvf/3rkuS4NbyaXNZspX5OBwYGVFpaqp07d0q69D+m3t5erVu3Tm+99ZZeeOEFdXd3r/h65vp9LeZr/b/feDhlDXO1akc3hbhmbaGkUil1d3dr586d+vSnPy1JWr9+vUpKSlRSUqKmpiadPHlyydyJREIbNmywPeOH+ygvL9eOHTs0NTWl8vLyzEjmzJkzmQ/HPB7PogsWr9Tavvrqq/rEJz6h9evXS3LeGkrKec02bNhwRdaVWMvDhw8rGo3q8ccfz5TiDTfcoHXr1km6dCy4z+fTzMzMkq8lO9cz1+9rsV7rCwsLOnr06KLfiJyyhrlatUVfiGvWFoJlWfrJT36iW265RV/60pcy2y+faR89ejRzmcVAIKDh4WF98MEHisVimpmZ0e23325rxvn5eb333nuZf7/22mu67bbbFAgE9Morr0iSXnnlFe3YsSOT8ciRI7IsS2+++abWrl1blLGNk9bwQ7mu2bZt23Ts2DGdP39e58+f17Fjx2w/ymp8fFwHDx7U008/rZtuuimz/dy5c0qn05Kk2dlZzczMyOfzqaKiQmvWrNGbb74py7J05MgRW19LuX5fi/VaP378uKqqqhaNZJyyhrla1X8wNTY2phdffDFzzdq9e/eueIbXX39dzz77rG677bbMO6cHHnhAQ0NDevvtt+VyubRx40bt27cvU5YDAwP6y1/+opKSEj344IPavn27rRlnZ2f1wx/+UNKldymf//zntXfvXiWTSYXDYc3Nzcnr9erJJ5/MHCrY19enY8eO6cYbb1R7e7tqampszfj+++/rkUce0Y9//GOtXbtWkvSjH/2oqGt4+bWSy8vL1dzcrB07duS8ZocOHdLg4KCkS4dXNjQ02JpxcHBQqVQq8wHjh4cA/v3vf9fLL7+s0tJSlZSU6Ktf/WqmjE6ePKne3l5dvHhR27ZtU0tLS0HGI0vlO3HiRM7fVztf60tlbGxs1P79+7V582bt2rUrc99irGEhrOqiBwBc26od3QAAskPRA4DhKHoAMBxFDwCGo+gBwHAUPQAYjqIHAMNR9ABguP8DaoV4MSni/p8AAAAASUVORK5CYII=\n",
|
||
"text/plain": [
|
||
"<matplotlib.figure.Figure at 0x7f178c2ee4e0>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# histogram of indegree; log=True puts the bin counts on a log scale\n",
|
||
"network_stats['indegree'].hist(log = True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# things to store"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"23131"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# number of rows in articles; remember() stores it in r and displays it\n",
|
||
"remember('total_articles', articles.shape[0])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"35620"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"4807"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"3864"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# total number of citations in the sm dataset (rows of raw_edgelist)\n",
|
||
"remember('sm_citations', raw_edgelist.shape[0])\n",
|
||
"\n",
|
||
"# the number of distinct articles in the \"from\" column (the citing side, going by the key name)\n",
|
||
"remember('sm_citing', len(raw_edgelist[\"from\"].unique()))\n",
|
||
"\n",
|
||
"# the number of distinct articles in the \"to\" column, i.e. those with any INCOMING citations\n",
|
||
"remember('sm_cited', len(raw_edgelist[\"to\"].unique()))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"212773"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"42935"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"9710"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# total number of citations in the \"all\" dataset (rows of combo_raw_edgelist)\n",
|
||
"remember('all_citations', combo_raw_edgelist.shape[0])\n",
|
||
"\n",
|
||
"# the number of distinct articles in the \"from\" column (the citing side, going by the key name)\n",
|
||
"remember('all_citing', len(combo_raw_edgelist[\"from\"].unique()))\n",
|
||
"\n",
|
||
"# the number of distinct articles in combo_raw_edgelist that have any INCOMING citations\n",
|
||
"remember('all_cited', len(combo_raw_edgelist[\"to\"].unique()))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 87,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style>\n",
|
||
" .dataframe thead tr:only-child th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>eid</th>\n",
|
||
" <th>cluster</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2-s2.0-71149088987</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2-s2.0-70349816888</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2-s2.0-79953711711</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2-s2.0-79551630751</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2-s2.0-80051469103</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>2-s2.0-84866718851</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>2-s2.0-84877685551</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>2-s2.0-84864442547</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>2-s2.0-84861420864</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>2-s2.0-84887483487</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>2-s2.0-80955144847</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>2-s2.0-84885038309</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>2-s2.0-84886099569</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>2-s2.0-84863379783</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>2-s2.0-84899093663</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>2-s2.0-84879109859</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>2-s2.0-83055168309</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>2-s2.0-84876304322</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>2-s2.0-84866168147</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>2-s2.0-84877817428</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>2-s2.0-84873481256</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>21</th>\n",
|
||
" <td>2-s2.0-84861794897</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>22</th>\n",
|
||
" <td>2-s2.0-84899508298</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>23</th>\n",
|
||
" <td>2-s2.0-84898082465</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>24</th>\n",
|
||
" <td>2-s2.0-84879021774</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25</th>\n",
|
||
" <td>2-s2.0-80054988041</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>2-s2.0-84944394118</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>27</th>\n",
|
||
" <td>2-s2.0-84870572301</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>28</th>\n",
|
||
" <td>2-s2.0-84907167320</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>2-s2.0-84914675721</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6110</th>\n",
|
||
" <td>2-s2.0-84856086839</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6111</th>\n",
|
||
" <td>2-s2.0-84859510122</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6112</th>\n",
|
||
" <td>2-s2.0-84905121209</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6113</th>\n",
|
||
" <td>2-s2.0-84883758613</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6114</th>\n",
|
||
" <td>2-s2.0-84877953100</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6115</th>\n",
|
||
" <td>2-s2.0-84904376766</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6116</th>\n",
|
||
" <td>2-s2.0-84905837182</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6117</th>\n",
|
||
" <td>2-s2.0-84900461218</td>\n",
|
||
" <td>12</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6118</th>\n",
|
||
" <td>2-s2.0-83755228785</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6119</th>\n",
|
||
" <td>2-s2.0-84886795975</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6120</th>\n",
|
||
" <td>2-s2.0-84876132785</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6121</th>\n",
|
||
" <td>2-s2.0-84903121334</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6122</th>\n",
|
||
" <td>2-s2.0-84863720400</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6123</th>\n",
|
||
" <td>2-s2.0-84873180938</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6124</th>\n",
|
||
" <td>2-s2.0-84914112838</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6125</th>\n",
|
||
" <td>2-s2.0-84878795748</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6126</th>\n",
|
||
" <td>2-s2.0-84888011666</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6127</th>\n",
|
||
" <td>2-s2.0-84942101218</td>\n",
|
||
" <td>13</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6128</th>\n",
|
||
" <td>2-s2.0-80052752113</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6129</th>\n",
|
||
" <td>2-s2.0-84874074707</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6130</th>\n",
|
||
" <td>2-s2.0-84942582235</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6131</th>\n",
|
||
" <td>2-s2.0-70849130360</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6132</th>\n",
|
||
" <td>2-s2.0-84864152630</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6133</th>\n",
|
||
" <td>2-s2.0-84868709161</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6134</th>\n",
|
||
" <td>2-s2.0-84896350015</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6135</th>\n",
|
||
" <td>2-s2.0-84944104933</td>\n",
|
||
" <td>15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6136</th>\n",
|
||
" <td>2-s2.0-84875539506</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6137</th>\n",
|
||
" <td>2-s2.0-84902262954</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6138</th>\n",
|
||
" <td>2-s2.0-84909954481</td>\n",
|
||
" <td>17</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6139</th>\n",
|
||
" <td>2-s2.0-84921469678</td>\n",
|
||
" <td>18</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6140 rows × 2 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" eid cluster\n",
|
||
"0 2-s2.0-71149088987 1\n",
|
||
"1 2-s2.0-70349816888 1\n",
|
||
"2 2-s2.0-79953711711 1\n",
|
||
"3 2-s2.0-79551630751 1\n",
|
||
"4 2-s2.0-80051469103 1\n",
|
||
"5 2-s2.0-84866718851 1\n",
|
||
"6 2-s2.0-84877685551 1\n",
|
||
"7 2-s2.0-84864442547 1\n",
|
||
"8 2-s2.0-84861420864 1\n",
|
||
"9 2-s2.0-84887483487 1\n",
|
||
"10 2-s2.0-80955144847 1\n",
|
||
"11 2-s2.0-84885038309 1\n",
|
||
"12 2-s2.0-84886099569 1\n",
|
||
"13 2-s2.0-84863379783 1\n",
|
||
"14 2-s2.0-84899093663 1\n",
|
||
"15 2-s2.0-84879109859 1\n",
|
||
"16 2-s2.0-83055168309 1\n",
|
||
"17 2-s2.0-84876304322 1\n",
|
||
"18 2-s2.0-84866168147 1\n",
|
||
"19 2-s2.0-84877817428 1\n",
|
||
"20 2-s2.0-84873481256 1\n",
|
||
"21 2-s2.0-84861794897 1\n",
|
||
"22 2-s2.0-84899508298 1\n",
|
||
"23 2-s2.0-84898082465 1\n",
|
||
"24 2-s2.0-84879021774 1\n",
|
||
"25 2-s2.0-80054988041 1\n",
|
||
"26 2-s2.0-84944394118 1\n",
|
||
"27 2-s2.0-84870572301 1\n",
|
||
"28 2-s2.0-84907167320 1\n",
|
||
"29 2-s2.0-84914675721 1\n",
|
||
"... ... ...\n",
|
||
"6110 2-s2.0-84856086839 12\n",
|
||
"6111 2-s2.0-84859510122 12\n",
|
||
"6112 2-s2.0-84905121209 12\n",
|
||
"6113 2-s2.0-84883758613 12\n",
|
||
"6114 2-s2.0-84877953100 12\n",
|
||
"6115 2-s2.0-84904376766 12\n",
|
||
"6116 2-s2.0-84905837182 12\n",
|
||
"6117 2-s2.0-84900461218 12\n",
|
||
"6118 2-s2.0-83755228785 13\n",
|
||
"6119 2-s2.0-84886795975 13\n",
|
||
"6120 2-s2.0-84876132785 13\n",
|
||
"6121 2-s2.0-84903121334 13\n",
|
||
"6122 2-s2.0-84863720400 13\n",
|
||
"6123 2-s2.0-84873180938 13\n",
|
||
"6124 2-s2.0-84914112838 13\n",
|
||
"6125 2-s2.0-84878795748 13\n",
|
||
"6126 2-s2.0-84888011666 13\n",
|
||
"6127 2-s2.0-84942101218 13\n",
|
||
"6128 2-s2.0-80052752113 14\n",
|
||
"6129 2-s2.0-84874074707 14\n",
|
||
"6130 2-s2.0-84942582235 14\n",
|
||
"6131 2-s2.0-70849130360 14\n",
|
||
"6132 2-s2.0-84864152630 14\n",
|
||
"6133 2-s2.0-84868709161 15\n",
|
||
"6134 2-s2.0-84896350015 15\n",
|
||
"6135 2-s2.0-84944104933 15\n",
|
||
"6136 2-s2.0-84875539506 16\n",
|
||
"6137 2-s2.0-84902262954 16\n",
|
||
"6138 2-s2.0-84909954481 17\n",
|
||
"6139 2-s2.0-84921469678 18\n",
|
||
"\n",
|
||
"[6140 rows x 2 columns]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"remember('g_sm_clusters', g_sm_clu[[\"eid\", \"cluster\"]])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 88,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"['all_citations',\n",
|
||
" 'all_cited',\n",
|
||
" 'all_citing',\n",
|
||
" 'cluster_edgelist',\n",
|
||
" 'g_sm_clusters',\n",
|
||
" 'sm_citations',\n",
|
||
" 'sm_cited',\n",
|
||
" 'sm_citing',\n",
|
||
" 'total_articles']"
|
||
]
|
||
},
|
||
"execution_count": 88,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"sorted(r.keys())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 89,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# save the contents of the `r` dictionary to an RData file\n",
|
||
"def save_to_r(r_dict, filename=\"output.RData\"):\n",
|
||
" for var_name, x in r.items():\n",
|
||
" var_name = var_name.replace('_', '.')\n",
|
||
" if type(x) == np.int64:\n",
|
||
" x = np.asscalar(x)\n",
|
||
" \n",
|
||
" if type(x) == pd.DataFrame:\n",
|
||
" rx = pandas2ri.py2ri(x)\n",
|
||
" else:\n",
|
||
" rx = x\n",
|
||
" \n",
|
||
" robjects.r.assign(var_name, x)\n",
|
||
"\n",
|
||
" # create a new variable called in R\n",
|
||
" robjects.r(\"r <- sapply(ls(), function (x) {eval(parse(text=x))})\")\n",
|
||
" robjects.r('save(\"r\", file=\"{}\")'.format(filename))\n",
|
||
" robjects.r(\"rm(list=ls())\")\n",
|
||
" \n",
|
||
"save_to_r(r, \"../../paper/data/network_data.RData\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.4"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 1
|
||
}
|