1
0

updating analysis of *relevant* phab tickets for https case

This commit is contained in:
Matthew Gaughan 2025-04-13 12:44:48 -07:00
parent 1fba61b75b
commit 5fe41d576d
2 changed files with 884 additions and 38 deletions

View File

@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 9,
"id": "e4f0b3f0-5255-46f1-822f-e455087ba315",
"metadata": {},
"outputs": [],
@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 10,
"id": "ac5e624b-08a4-4ede-bc96-cfc26c3edac3",
"metadata": {},
"outputs": [],
@ -32,7 +32,9 @@
"def http_relevant(text):\n",
" if pd.isnull(text):\n",
" return False\n",
"\n",
" # TODO: expanded dictionary for relevancy\n",
" # http, ip, login, auth, SSL, TLS, certificate \n",
" \n",
" for word in text.split():\n",
" if \"://\" not in word.lower() and \"http\" in word.lower():\n",
" return True\n",
@ -41,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 11,
"id": "d449164e-1d28-4580-9eb1-f0f69978f114",
"metadata": {},
"outputs": [
@ -49,7 +51,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_11370/1288881096.py:35: SettingWithCopyWarning: \n",
"/tmp/ipykernel_22429/86623999.py:36: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -76,13 +78,17 @@
"\n",
"# after 12-1-2012 before 12-1-2013\n",
"phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379099276)]\n",
"\n",
"#removing headless conversations\n",
"task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n",
"headed_task_phids = task_phab_df['conversation_id'].unique()\n",
"filtered_phab_df = filtered_phab_df[filtered_phab_df['conversation_id'].isin(headed_task_phids)]\n",
"\n",
"#TODO: filter out the sourceforge migration \n",
"# Originally from: http://sourceforge.net in the task task_summary\n",
"\n",
"#removing gerrit comments \n",
"mid_comment_phab_df = filtered_phab_df[filtered_phab_df['meta.gerrit'] != True]\n",
"\n",
@ -95,13 +101,13 @@
"task_phab_df['is_relevant'] = task_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
"mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
"\n",
"#comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
"comment_phab_df = mid_comment_phab_df"
"comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
"#comment_phab_df = mid_comment_phab_df"
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 12,
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
"metadata": {},
"outputs": [
@ -109,9 +115,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Unique conversation_ids: 6139\n",
"Unique ids: 26300\n",
"Unique speakers: 506\n"
"Unique conversation_ids: 96\n",
"Unique ids: 361\n",
"Unique speakers: 47\n"
]
}
],
@ -127,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 13,
"id": "d226d781-b002-4842-a3ae-92d4851a5878",
"metadata": {},
"outputs": [],
@ -144,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 14,
"id": "3ae40d24-bbe8-49c3-a3a9-70bde1b4d559",
"metadata": {},
"outputs": [
@ -152,7 +158,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_11370/2783900859.py:1: SettingWithCopyWarning: \n",
"/tmp/ipykernel_22429/2783900859.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -177,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 15,
"id": "a8469b16-4ae6-4b06-bf1b-1f2f6c736cab",
"metadata": {},
"outputs": [],
@ -206,7 +212,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 16,
"id": "8b9a12f9-71bf-4bc9-bcfd-c73aab4be920",
"metadata": {},
"outputs": [
@ -214,7 +220,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_11370/2805711855.py:1: SettingWithCopyWarning: \n",
"/tmp/ipykernel_22429/2805711855.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -237,6 +243,423 @@
"#comment_phab_df['resolved_dependency_tree'] = comment_phab_df['processed_resolved_text'].apply(extract_dependency_tree)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "370a2767-04f8-4d0b-9b94-9c6a0b408822",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2612 Recently (starting maybe 2 days ago), some goo...\n",
"2989 Although the \"Always use a secure connection w...\n",
"3080 Originally from: http://sourceforge.net/p/pywi...\n",
"3084 Originally from: http://sourceforge.net/p/pywi...\n",
"3096 Originally from: http://sourceforge.net/p/pywi...\n",
" ... \n",
"44209 Originally from: http://sourceforge.net/p/pywi...\n",
"44217 Originally from: http://sourceforge.net/p/pywi...\n",
"44265 Originally from: http://sourceforge.net/p/pywi...\n",
"44277 Originally from: http://sourceforge.net/p/pywi...\n",
"44316 Originally from: http://sourceforge.net/p/pywi...\n",
"Name: comment_text, Length: 96, dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"comment_phab_df[comment_phab_df['comment_type'] == 'task_description']['comment_text']"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "5f138688-3d1a-4a27-b16d-d8aa438dafea",
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "44",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
"File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2606\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2630\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 44",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcomment_phab_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcomment_text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m44\u001b[39;49m\u001b[43m]\u001b[49m\n",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1121\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[key]\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1121\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;66;03m# Convert generator to list before going through hashable part\u001b[39;00m\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# (We will iterate through the generator there to check for slices)\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1237\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[label]\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;66;03m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1237\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[loc]\n",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
"\u001b[0;31mKeyError\u001b[0m: 44"
]
}
],
"source": [
"comment_phab_df['comment_text'][44]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "f61845ce-d91f-4b06-9039-b507905cb972",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>task_title</th>\n",
" <th>comment_text</th>\n",
" <th>date_created</th>\n",
" <th>speaker</th>\n",
" <th>meta.affil</th>\n",
" <th>conversation_id</th>\n",
" <th>comment_type</th>\n",
" <th>status</th>\n",
" <th>meta.gerrit</th>\n",
" <th>id</th>\n",
" <th>reply_to</th>\n",
" <th>timestamp</th>\n",
" <th>is_relevant</th>\n",
" <th>processed_text</th>\n",
" <th>dependency_tree</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
" <td>1383189120</td>\n",
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_description</td>\n",
" <td>resolved</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2013-10-31 03:12:00+00:00</td>\n",
" <td>False</td>\n",
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
" <td>[(Ni, Ni, nsubj, experiencing, [experiencing],...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>**mdale** wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
" <td>1383856310</td>\n",
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>2013-11-07 20:31:50+00:00</td>\n",
" <td>False</td>\n",
" <td>mdale wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
" <td>[( , , dep, mdale, [mdale, wrote], [ ], []...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
" <td>1383796532</td>\n",
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>3</td>\n",
" <td>2.0</td>\n",
" <td>2013-11-07 03:55:32+00:00</td>\n",
" <td>False</td>\n",
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
" <td>[(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>&gt; So putting it back to 200px specifically for...</td>\n",
" <td>1383776933</td>\n",
" <td>PHID-USER-a5pveeqqwaddgfjiv2fq</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>4</td>\n",
" <td>3.0</td>\n",
" <td>2013-11-06 22:28:53+00:00</td>\n",
" <td>False</td>\n",
" <td>&gt; So putting it back to 200px specifically for...</td>\n",
" <td>[(&gt;, &gt;, dep, seem, [seem], [&gt;], []), (So, so, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
" <td>1383775629</td>\n",
" <td>PHID-USER-dbudsaorcqut7sg3vvbi</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>5</td>\n",
" <td>4.0</td>\n",
" <td>2013-11-06 22:07:09+00:00</td>\n",
" <td>False</td>\n",
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
" <td>[(Many, many, amod, thanks, [thanks], [Many], ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46297</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>**romaine.wiki** wrote:\\n\\nhttps://commons.wik...</td>\n",
" <td>1377925197</td>\n",
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46298</td>\n",
" <td>46297.0</td>\n",
" <td>2013-08-31 04:59:57+00:00</td>\n",
" <td>False</td>\n",
" <td>romaine.wiki wrote:\\n\\n</td>\n",
" <td>[( , , dep, romaine.wiki, [romaine.wiki, wr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46298</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>We're playing with the templates on https://zh...</td>\n",
" <td>1377632023</td>\n",
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46299</td>\n",
" <td>46298.0</td>\n",
" <td>2013-08-27 19:33:43+00:00</td>\n",
" <td>False</td>\n",
" <td>We're playing with the templates on Dennis ...</td>\n",
" <td>[(We, we, nsubj, playing, [playing, seems], [W...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46299</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>The links are all listed on https://commons.wi...</td>\n",
" <td>1377427853</td>\n",
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46300</td>\n",
" <td>46299.0</td>\n",
" <td>2013-08-25 10:50:53+00:00</td>\n",
" <td>False</td>\n",
" <td>The links are all listed on . The Unique Iden...</td>\n",
" <td>[(The, the, det, links, [links, listed], [The]...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46300</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>Looks like some lists are available, but not i...</td>\n",
" <td>1376771718</td>\n",
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46301</td>\n",
" <td>46300.0</td>\n",
" <td>2013-08-17 20:35:18+00:00</td>\n",
" <td>False</td>\n",
" <td>Looks like some lists are available, but not i...</td>\n",
" <td>[(Looks, look, ROOT, Looks, [], [Looks, like, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46301</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>We already have a lot of sources in the monume...</td>\n",
" <td>1376423842</td>\n",
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46302</td>\n",
" <td>46301.0</td>\n",
" <td>2013-08-13 19:57:22+00:00</td>\n",
" <td>False</td>\n",
" <td>We already have a lot of sources in the monume...</td>\n",
" <td>[(We, we, nsubj, have, [have], [We], []), (alr...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26300 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" task_title \\\n",
"0 ?embedplayer=yes broken for videos with width ... \n",
"1 ?embedplayer=yes broken for videos with width ... \n",
"2 ?embedplayer=yes broken for videos with width ... \n",
"3 ?embedplayer=yes broken for videos with width ... \n",
"4 ?embedplayer=yes broken for videos with width ... \n",
"... ... \n",
"46297 Add Taiwan in Chinese to the monuments database \n",
"46298 Add Taiwan in Chinese to the monuments database \n",
"46299 Add Taiwan in Chinese to the monuments database \n",
"46300 Add Taiwan in Chinese to the monuments database \n",
"46301 Add Taiwan in Chinese to the monuments database \n",
"\n",
" comment_text date_created \\\n",
"0 Ni!\\n\\nI am experiencing an unresponsive black... 1383189120 \n",
"1 **mdale** wrote:\\n\\n@Ryan, I just mean you wil... 1383856310 \n",
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... 1383796532 \n",
"3 > So putting it back to 200px specifically for... 1383776933 \n",
"4 Many thanks to Brian and Mark for their fine w... 1383775629 \n",
"... ... ... \n",
"46297 **romaine.wiki** wrote:\\n\\nhttps://commons.wik... 1377925197 \n",
"46298 We're playing with the templates on https://zh... 1377632023 \n",
"46299 The links are all listed on https://commons.wi... 1377427853 \n",
"46300 Looks like some lists are available, but not i... 1376771718 \n",
"46301 We already have a lot of sources in the monume... 1376423842 \n",
"\n",
" speaker meta.affil \\\n",
"0 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
"1 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
"2 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
"3 PHID-USER-a5pveeqqwaddgfjiv2fq False \n",
"4 PHID-USER-dbudsaorcqut7sg3vvbi False \n",
"... ... ... \n",
"46297 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
"46298 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
"46299 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
"46300 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
"46301 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
"\n",
" conversation_id comment_type status \\\n",
"0 PHID-TASK-lfhsyqihbylzxoeftr7m task_description resolved \n",
"1 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"2 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"3 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"4 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"... ... ... ... \n",
"46297 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46298 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46299 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46300 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46301 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"\n",
" meta.gerrit id reply_to timestamp is_relevant \\\n",
"0 False 1 NaN 2013-10-31 03:12:00+00:00 False \n",
"1 False 2 1.0 2013-11-07 20:31:50+00:00 False \n",
"2 False 3 2.0 2013-11-07 03:55:32+00:00 False \n",
"3 False 4 3.0 2013-11-06 22:28:53+00:00 False \n",
"4 False 5 4.0 2013-11-06 22:07:09+00:00 False \n",
"... ... ... ... ... ... \n",
"46297 False 46298 46297.0 2013-08-31 04:59:57+00:00 False \n",
"46298 False 46299 46298.0 2013-08-27 19:33:43+00:00 False \n",
"46299 False 46300 46299.0 2013-08-25 10:50:53+00:00 False \n",
"46300 False 46301 46300.0 2013-08-17 20:35:18+00:00 False \n",
"46301 False 46302 46301.0 2013-08-13 19:57:22+00:00 False \n",
"\n",
" processed_text \\\n",
"0 Ni!\\n\\nI am experiencing an unresponsive black... \n",
"1 mdale wrote:\\n\\n@Ryan, I just mean you wil... \n",
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... \n",
"3 > So putting it back to 200px specifically for... \n",
"4 Many thanks to Brian and Mark for their fine w... \n",
"... ... \n",
"46297 romaine.wiki wrote:\\n\\n \n",
"46298 We're playing with the templates on Dennis ... \n",
"46299 The links are all listed on . The Unique Iden... \n",
"46300 Looks like some lists are available, but not i... \n",
"46301 We already have a lot of sources in the monume... \n",
"\n",
" dependency_tree \n",
"0 [(Ni, Ni, nsubj, experiencing, [experiencing],... \n",
"1 [( , , dep, mdale, [mdale, wrote], [ ], []... \n",
"2 [(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (... \n",
"3 [(>, >, dep, seem, [seem], [>], []), (So, so, ... \n",
"4 [(Many, many, amod, thanks, [thanks], [Many], ... \n",
"... ... \n",
"46297 [( , , dep, romaine.wiki, [romaine.wiki, wr... \n",
"46298 [(We, we, nsubj, playing, [playing, seems], [W... \n",
"46299 [(The, the, det, links, [links, listed], [The]... \n",
"46300 [(Looks, look, ROOT, Looks, [], [Looks, like, ... \n",
"46301 [(We, we, nsubj, have, [have], [We], []), (alr... \n",
"\n",
"[26300 rows x 15 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"comment_phab_df"
]
},
{
"cell_type": "code",
"execution_count": 27,
@ -908,7 +1331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.21"
"version": "3.9.18"
}
},
"nbformat": 4,

View File

@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 9,
"id": "e4f0b3f0-5255-46f1-822f-e455087ba315",
"metadata": {},
"outputs": [],
@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 10,
"id": "ac5e624b-08a4-4ede-bc96-cfc26c3edac3",
"metadata": {},
"outputs": [],
@ -32,7 +32,9 @@
"def http_relevant(text):\n",
" if pd.isnull(text):\n",
" return False\n",
"\n",
" # TODO: expanded dictionary for relevancy\n",
" # http, ip, login, auth, SSL, TLS, certificate \n",
" \n",
" for word in text.split():\n",
" if \"://\" not in word.lower() and \"http\" in word.lower():\n",
" return True\n",
@ -41,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 11,
"id": "d449164e-1d28-4580-9eb1-f0f69978f114",
"metadata": {},
"outputs": [
@ -49,7 +51,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_11370/1288881096.py:35: SettingWithCopyWarning: \n",
"/tmp/ipykernel_22429/86623999.py:36: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -76,13 +78,17 @@
"\n",
"# after 12-1-2012 before 12-1-2013\n",
"phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379099276)]\n",
"\n",
"#removing headless conversations\n",
"task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n",
"headed_task_phids = task_phab_df['conversation_id'].unique()\n",
"filtered_phab_df = filtered_phab_df[filtered_phab_df['conversation_id'].isin(headed_task_phids)]\n",
"\n",
"#TODO: filter out the sourceforge migration \n",
"# Originally from: http://sourceforge.net in the task task_summary\n",
"\n",
"#removing gerrit comments \n",
"mid_comment_phab_df = filtered_phab_df[filtered_phab_df['meta.gerrit'] != True]\n",
"\n",
@ -95,13 +101,13 @@
"task_phab_df['is_relevant'] = task_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
"mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
"\n",
"#comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
"comment_phab_df = mid_comment_phab_df"
"comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
"#comment_phab_df = mid_comment_phab_df"
]
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 12,
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
"metadata": {},
"outputs": [
@ -109,9 +115,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Unique conversation_ids: 6139\n",
"Unique ids: 26300\n",
"Unique speakers: 506\n"
"Unique conversation_ids: 96\n",
"Unique ids: 361\n",
"Unique speakers: 47\n"
]
}
],
@ -127,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 13,
"id": "d226d781-b002-4842-a3ae-92d4851a5878",
"metadata": {},
"outputs": [],
@ -144,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 14,
"id": "3ae40d24-bbe8-49c3-a3a9-70bde1b4d559",
"metadata": {},
"outputs": [
@ -152,7 +158,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_11370/2783900859.py:1: SettingWithCopyWarning: \n",
"/tmp/ipykernel_22429/2783900859.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -177,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 15,
"id": "a8469b16-4ae6-4b06-bf1b-1f2f6c736cab",
"metadata": {},
"outputs": [],
@ -206,7 +212,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 16,
"id": "8b9a12f9-71bf-4bc9-bcfd-c73aab4be920",
"metadata": {},
"outputs": [
@ -214,7 +220,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_11370/2805711855.py:1: SettingWithCopyWarning: \n",
"/tmp/ipykernel_22429/2805711855.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -237,6 +243,423 @@
"#comment_phab_df['resolved_dependency_tree'] = comment_phab_df['processed_resolved_text'].apply(extract_dependency_tree)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "370a2767-04f8-4d0b-9b94-9c6a0b408822",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2612 Recently (starting maybe 2 days ago), some goo...\n",
"2989 Although the \"Always use a secure connection w...\n",
"3080 Originally from: http://sourceforge.net/p/pywi...\n",
"3084 Originally from: http://sourceforge.net/p/pywi...\n",
"3096 Originally from: http://sourceforge.net/p/pywi...\n",
" ... \n",
"44209 Originally from: http://sourceforge.net/p/pywi...\n",
"44217 Originally from: http://sourceforge.net/p/pywi...\n",
"44265 Originally from: http://sourceforge.net/p/pywi...\n",
"44277 Originally from: http://sourceforge.net/p/pywi...\n",
"44316 Originally from: http://sourceforge.net/p/pywi...\n",
"Name: comment_text, Length: 96, dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"comment_phab_df[comment_phab_df['comment_type'] == 'task_description']['comment_text']"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "5f138688-3d1a-4a27-b16d-d8aa438dafea",
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "44",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
"File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2606\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2630\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 44",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcomment_phab_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcomment_text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m44\u001b[39;49m\u001b[43m]\u001b[49m\n",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1121\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[key]\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1121\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;66;03m# Convert generator to list before going through hashable part\u001b[39;00m\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# (We will iterate through the generator there to check for slices)\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1237\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[label]\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;66;03m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1237\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[loc]\n",
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
"\u001b[0;31mKeyError\u001b[0m: 44"
]
}
],
"source": [
"comment_phab_df['comment_text'][44]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "f61845ce-d91f-4b06-9039-b507905cb972",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>task_title</th>\n",
" <th>comment_text</th>\n",
" <th>date_created</th>\n",
" <th>speaker</th>\n",
" <th>meta.affil</th>\n",
" <th>conversation_id</th>\n",
" <th>comment_type</th>\n",
" <th>status</th>\n",
" <th>meta.gerrit</th>\n",
" <th>id</th>\n",
" <th>reply_to</th>\n",
" <th>timestamp</th>\n",
" <th>is_relevant</th>\n",
" <th>processed_text</th>\n",
" <th>dependency_tree</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
" <td>1383189120</td>\n",
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_description</td>\n",
" <td>resolved</td>\n",
" <td>False</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>2013-10-31 03:12:00+00:00</td>\n",
" <td>False</td>\n",
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
" <td>[(Ni, Ni, nsubj, experiencing, [experiencing],...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>**mdale** wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
" <td>1383856310</td>\n",
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>2013-11-07 20:31:50+00:00</td>\n",
" <td>False</td>\n",
" <td>mdale wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
" <td>[( , , dep, mdale, [mdale, wrote], [ ], []...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
" <td>1383796532</td>\n",
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>3</td>\n",
" <td>2.0</td>\n",
" <td>2013-11-07 03:55:32+00:00</td>\n",
" <td>False</td>\n",
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
" <td>[(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>&gt; So putting it back to 200px specifically for...</td>\n",
" <td>1383776933</td>\n",
" <td>PHID-USER-a5pveeqqwaddgfjiv2fq</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>4</td>\n",
" <td>3.0</td>\n",
" <td>2013-11-06 22:28:53+00:00</td>\n",
" <td>False</td>\n",
" <td>&gt; So putting it back to 200px specifically for...</td>\n",
" <td>[(&gt;, &gt;, dep, seem, [seem], [&gt;], []), (So, so, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
" <td>1383775629</td>\n",
" <td>PHID-USER-dbudsaorcqut7sg3vvbi</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>5</td>\n",
" <td>4.0</td>\n",
" <td>2013-11-06 22:07:09+00:00</td>\n",
" <td>False</td>\n",
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
" <td>[(Many, many, amod, thanks, [thanks], [Many], ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46297</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>**romaine.wiki** wrote:\\n\\nhttps://commons.wik...</td>\n",
" <td>1377925197</td>\n",
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46298</td>\n",
" <td>46297.0</td>\n",
" <td>2013-08-31 04:59:57+00:00</td>\n",
" <td>False</td>\n",
" <td>romaine.wiki wrote:\\n\\n</td>\n",
" <td>[( , , dep, romaine.wiki, [romaine.wiki, wr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46298</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>We're playing with the templates on https://zh...</td>\n",
" <td>1377632023</td>\n",
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46299</td>\n",
" <td>46298.0</td>\n",
" <td>2013-08-27 19:33:43+00:00</td>\n",
" <td>False</td>\n",
" <td>We're playing with the templates on Dennis ...</td>\n",
" <td>[(We, we, nsubj, playing, [playing, seems], [W...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46299</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>The links are all listed on https://commons.wi...</td>\n",
" <td>1377427853</td>\n",
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46300</td>\n",
" <td>46299.0</td>\n",
" <td>2013-08-25 10:50:53+00:00</td>\n",
" <td>False</td>\n",
" <td>The links are all listed on . The Unique Iden...</td>\n",
" <td>[(The, the, det, links, [links, listed], [The]...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46300</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>Looks like some lists are available, but not i...</td>\n",
" <td>1376771718</td>\n",
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46301</td>\n",
" <td>46300.0</td>\n",
" <td>2013-08-17 20:35:18+00:00</td>\n",
" <td>False</td>\n",
" <td>Looks like some lists are available, but not i...</td>\n",
" <td>[(Looks, look, ROOT, Looks, [], [Looks, like, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46301</th>\n",
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
" <td>We already have a lot of sources in the monume...</td>\n",
" <td>1376423842</td>\n",
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>46302</td>\n",
" <td>46301.0</td>\n",
" <td>2013-08-13 19:57:22+00:00</td>\n",
" <td>False</td>\n",
" <td>We already have a lot of sources in the monume...</td>\n",
" <td>[(We, we, nsubj, have, [have], [We], []), (alr...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>26300 rows × 15 columns</p>\n",
"</div>"
],
"text/plain": [
" task_title \\\n",
"0 ?embedplayer=yes broken for videos with width ... \n",
"1 ?embedplayer=yes broken for videos with width ... \n",
"2 ?embedplayer=yes broken for videos with width ... \n",
"3 ?embedplayer=yes broken for videos with width ... \n",
"4 ?embedplayer=yes broken for videos with width ... \n",
"... ... \n",
"46297 Add Taiwan in Chinese to the monuments database \n",
"46298 Add Taiwan in Chinese to the monuments database \n",
"46299 Add Taiwan in Chinese to the monuments database \n",
"46300 Add Taiwan in Chinese to the monuments database \n",
"46301 Add Taiwan in Chinese to the monuments database \n",
"\n",
" comment_text date_created \\\n",
"0 Ni!\\n\\nI am experiencing an unresponsive black... 1383189120 \n",
"1 **mdale** wrote:\\n\\n@Ryan, I just mean you wil... 1383856310 \n",
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... 1383796532 \n",
"3 > So putting it back to 200px specifically for... 1383776933 \n",
"4 Many thanks to Brian and Mark for their fine w... 1383775629 \n",
"... ... ... \n",
"46297 **romaine.wiki** wrote:\\n\\nhttps://commons.wik... 1377925197 \n",
"46298 We're playing with the templates on https://zh... 1377632023 \n",
"46299 The links are all listed on https://commons.wi... 1377427853 \n",
"46300 Looks like some lists are available, but not i... 1376771718 \n",
"46301 We already have a lot of sources in the monume... 1376423842 \n",
"\n",
" speaker meta.affil \\\n",
"0 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
"1 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
"2 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
"3 PHID-USER-a5pveeqqwaddgfjiv2fq False \n",
"4 PHID-USER-dbudsaorcqut7sg3vvbi False \n",
"... ... ... \n",
"46297 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
"46298 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
"46299 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
"46300 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
"46301 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
"\n",
" conversation_id comment_type status \\\n",
"0 PHID-TASK-lfhsyqihbylzxoeftr7m task_description resolved \n",
"1 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"2 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"3 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"4 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
"... ... ... ... \n",
"46297 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46298 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46299 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46300 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"46301 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
"\n",
" meta.gerrit id reply_to timestamp is_relevant \\\n",
"0 False 1 NaN 2013-10-31 03:12:00+00:00 False \n",
"1 False 2 1.0 2013-11-07 20:31:50+00:00 False \n",
"2 False 3 2.0 2013-11-07 03:55:32+00:00 False \n",
"3 False 4 3.0 2013-11-06 22:28:53+00:00 False \n",
"4 False 5 4.0 2013-11-06 22:07:09+00:00 False \n",
"... ... ... ... ... ... \n",
"46297 False 46298 46297.0 2013-08-31 04:59:57+00:00 False \n",
"46298 False 46299 46298.0 2013-08-27 19:33:43+00:00 False \n",
"46299 False 46300 46299.0 2013-08-25 10:50:53+00:00 False \n",
"46300 False 46301 46300.0 2013-08-17 20:35:18+00:00 False \n",
"46301 False 46302 46301.0 2013-08-13 19:57:22+00:00 False \n",
"\n",
" processed_text \\\n",
"0 Ni!\\n\\nI am experiencing an unresponsive black... \n",
"1 mdale wrote:\\n\\n@Ryan, I just mean you wil... \n",
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... \n",
"3 > So putting it back to 200px specifically for... \n",
"4 Many thanks to Brian and Mark for their fine w... \n",
"... ... \n",
"46297 romaine.wiki wrote:\\n\\n \n",
"46298 We're playing with the templates on Dennis ... \n",
"46299 The links are all listed on . The Unique Iden... \n",
"46300 Looks like some lists are available, but not i... \n",
"46301 We already have a lot of sources in the monume... \n",
"\n",
" dependency_tree \n",
"0 [(Ni, Ni, nsubj, experiencing, [experiencing],... \n",
"1 [( , , dep, mdale, [mdale, wrote], [ ], []... \n",
"2 [(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (... \n",
"3 [(>, >, dep, seem, [seem], [>], []), (So, so, ... \n",
"4 [(Many, many, amod, thanks, [thanks], [Many], ... \n",
"... ... \n",
"46297 [( , , dep, romaine.wiki, [romaine.wiki, wr... \n",
"46298 [(We, we, nsubj, playing, [playing, seems], [W... \n",
"46299 [(The, the, det, links, [links, listed], [The]... \n",
"46300 [(Looks, look, ROOT, Looks, [], [Looks, like, ... \n",
"46301 [(We, we, nsubj, have, [have], [We], []), (alr... \n",
"\n",
"[26300 rows x 15 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"comment_phab_df"
]
},
{
"cell_type": "code",
"execution_count": 27,
@ -908,7 +1331,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.21"
"version": "3.9.18"
}
},
"nbformat": 4,