updating analysis of *relevant* phab tickets for https case
This commit is contained in:
parent
1fba61b75b
commit
5fe41d576d
@ -13,7 +13,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 9,
|
||||
"id": "e4f0b3f0-5255-46f1-822f-e455087ba315",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -24,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 10,
|
||||
"id": "ac5e624b-08a4-4ede-bc96-cfc26c3edac3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -32,7 +32,9 @@
|
||||
"def http_relevant(text):\n",
|
||||
" if pd.isnull(text):\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
" # TODO: expanded dictionary for relevancy\n",
|
||||
" # http, ip, login, auth, SSL, TLS, certificate \n",
|
||||
" \n",
|
||||
" for word in text.split():\n",
|
||||
" if \"://\" not in word.lower() and \"http\" in word.lower():\n",
|
||||
" return True\n",
|
||||
@ -41,7 +43,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 11,
|
||||
"id": "d449164e-1d28-4580-9eb1-f0f69978f114",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -49,7 +51,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_11370/1288881096.py:35: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_22429/86623999.py:36: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
@ -76,13 +78,17 @@
|
||||
"\n",
|
||||
"# after 12-1-2012 before 12-1-2013\n",
|
||||
"phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n",
|
||||
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
|
||||
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
|
||||
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379099276)]\n",
|
||||
"\n",
|
||||
"#removing headless conversations\n",
|
||||
"task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n",
|
||||
"headed_task_phids = task_phab_df['conversation_id'].unique()\n",
|
||||
"filtered_phab_df = filtered_phab_df[filtered_phab_df['conversation_id'].isin(headed_task_phids)]\n",
|
||||
"\n",
|
||||
"#TODO: filter out the sourceforge migration \n",
|
||||
"# Originally from: http://sourceforge.net in the task task_summary\n",
|
||||
"\n",
|
||||
"#removing gerrit comments \n",
|
||||
"mid_comment_phab_df = filtered_phab_df[filtered_phab_df['meta.gerrit'] != True]\n",
|
||||
"\n",
|
||||
@ -95,13 +101,13 @@
|
||||
"task_phab_df['is_relevant'] = task_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
|
||||
"mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
|
||||
"\n",
|
||||
"#comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
|
||||
"comment_phab_df = mid_comment_phab_df"
|
||||
"comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
|
||||
"#comment_phab_df = mid_comment_phab_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 12,
|
||||
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -109,9 +115,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Unique conversation_ids: 6139\n",
|
||||
"Unique ids: 26300\n",
|
||||
"Unique speakers: 506\n"
|
||||
"Unique conversation_ids: 96\n",
|
||||
"Unique ids: 361\n",
|
||||
"Unique speakers: 47\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -127,7 +133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 13,
|
||||
"id": "d226d781-b002-4842-a3ae-92d4851a5878",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -144,7 +150,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 14,
|
||||
"id": "3ae40d24-bbe8-49c3-a3a9-70bde1b4d559",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -152,7 +158,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_11370/2783900859.py:1: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_22429/2783900859.py:1: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
@ -177,7 +183,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 15,
|
||||
"id": "a8469b16-4ae6-4b06-bf1b-1f2f6c736cab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -206,7 +212,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 16,
|
||||
"id": "8b9a12f9-71bf-4bc9-bcfd-c73aab4be920",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -214,7 +220,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_11370/2805711855.py:1: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_22429/2805711855.py:1: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
@ -237,6 +243,423 @@
|
||||
"#comment_phab_df['resolved_dependency_tree'] = comment_phab_df['processed_resolved_text'].apply(extract_dependency_tree)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "370a2767-04f8-4d0b-9b94-9c6a0b408822",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2612 Recently (starting maybe 2 days ago), some goo...\n",
|
||||
"2989 Although the \"Always use a secure connection w...\n",
|
||||
"3080 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"3084 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"3096 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
" ... \n",
|
||||
"44209 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44217 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44265 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44277 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44316 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"Name: comment_text, Length: 96, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comment_phab_df[comment_phab_df['comment_type'] == 'task_description']['comment_text']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "5f138688-3d1a-4a27-b16d-d8aa438dafea",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "KeyError",
|
||||
"evalue": "44",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
|
||||
"File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2606\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2630\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 44",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcomment_phab_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcomment_text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m44\u001b[39;49m\u001b[43m]\u001b[49m\n",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1121\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[key]\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1121\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;66;03m# Convert generator to list before going through hashable part\u001b[39;00m\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# (We will iterate through the generator there to check for slices)\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1237\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[label]\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;66;03m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1237\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[loc]\n",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 44"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comment_phab_df['comment_text'][44]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "f61845ce-d91f-4b06-9039-b507905cb972",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>task_title</th>\n",
|
||||
" <th>comment_text</th>\n",
|
||||
" <th>date_created</th>\n",
|
||||
" <th>speaker</th>\n",
|
||||
" <th>meta.affil</th>\n",
|
||||
" <th>conversation_id</th>\n",
|
||||
" <th>comment_type</th>\n",
|
||||
" <th>status</th>\n",
|
||||
" <th>meta.gerrit</th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>reply_to</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" <th>is_relevant</th>\n",
|
||||
" <th>processed_text</th>\n",
|
||||
" <th>dependency_tree</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
|
||||
" <td>1383189120</td>\n",
|
||||
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_description</td>\n",
|
||||
" <td>resolved</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2013-10-31 03:12:00+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
|
||||
" <td>[(Ni, Ni, nsubj, experiencing, [experiencing],...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>**mdale** wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
|
||||
" <td>1383856310</td>\n",
|
||||
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2013-11-07 20:31:50+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>mdale wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
|
||||
" <td>[( , , dep, mdale, [mdale, wrote], [ ], []...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
|
||||
" <td>1383796532</td>\n",
|
||||
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>2013-11-07 03:55:32+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
|
||||
" <td>[(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>> So putting it back to 200px specifically for...</td>\n",
|
||||
" <td>1383776933</td>\n",
|
||||
" <td>PHID-USER-a5pveeqqwaddgfjiv2fq</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>2013-11-06 22:28:53+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>> So putting it back to 200px specifically for...</td>\n",
|
||||
" <td>[(>, >, dep, seem, [seem], [>], []), (So, so, ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
|
||||
" <td>1383775629</td>\n",
|
||||
" <td>PHID-USER-dbudsaorcqut7sg3vvbi</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>2013-11-06 22:07:09+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
|
||||
" <td>[(Many, many, amod, thanks, [thanks], [Many], ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46297</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>**romaine.wiki** wrote:\\n\\nhttps://commons.wik...</td>\n",
|
||||
" <td>1377925197</td>\n",
|
||||
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46298</td>\n",
|
||||
" <td>46297.0</td>\n",
|
||||
" <td>2013-08-31 04:59:57+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>romaine.wiki wrote:\\n\\n</td>\n",
|
||||
" <td>[( , , dep, romaine.wiki, [romaine.wiki, wr...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46298</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>We're playing with the templates on https://zh...</td>\n",
|
||||
" <td>1377632023</td>\n",
|
||||
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46299</td>\n",
|
||||
" <td>46298.0</td>\n",
|
||||
" <td>2013-08-27 19:33:43+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>We're playing with the templates on Dennis ...</td>\n",
|
||||
" <td>[(We, we, nsubj, playing, [playing, seems], [W...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46299</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>The links are all listed on https://commons.wi...</td>\n",
|
||||
" <td>1377427853</td>\n",
|
||||
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46300</td>\n",
|
||||
" <td>46299.0</td>\n",
|
||||
" <td>2013-08-25 10:50:53+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>The links are all listed on . The Unique Iden...</td>\n",
|
||||
" <td>[(The, the, det, links, [links, listed], [The]...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46300</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>Looks like some lists are available, but not i...</td>\n",
|
||||
" <td>1376771718</td>\n",
|
||||
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46301</td>\n",
|
||||
" <td>46300.0</td>\n",
|
||||
" <td>2013-08-17 20:35:18+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Looks like some lists are available, but not i...</td>\n",
|
||||
" <td>[(Looks, look, ROOT, Looks, [], [Looks, like, ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46301</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>We already have a lot of sources in the monume...</td>\n",
|
||||
" <td>1376423842</td>\n",
|
||||
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46302</td>\n",
|
||||
" <td>46301.0</td>\n",
|
||||
" <td>2013-08-13 19:57:22+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>We already have a lot of sources in the monume...</td>\n",
|
||||
" <td>[(We, we, nsubj, have, [have], [We], []), (alr...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>26300 rows × 15 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" task_title \\\n",
|
||||
"0 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"1 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"2 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"3 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"4 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"... ... \n",
|
||||
"46297 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46298 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46299 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46300 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46301 Add Taiwan in Chinese to the monuments database \n",
|
||||
"\n",
|
||||
" comment_text date_created \\\n",
|
||||
"0 Ni!\\n\\nI am experiencing an unresponsive black... 1383189120 \n",
|
||||
"1 **mdale** wrote:\\n\\n@Ryan, I just mean you wil... 1383856310 \n",
|
||||
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... 1383796532 \n",
|
||||
"3 > So putting it back to 200px specifically for... 1383776933 \n",
|
||||
"4 Many thanks to Brian and Mark for their fine w... 1383775629 \n",
|
||||
"... ... ... \n",
|
||||
"46297 **romaine.wiki** wrote:\\n\\nhttps://commons.wik... 1377925197 \n",
|
||||
"46298 We're playing with the templates on https://zh... 1377632023 \n",
|
||||
"46299 The links are all listed on https://commons.wi... 1377427853 \n",
|
||||
"46300 Looks like some lists are available, but not i... 1376771718 \n",
|
||||
"46301 We already have a lot of sources in the monume... 1376423842 \n",
|
||||
"\n",
|
||||
" speaker meta.affil \\\n",
|
||||
"0 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
|
||||
"1 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
|
||||
"2 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
|
||||
"3 PHID-USER-a5pveeqqwaddgfjiv2fq False \n",
|
||||
"4 PHID-USER-dbudsaorcqut7sg3vvbi False \n",
|
||||
"... ... ... \n",
|
||||
"46297 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
|
||||
"46298 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
|
||||
"46299 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
|
||||
"46300 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
|
||||
"46301 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
|
||||
"\n",
|
||||
" conversation_id comment_type status \\\n",
|
||||
"0 PHID-TASK-lfhsyqihbylzxoeftr7m task_description resolved \n",
|
||||
"1 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"2 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"3 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"4 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"... ... ... ... \n",
|
||||
"46297 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46298 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46299 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46300 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46301 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"\n",
|
||||
" meta.gerrit id reply_to timestamp is_relevant \\\n",
|
||||
"0 False 1 NaN 2013-10-31 03:12:00+00:00 False \n",
|
||||
"1 False 2 1.0 2013-11-07 20:31:50+00:00 False \n",
|
||||
"2 False 3 2.0 2013-11-07 03:55:32+00:00 False \n",
|
||||
"3 False 4 3.0 2013-11-06 22:28:53+00:00 False \n",
|
||||
"4 False 5 4.0 2013-11-06 22:07:09+00:00 False \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"46297 False 46298 46297.0 2013-08-31 04:59:57+00:00 False \n",
|
||||
"46298 False 46299 46298.0 2013-08-27 19:33:43+00:00 False \n",
|
||||
"46299 False 46300 46299.0 2013-08-25 10:50:53+00:00 False \n",
|
||||
"46300 False 46301 46300.0 2013-08-17 20:35:18+00:00 False \n",
|
||||
"46301 False 46302 46301.0 2013-08-13 19:57:22+00:00 False \n",
|
||||
"\n",
|
||||
" processed_text \\\n",
|
||||
"0 Ni!\\n\\nI am experiencing an unresponsive black... \n",
|
||||
"1 mdale wrote:\\n\\n@Ryan, I just mean you wil... \n",
|
||||
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... \n",
|
||||
"3 > So putting it back to 200px specifically for... \n",
|
||||
"4 Many thanks to Brian and Mark for their fine w... \n",
|
||||
"... ... \n",
|
||||
"46297 romaine.wiki wrote:\\n\\n \n",
|
||||
"46298 We're playing with the templates on Dennis ... \n",
|
||||
"46299 The links are all listed on . The Unique Iden... \n",
|
||||
"46300 Looks like some lists are available, but not i... \n",
|
||||
"46301 We already have a lot of sources in the monume... \n",
|
||||
"\n",
|
||||
" dependency_tree \n",
|
||||
"0 [(Ni, Ni, nsubj, experiencing, [experiencing],... \n",
|
||||
"1 [( , , dep, mdale, [mdale, wrote], [ ], []... \n",
|
||||
"2 [(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (... \n",
|
||||
"3 [(>, >, dep, seem, [seem], [>], []), (So, so, ... \n",
|
||||
"4 [(Many, many, amod, thanks, [thanks], [Many], ... \n",
|
||||
"... ... \n",
|
||||
"46297 [( , , dep, romaine.wiki, [romaine.wiki, wr... \n",
|
||||
"46298 [(We, we, nsubj, playing, [playing, seems], [W... \n",
|
||||
"46299 [(The, the, det, links, [links, listed], [The]... \n",
|
||||
"46300 [(Looks, look, ROOT, Looks, [], [Looks, like, ... \n",
|
||||
"46301 [(We, we, nsubj, have, [have], [We], []), (alr... \n",
|
||||
"\n",
|
||||
"[26300 rows x 15 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comment_phab_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
@ -908,7 +1331,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.21"
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -13,7 +13,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"execution_count": 9,
|
||||
"id": "e4f0b3f0-5255-46f1-822f-e455087ba315",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -24,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"execution_count": 10,
|
||||
"id": "ac5e624b-08a4-4ede-bc96-cfc26c3edac3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -32,7 +32,9 @@
|
||||
"def http_relevant(text):\n",
|
||||
" if pd.isnull(text):\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
" # TODO: expanded dictionary for relevancy\n",
|
||||
" # http, ip, login, auth, SSL, TLS, certificate \n",
|
||||
" \n",
|
||||
" for word in text.split():\n",
|
||||
" if \"://\" not in word.lower() and \"http\" in word.lower():\n",
|
||||
" return True\n",
|
||||
@ -41,7 +43,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"execution_count": 11,
|
||||
"id": "d449164e-1d28-4580-9eb1-f0f69978f114",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -49,7 +51,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_11370/1288881096.py:35: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_22429/86623999.py:36: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
@ -76,13 +78,17 @@
|
||||
"\n",
|
||||
"# after 12-1-2012 before 12-1-2013\n",
|
||||
"phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n",
|
||||
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
|
||||
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n",
|
||||
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379099276)]\n",
|
||||
"\n",
|
||||
"#removing headless conversations\n",
|
||||
"task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n",
|
||||
"headed_task_phids = task_phab_df['conversation_id'].unique()\n",
|
||||
"filtered_phab_df = filtered_phab_df[filtered_phab_df['conversation_id'].isin(headed_task_phids)]\n",
|
||||
"\n",
|
||||
"#TODO: filter out the sourceforge migration \n",
|
||||
"# Originally from: http://sourceforge.net in the task task_summary\n",
|
||||
"\n",
|
||||
"#removing gerrit comments \n",
|
||||
"mid_comment_phab_df = filtered_phab_df[filtered_phab_df['meta.gerrit'] != True]\n",
|
||||
"\n",
|
||||
@ -95,13 +101,13 @@
|
||||
"task_phab_df['is_relevant'] = task_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
|
||||
"mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
|
||||
"\n",
|
||||
"#comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
|
||||
"comment_phab_df = mid_comment_phab_df"
|
||||
"comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n",
|
||||
"#comment_phab_df = mid_comment_phab_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"execution_count": 12,
|
||||
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -109,9 +115,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Unique conversation_ids: 6139\n",
|
||||
"Unique ids: 26300\n",
|
||||
"Unique speakers: 506\n"
|
||||
"Unique conversation_ids: 96\n",
|
||||
"Unique ids: 361\n",
|
||||
"Unique speakers: 47\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -127,7 +133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 13,
|
||||
"id": "d226d781-b002-4842-a3ae-92d4851a5878",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -144,7 +150,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 14,
|
||||
"id": "3ae40d24-bbe8-49c3-a3a9-70bde1b4d559",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -152,7 +158,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_11370/2783900859.py:1: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_22429/2783900859.py:1: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
@ -177,7 +183,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 15,
|
||||
"id": "a8469b16-4ae6-4b06-bf1b-1f2f6c736cab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -206,7 +212,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 16,
|
||||
"id": "8b9a12f9-71bf-4bc9-bcfd-c73aab4be920",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -214,7 +220,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_11370/2805711855.py:1: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_22429/2805711855.py:1: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
@ -237,6 +243,423 @@
|
||||
"#comment_phab_df['resolved_dependency_tree'] = comment_phab_df['processed_resolved_text'].apply(extract_dependency_tree)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "370a2767-04f8-4d0b-9b94-9c6a0b408822",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2612 Recently (starting maybe 2 days ago), some goo...\n",
|
||||
"2989 Although the \"Always use a secure connection w...\n",
|
||||
"3080 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"3084 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"3096 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
" ... \n",
|
||||
"44209 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44217 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44265 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44277 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"44316 Originally from: http://sourceforge.net/p/pywi...\n",
|
||||
"Name: comment_text, Length: 96, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comment_phab_df[comment_phab_df['comment_type'] == 'task_description']['comment_text']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "5f138688-3d1a-4a27-b16d-d8aa438dafea",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "KeyError",
|
||||
"evalue": "44",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n",
|
||||
"File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2606\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2630\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 44",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcomment_phab_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcomment_text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m44\u001b[39;49m\u001b[43m]\u001b[49m\n",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1121\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[key]\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1121\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;66;03m# Convert generator to list before going through hashable part\u001b[39;00m\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# (We will iterate through the generator there to check for slices)\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1237\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[label]\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;66;03m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1237\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[loc]\n",
|
||||
"File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 44"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comment_phab_df['comment_text'][44]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"id": "f61845ce-d91f-4b06-9039-b507905cb972",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>task_title</th>\n",
|
||||
" <th>comment_text</th>\n",
|
||||
" <th>date_created</th>\n",
|
||||
" <th>speaker</th>\n",
|
||||
" <th>meta.affil</th>\n",
|
||||
" <th>conversation_id</th>\n",
|
||||
" <th>comment_type</th>\n",
|
||||
" <th>status</th>\n",
|
||||
" <th>meta.gerrit</th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>reply_to</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" <th>is_relevant</th>\n",
|
||||
" <th>processed_text</th>\n",
|
||||
" <th>dependency_tree</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
|
||||
" <td>1383189120</td>\n",
|
||||
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_description</td>\n",
|
||||
" <td>resolved</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2013-10-31 03:12:00+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Ni!\\n\\nI am experiencing an unresponsive black...</td>\n",
|
||||
" <td>[(Ni, Ni, nsubj, experiencing, [experiencing],...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>**mdale** wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
|
||||
" <td>1383856310</td>\n",
|
||||
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2013-11-07 20:31:50+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>mdale wrote:\\n\\n@Ryan, I just mean you wil...</td>\n",
|
||||
" <td>[( , , dep, mdale, [mdale, wrote], [ ], []...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
|
||||
" <td>1383796532</td>\n",
|
||||
" <td>PHID-USER-wr7prgh3p37xrvbdr6w5</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>2013-11-07 03:55:32+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Ni!\\n\\n=) Thanks everyone for helping verify a...</td>\n",
|
||||
" <td>[(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>> So putting it back to 200px specifically for...</td>\n",
|
||||
" <td>1383776933</td>\n",
|
||||
" <td>PHID-USER-a5pveeqqwaddgfjiv2fq</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>2013-11-06 22:28:53+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>> So putting it back to 200px specifically for...</td>\n",
|
||||
" <td>[(>, >, dep, seem, [seem], [>], []), (So, so, ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>?embedplayer=yes broken for videos with width ...</td>\n",
|
||||
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
|
||||
" <td>1383775629</td>\n",
|
||||
" <td>PHID-USER-dbudsaorcqut7sg3vvbi</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-lfhsyqihbylzxoeftr7m</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>2013-11-06 22:07:09+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Many thanks to Brian and Mark for their fine w...</td>\n",
|
||||
" <td>[(Many, many, amod, thanks, [thanks], [Many], ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46297</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>**romaine.wiki** wrote:\\n\\nhttps://commons.wik...</td>\n",
|
||||
" <td>1377925197</td>\n",
|
||||
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46298</td>\n",
|
||||
" <td>46297.0</td>\n",
|
||||
" <td>2013-08-31 04:59:57+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>romaine.wiki wrote:\\n\\n</td>\n",
|
||||
" <td>[( , , dep, romaine.wiki, [romaine.wiki, wr...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46298</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>We're playing with the templates on https://zh...</td>\n",
|
||||
" <td>1377632023</td>\n",
|
||||
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46299</td>\n",
|
||||
" <td>46298.0</td>\n",
|
||||
" <td>2013-08-27 19:33:43+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>We're playing with the templates on Dennis ...</td>\n",
|
||||
" <td>[(We, we, nsubj, playing, [playing, seems], [W...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46299</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>The links are all listed on https://commons.wi...</td>\n",
|
||||
" <td>1377427853</td>\n",
|
||||
" <td>PHID-USER-bdyms27sdtgdvjm7zfz4</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46300</td>\n",
|
||||
" <td>46299.0</td>\n",
|
||||
" <td>2013-08-25 10:50:53+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>The links are all listed on . The Unique Iden...</td>\n",
|
||||
" <td>[(The, the, det, links, [links, listed], [The]...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46300</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>Looks like some lists are available, but not i...</td>\n",
|
||||
" <td>1376771718</td>\n",
|
||||
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46301</td>\n",
|
||||
" <td>46300.0</td>\n",
|
||||
" <td>2013-08-17 20:35:18+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Looks like some lists are available, but not i...</td>\n",
|
||||
" <td>[(Looks, look, ROOT, Looks, [], [Looks, like, ...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>46301</th>\n",
|
||||
" <td>Add Taiwan in Chinese to the monuments database</td>\n",
|
||||
" <td>We already have a lot of sources in the monume...</td>\n",
|
||||
" <td>1376423842</td>\n",
|
||||
" <td>PHID-USER-cw4amt4ewxdze5qcjdca</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>PHID-TASK-ze253b4m6dtco37373fc</td>\n",
|
||||
" <td>task_subcomment</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>46302</td>\n",
|
||||
" <td>46301.0</td>\n",
|
||||
" <td>2013-08-13 19:57:22+00:00</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>We already have a lot of sources in the monume...</td>\n",
|
||||
" <td>[(We, we, nsubj, have, [have], [We], []), (alr...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>26300 rows × 15 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" task_title \\\n",
|
||||
"0 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"1 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"2 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"3 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"4 ?embedplayer=yes broken for videos with width ... \n",
|
||||
"... ... \n",
|
||||
"46297 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46298 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46299 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46300 Add Taiwan in Chinese to the monuments database \n",
|
||||
"46301 Add Taiwan in Chinese to the monuments database \n",
|
||||
"\n",
|
||||
" comment_text date_created \\\n",
|
||||
"0 Ni!\\n\\nI am experiencing an unresponsive black... 1383189120 \n",
|
||||
"1 **mdale** wrote:\\n\\n@Ryan, I just mean you wil... 1383856310 \n",
|
||||
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... 1383796532 \n",
|
||||
"3 > So putting it back to 200px specifically for... 1383776933 \n",
|
||||
"4 Many thanks to Brian and Mark for their fine w... 1383775629 \n",
|
||||
"... ... ... \n",
|
||||
"46297 **romaine.wiki** wrote:\\n\\nhttps://commons.wik... 1377925197 \n",
|
||||
"46298 We're playing with the templates on https://zh... 1377632023 \n",
|
||||
"46299 The links are all listed on https://commons.wi... 1377427853 \n",
|
||||
"46300 Looks like some lists are available, but not i... 1376771718 \n",
|
||||
"46301 We already have a lot of sources in the monume... 1376423842 \n",
|
||||
"\n",
|
||||
" speaker meta.affil \\\n",
|
||||
"0 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
|
||||
"1 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
|
||||
"2 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n",
|
||||
"3 PHID-USER-a5pveeqqwaddgfjiv2fq False \n",
|
||||
"4 PHID-USER-dbudsaorcqut7sg3vvbi False \n",
|
||||
"... ... ... \n",
|
||||
"46297 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
|
||||
"46298 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
|
||||
"46299 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n",
|
||||
"46300 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
|
||||
"46301 PHID-USER-cw4amt4ewxdze5qcjdca False \n",
|
||||
"\n",
|
||||
" conversation_id comment_type status \\\n",
|
||||
"0 PHID-TASK-lfhsyqihbylzxoeftr7m task_description resolved \n",
|
||||
"1 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"2 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"3 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"4 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n",
|
||||
"... ... ... ... \n",
|
||||
"46297 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46298 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46299 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46300 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"46301 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n",
|
||||
"\n",
|
||||
" meta.gerrit id reply_to timestamp is_relevant \\\n",
|
||||
"0 False 1 NaN 2013-10-31 03:12:00+00:00 False \n",
|
||||
"1 False 2 1.0 2013-11-07 20:31:50+00:00 False \n",
|
||||
"2 False 3 2.0 2013-11-07 03:55:32+00:00 False \n",
|
||||
"3 False 4 3.0 2013-11-06 22:28:53+00:00 False \n",
|
||||
"4 False 5 4.0 2013-11-06 22:07:09+00:00 False \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"46297 False 46298 46297.0 2013-08-31 04:59:57+00:00 False \n",
|
||||
"46298 False 46299 46298.0 2013-08-27 19:33:43+00:00 False \n",
|
||||
"46299 False 46300 46299.0 2013-08-25 10:50:53+00:00 False \n",
|
||||
"46300 False 46301 46300.0 2013-08-17 20:35:18+00:00 False \n",
|
||||
"46301 False 46302 46301.0 2013-08-13 19:57:22+00:00 False \n",
|
||||
"\n",
|
||||
" processed_text \\\n",
|
||||
"0 Ni!\\n\\nI am experiencing an unresponsive black... \n",
|
||||
"1 mdale wrote:\\n\\n@Ryan, I just mean you wil... \n",
|
||||
"2 Ni!\\n\\n=) Thanks everyone for helping verify a... \n",
|
||||
"3 > So putting it back to 200px specifically for... \n",
|
||||
"4 Many thanks to Brian and Mark for their fine w... \n",
|
||||
"... ... \n",
|
||||
"46297 romaine.wiki wrote:\\n\\n \n",
|
||||
"46298 We're playing with the templates on Dennis ... \n",
|
||||
"46299 The links are all listed on . The Unique Iden... \n",
|
||||
"46300 Looks like some lists are available, but not i... \n",
|
||||
"46301 We already have a lot of sources in the monume... \n",
|
||||
"\n",
|
||||
" dependency_tree \n",
|
||||
"0 [(Ni, Ni, nsubj, experiencing, [experiencing],... \n",
|
||||
"1 [( , , dep, mdale, [mdale, wrote], [ ], []... \n",
|
||||
"2 [(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (... \n",
|
||||
"3 [(>, >, dep, seem, [seem], [>], []), (So, so, ... \n",
|
||||
"4 [(Many, many, amod, thanks, [thanks], [Many], ... \n",
|
||||
"... ... \n",
|
||||
"46297 [( , , dep, romaine.wiki, [romaine.wiki, wr... \n",
|
||||
"46298 [(We, we, nsubj, playing, [playing, seems], [W... \n",
|
||||
"46299 [(The, the, det, links, [links, listed], [The]... \n",
|
||||
"46300 [(Looks, look, ROOT, Looks, [], [Looks, like, ... \n",
|
||||
"46301 [(We, we, nsubj, have, [have], [We], []), (alr... \n",
|
||||
"\n",
|
||||
"[26300 rows x 15 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comment_phab_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
@ -908,7 +1331,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.21"
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Loading…
Reference in New Issue
Block a user