From 5fe41d576dfd202422e56b0a71e26324787cc5aa Mon Sep 17 00:00:00 2001 From: Matthew Gaughan Date: Sun, 13 Apr 2025 12:44:48 -0700 Subject: [PATCH] updating analysis of *relevant* phab tickets for https case --- .../040425_phab_comments-checkpoint.ipynb | 461 +++++++++++++++++- .../case2/040425_phab_comments.ipynb | 461 +++++++++++++++++- 2 files changed, 884 insertions(+), 38 deletions(-) diff --git a/text_analysis/case2/.ipynb_checkpoints/040425_phab_comments-checkpoint.ipynb b/text_analysis/case2/.ipynb_checkpoints/040425_phab_comments-checkpoint.ipynb index 9d98c6c..dec8139 100644 --- a/text_analysis/case2/.ipynb_checkpoints/040425_phab_comments-checkpoint.ipynb +++ b/text_analysis/case2/.ipynb_checkpoints/040425_phab_comments-checkpoint.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 9, "id": "e4f0b3f0-5255-46f1-822f-e455087ba315", "metadata": {}, "outputs": [], @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "id": "ac5e624b-08a4-4ede-bc96-cfc26c3edac3", "metadata": {}, "outputs": [], @@ -32,7 +32,9 @@ "def http_relevant(text):\n", " if pd.isnull(text):\n", " return False\n", - "\n", + " # TODO: expanded dictionary for relevancy\n", + " # http, ip, login, auth, SSL, TLS, certificate \n", + " \n", " for word in text.split():\n", " if \"://\" not in word.lower() and \"http\" in word.lower():\n", " return True\n", @@ -41,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 11, "id": "d449164e-1d28-4580-9eb1-f0f69978f114", "metadata": {}, "outputs": [ @@ -49,7 +51,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_11370/1288881096.py:35: SettingWithCopyWarning: \n", + "/tmp/ipykernel_22429/86623999.py:36: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -76,13 +78,17 @@ "\n", "# after 12-1-2012 before 12-1-2013\n", "phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n", - "filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n", + "#filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n", + "filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379099276)]\n", "\n", "#removing headless conversations\n", "task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n", "headed_task_phids = task_phab_df['conversation_id'].unique()\n", "filtered_phab_df = filtered_phab_df[filtered_phab_df['conversation_id'].isin(headed_task_phids)]\n", "\n", + "#TODO: filter out the sourceforge migration \n", + "# Originally from: http://sourceforge.net in the task task_summary\n", + "\n", "#removing gerrit comments \n", "mid_comment_phab_df = filtered_phab_df[filtered_phab_df['meta.gerrit'] != True]\n", "\n", @@ -95,13 +101,13 @@ "task_phab_df['is_relevant'] = task_phab_df['conversation_id'].isin(relevant_conversation_ids)\n", "mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n", "\n", - "#comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n", - "comment_phab_df = mid_comment_phab_df" + "comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n", + "#comment_phab_df = mid_comment_phab_df" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 12, "id": "942344db-c8f5-4ed6-a757-c97f8454f18b", "metadata": {}, "outputs": [ @@ -109,9 +115,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Unique conversation_ids: 6139\n", - "Unique ids: 26300\n", - "Unique speakers: 506\n" + "Unique conversation_ids: 96\n", + "Unique ids: 361\n", + "Unique speakers: 47\n" ] } ], @@ -127,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 13, "id": "d226d781-b002-4842-a3ae-92d4851a5878", "metadata": {}, "outputs": [], @@ -144,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 14, "id": "3ae40d24-bbe8-49c3-a3a9-70bde1b4d559", "metadata": {}, "outputs": [ @@ -152,7 +158,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_11370/2783900859.py:1: SettingWithCopyWarning: \n", + "/tmp/ipykernel_22429/2783900859.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -177,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 15, "id": "a8469b16-4ae6-4b06-bf1b-1f2f6c736cab", "metadata": {}, "outputs": [], @@ -206,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 16, "id": "8b9a12f9-71bf-4bc9-bcfd-c73aab4be920", "metadata": {}, "outputs": [ @@ -214,7 +220,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_11370/2805711855.py:1: SettingWithCopyWarning: \n", + "/tmp/ipykernel_22429/2805711855.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -237,6 +243,423 @@ "#comment_phab_df['resolved_dependency_tree'] = comment_phab_df['processed_resolved_text'].apply(extract_dependency_tree)" ] }, + { + "cell_type": "code", + "execution_count": 21, + "id": "370a2767-04f8-4d0b-9b94-9c6a0b408822", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2612 Recently (starting maybe 2 days ago), some goo...\n", + "2989 Although the \"Always use a secure connection w...\n", + "3080 Originally from: http://sourceforge.net/p/pywi...\n", + "3084 Originally from: http://sourceforge.net/p/pywi...\n", + "3096 Originally from: http://sourceforge.net/p/pywi...\n", + " ... \n", + "44209 Originally from: http://sourceforge.net/p/pywi...\n", + "44217 Originally from: http://sourceforge.net/p/pywi...\n", + "44265 Originally from: http://sourceforge.net/p/pywi...\n", + "44277 Originally from: http://sourceforge.net/p/pywi...\n", + "44316 Originally from: http://sourceforge.net/p/pywi...\n", + "Name: comment_text, Length: 96, dtype: object" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comment_phab_df[comment_phab_df['comment_type'] == 'task_description']['comment_text']" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "5f138688-3d1a-4a27-b16d-d8aa438dafea", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "44", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2606\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2630\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 44", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcomment_phab_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcomment_text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m44\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1121\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[key]\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1121\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;66;03m# Convert generator to list before going through hashable part\u001b[39;00m\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# (We will iterate through the generator there to check for slices)\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1237\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[label]\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;66;03m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1237\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[loc]\n", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 44" + ] + } + ], + "source": [ + "comment_phab_df['comment_text'][44]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "f61845ce-d91f-4b06-9039-b507905cb972", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
task_titlecomment_textdate_createdspeakermeta.affilconversation_idcomment_typestatusmeta.gerritidreply_totimestampis_relevantprocessed_textdependency_tree
0?embedplayer=yes broken for videos with width ...Ni!\\n\\nI am experiencing an unresponsive black...1383189120PHID-USER-wr7prgh3p37xrvbdr6w5FalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_descriptionresolvedFalse1NaN2013-10-31 03:12:00+00:00FalseNi!\\n\\nI am experiencing an unresponsive black...[(Ni, Ni, nsubj, experiencing, [experiencing],...
1?embedplayer=yes broken for videos with width ...**mdale** wrote:\\n\\n@Ryan, I just mean you wil...1383856310PHID-USER-ynivjflmc2dcl6w5ut5vFalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse21.02013-11-07 20:31:50+00:00Falsemdale wrote:\\n\\n@Ryan, I just mean you wil...[( , , dep, mdale, [mdale, wrote], [ ], []...
2?embedplayer=yes broken for videos with width ...Ni!\\n\\n=) Thanks everyone for helping verify a...1383796532PHID-USER-wr7prgh3p37xrvbdr6w5FalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse32.02013-11-07 03:55:32+00:00FalseNi!\\n\\n=) Thanks everyone for helping verify a...[(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (...
3?embedplayer=yes broken for videos with width ...> So putting it back to 200px specifically for...1383776933PHID-USER-a5pveeqqwaddgfjiv2fqFalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse43.02013-11-06 22:28:53+00:00False> So putting it back to 200px specifically for...[(>, >, dep, seem, [seem], [>], []), (So, so, ...
4?embedplayer=yes broken for videos with width ...Many thanks to Brian and Mark for their fine w...1383775629PHID-USER-dbudsaorcqut7sg3vvbiFalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse54.02013-11-06 22:07:09+00:00FalseMany thanks to Brian and Mark for their fine w...[(Many, many, amod, thanks, [thanks], [Many], ...
................................................
46297Add Taiwan in Chinese to the monuments database**romaine.wiki** wrote:\\n\\nhttps://commons.wik...1377925197PHID-USER-ynivjflmc2dcl6w5ut5vFalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4629846297.02013-08-31 04:59:57+00:00Falseromaine.wiki wrote:\\n\\n[( , , dep, romaine.wiki, [romaine.wiki, wr...
46298Add Taiwan in Chinese to the monuments databaseWe're playing with the templates on https://zh...1377632023PHID-USER-bdyms27sdtgdvjm7zfz4FalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4629946298.02013-08-27 19:33:43+00:00FalseWe're playing with the templates on Dennis ...[(We, we, nsubj, playing, [playing, seems], [W...
46299Add Taiwan in Chinese to the monuments databaseThe links are all listed on https://commons.wi...1377427853PHID-USER-bdyms27sdtgdvjm7zfz4FalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4630046299.02013-08-25 10:50:53+00:00FalseThe links are all listed on . The Unique Iden...[(The, the, det, links, [links, listed], [The]...
46300Add Taiwan in Chinese to the monuments databaseLooks like some lists are available, but not i...1376771718PHID-USER-cw4amt4ewxdze5qcjdcaFalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4630146300.02013-08-17 20:35:18+00:00FalseLooks like some lists are available, but not i...[(Looks, look, ROOT, Looks, [], [Looks, like, ...
46301Add Taiwan in Chinese to the monuments databaseWe already have a lot of sources in the monume...1376423842PHID-USER-cw4amt4ewxdze5qcjdcaFalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4630246301.02013-08-13 19:57:22+00:00FalseWe already have a lot of sources in the monume...[(We, we, nsubj, have, [have], [We], []), (alr...
\n", + "

26300 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " task_title \\\n", + "0 ?embedplayer=yes broken for videos with width ... \n", + "1 ?embedplayer=yes broken for videos with width ... \n", + "2 ?embedplayer=yes broken for videos with width ... \n", + "3 ?embedplayer=yes broken for videos with width ... \n", + "4 ?embedplayer=yes broken for videos with width ... \n", + "... ... \n", + "46297 Add Taiwan in Chinese to the monuments database \n", + "46298 Add Taiwan in Chinese to the monuments database \n", + "46299 Add Taiwan in Chinese to the monuments database \n", + "46300 Add Taiwan in Chinese to the monuments database \n", + "46301 Add Taiwan in Chinese to the monuments database \n", + "\n", + " comment_text date_created \\\n", + "0 Ni!\\n\\nI am experiencing an unresponsive black... 1383189120 \n", + "1 **mdale** wrote:\\n\\n@Ryan, I just mean you wil... 1383856310 \n", + "2 Ni!\\n\\n=) Thanks everyone for helping verify a... 1383796532 \n", + "3 > So putting it back to 200px specifically for... 1383776933 \n", + "4 Many thanks to Brian and Mark for their fine w... 1383775629 \n", + "... ... ... \n", + "46297 **romaine.wiki** wrote:\\n\\nhttps://commons.wik... 1377925197 \n", + "46298 We're playing with the templates on https://zh... 1377632023 \n", + "46299 The links are all listed on https://commons.wi... 1377427853 \n", + "46300 Looks like some lists are available, but not i... 1376771718 \n", + "46301 We already have a lot of sources in the monume... 1376423842 \n", + "\n", + " speaker meta.affil \\\n", + "0 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n", + "1 PHID-USER-ynivjflmc2dcl6w5ut5v False \n", + "2 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n", + "3 PHID-USER-a5pveeqqwaddgfjiv2fq False \n", + "4 PHID-USER-dbudsaorcqut7sg3vvbi False \n", + "... ... ... \n", + "46297 PHID-USER-ynivjflmc2dcl6w5ut5v False \n", + "46298 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n", + "46299 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n", + "46300 PHID-USER-cw4amt4ewxdze5qcjdca False \n", + "46301 PHID-USER-cw4amt4ewxdze5qcjdca False \n", + "\n", + " conversation_id comment_type status \\\n", + "0 PHID-TASK-lfhsyqihbylzxoeftr7m task_description resolved \n", + "1 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "2 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "3 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "4 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "... ... ... ... \n", + "46297 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46298 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46299 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46300 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46301 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "\n", + " meta.gerrit id reply_to timestamp is_relevant \\\n", + "0 False 1 NaN 2013-10-31 03:12:00+00:00 False \n", + "1 False 2 1.0 2013-11-07 20:31:50+00:00 False \n", + "2 False 3 2.0 2013-11-07 03:55:32+00:00 False \n", + "3 False 4 3.0 2013-11-06 22:28:53+00:00 False \n", + "4 False 5 4.0 2013-11-06 22:07:09+00:00 False \n", + "... ... ... ... ... ... \n", + "46297 False 46298 46297.0 2013-08-31 04:59:57+00:00 False \n", + "46298 False 46299 46298.0 2013-08-27 19:33:43+00:00 False \n", + "46299 False 46300 46299.0 2013-08-25 10:50:53+00:00 False \n", + "46300 False 46301 46300.0 2013-08-17 20:35:18+00:00 False \n", + "46301 False 46302 46301.0 2013-08-13 19:57:22+00:00 False \n", + "\n", + " processed_text \\\n", + "0 Ni!\\n\\nI am experiencing an unresponsive black... \n", + "1 mdale wrote:\\n\\n@Ryan, I just mean you wil... \n", + "2 Ni!\\n\\n=) Thanks everyone for helping verify a... \n", + "3 > So putting it back to 200px specifically for... \n", + "4 Many thanks to Brian and Mark for their fine w... \n", + "... ... \n", + "46297 romaine.wiki wrote:\\n\\n \n", + "46298 We're playing with the templates on Dennis ... \n", + "46299 The links are all listed on . The Unique Iden... \n", + "46300 Looks like some lists are available, but not i... \n", + "46301 We already have a lot of sources in the monume... \n", + "\n", + " dependency_tree \n", + "0 [(Ni, Ni, nsubj, experiencing, [experiencing],... \n", + "1 [( , , dep, mdale, [mdale, wrote], [ ], []... \n", + "2 [(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (... \n", + "3 [(>, >, dep, seem, [seem], [>], []), (So, so, ... \n", + "4 [(Many, many, amod, thanks, [thanks], [Many], ... \n", + "... ... \n", + "46297 [( , , dep, romaine.wiki, [romaine.wiki, wr... \n", + "46298 [(We, we, nsubj, playing, [playing, seems], [W... \n", + "46299 [(The, the, det, links, [links, listed], [The]... \n", + "46300 [(Looks, look, ROOT, Looks, [], [Looks, like, ... \n", + "46301 [(We, we, nsubj, have, [have], [We], []), (alr... \n", + "\n", + "[26300 rows x 15 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comment_phab_df" + ] + }, { "cell_type": "code", "execution_count": 27, @@ -908,7 +1331,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.21" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/text_analysis/case2/040425_phab_comments.ipynb b/text_analysis/case2/040425_phab_comments.ipynb index 9d98c6c..dec8139 100644 --- a/text_analysis/case2/040425_phab_comments.ipynb +++ b/text_analysis/case2/040425_phab_comments.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 9, "id": "e4f0b3f0-5255-46f1-822f-e455087ba315", "metadata": {}, "outputs": [], @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 10, "id": "ac5e624b-08a4-4ede-bc96-cfc26c3edac3", "metadata": {}, "outputs": [], @@ -32,7 +32,9 @@ "def http_relevant(text):\n", " if pd.isnull(text):\n", " return False\n", - "\n", + " # TODO: expanded dictionary for relevancy\n", + " # http, ip, login, auth, SSL, TLS, certificate \n", + " \n", " for word in text.split():\n", " if \"://\" not in word.lower() and \"http\" in word.lower():\n", " return True\n", @@ -41,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 11, "id": "d449164e-1d28-4580-9eb1-f0f69978f114", "metadata": {}, "outputs": [ @@ -49,7 +51,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_11370/1288881096.py:35: SettingWithCopyWarning: \n", + "/tmp/ipykernel_22429/86623999.py:36: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -76,13 +78,17 @@ "\n", "# after 12-1-2012 before 12-1-2013\n", "phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n", - "filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n", + "#filtered_phab_df = phab_df[(phab_df['date_created'] < 1385856000) & (phab_df['date_created'] > 1354320000)]\n", + "filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379099276)]\n", "\n", "#removing headless conversations\n", "task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n", "headed_task_phids = task_phab_df['conversation_id'].unique()\n", "filtered_phab_df = filtered_phab_df[filtered_phab_df['conversation_id'].isin(headed_task_phids)]\n", "\n", + "#TODO: filter out the sourceforge migration \n", + "# Originally from: http://sourceforge.net in the task task_summary\n", + "\n", "#removing gerrit comments \n", "mid_comment_phab_df = filtered_phab_df[filtered_phab_df['meta.gerrit'] != True]\n", "\n", @@ -95,13 +101,13 @@ "task_phab_df['is_relevant'] = task_phab_df['conversation_id'].isin(relevant_conversation_ids)\n", "mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n", "\n", - "#comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n", - "comment_phab_df = mid_comment_phab_df" + "comment_phab_df = mid_comment_phab_df[mid_comment_phab_df['is_relevant'] == True]\n", + "#comment_phab_df = mid_comment_phab_df" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 12, "id": "942344db-c8f5-4ed6-a757-c97f8454f18b", "metadata": {}, "outputs": [ @@ -109,9 +115,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Unique conversation_ids: 6139\n", - "Unique ids: 26300\n", - "Unique speakers: 506\n" + "Unique conversation_ids: 96\n", + "Unique ids: 361\n", + "Unique speakers: 47\n" ] } ], @@ -127,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 13, "id": "d226d781-b002-4842-a3ae-92d4851a5878", "metadata": {}, "outputs": [], @@ -144,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 14, "id": "3ae40d24-bbe8-49c3-a3a9-70bde1b4d559", "metadata": {}, "outputs": [ @@ -152,7 +158,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_11370/2783900859.py:1: SettingWithCopyWarning: \n", + "/tmp/ipykernel_22429/2783900859.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -177,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 15, "id": "a8469b16-4ae6-4b06-bf1b-1f2f6c736cab", "metadata": {}, "outputs": [], @@ -206,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 16, "id": "8b9a12f9-71bf-4bc9-bcfd-c73aab4be920", "metadata": {}, "outputs": [ @@ -214,7 +220,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_11370/2805711855.py:1: SettingWithCopyWarning: \n", + "/tmp/ipykernel_22429/2805711855.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", @@ -237,6 +243,423 @@ "#comment_phab_df['resolved_dependency_tree'] = comment_phab_df['processed_resolved_text'].apply(extract_dependency_tree)" ] }, + { + "cell_type": "code", + "execution_count": 21, + "id": "370a2767-04f8-4d0b-9b94-9c6a0b408822", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2612 Recently (starting maybe 2 days ago), some goo...\n", + "2989 Although the \"Always use a secure connection w...\n", + "3080 Originally from: http://sourceforge.net/p/pywi...\n", + "3084 Originally from: http://sourceforge.net/p/pywi...\n", + "3096 Originally from: http://sourceforge.net/p/pywi...\n", + " ... \n", + "44209 Originally from: http://sourceforge.net/p/pywi...\n", + "44217 Originally from: http://sourceforge.net/p/pywi...\n", + "44265 Originally from: http://sourceforge.net/p/pywi...\n", + "44277 Originally from: http://sourceforge.net/p/pywi...\n", + "44316 Originally from: http://sourceforge.net/p/pywi...\n", + "Name: comment_text, Length: 96, dtype: object" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comment_phab_df[comment_phab_df['comment_type'] == 'task_description']['comment_text']" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "5f138688-3d1a-4a27-b16d-d8aa438dafea", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "44", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2606\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:2630\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 44", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcomment_phab_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcomment_text\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m44\u001b[39;49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1121\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[key]\n\u001b[1;32m 1120\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1121\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[38;5;66;03m# Convert generator to list before going through hashable part\u001b[39;00m\n\u001b[1;32m 1124\u001b[0m \u001b[38;5;66;03m# (We will iterate through the generator there to check for slices)\u001b[39;00m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/series.py:1237\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[label]\n\u001b[1;32m 1236\u001b[0m \u001b[38;5;66;03m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1237\u001b[0m loc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1240\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_values[loc]\n", + "File \u001b[0;32m/gscratch/scrubbed/mjilg/envs/jupyter3-notebook/lib/python3.9/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 44" + ] + } + ], + "source": [ + "comment_phab_df['comment_text'][44]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "f61845ce-d91f-4b06-9039-b507905cb972", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
task_titlecomment_textdate_createdspeakermeta.affilconversation_idcomment_typestatusmeta.gerritidreply_totimestampis_relevantprocessed_textdependency_tree
0?embedplayer=yes broken for videos with width ...Ni!\\n\\nI am experiencing an unresponsive black...1383189120PHID-USER-wr7prgh3p37xrvbdr6w5FalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_descriptionresolvedFalse1NaN2013-10-31 03:12:00+00:00FalseNi!\\n\\nI am experiencing an unresponsive black...[(Ni, Ni, nsubj, experiencing, [experiencing],...
1?embedplayer=yes broken for videos with width ...**mdale** wrote:\\n\\n@Ryan, I just mean you wil...1383856310PHID-USER-ynivjflmc2dcl6w5ut5vFalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse21.02013-11-07 20:31:50+00:00Falsemdale wrote:\\n\\n@Ryan, I just mean you wil...[( , , dep, mdale, [mdale, wrote], [ ], []...
2?embedplayer=yes broken for videos with width ...Ni!\\n\\n=) Thanks everyone for helping verify a...1383796532PHID-USER-wr7prgh3p37xrvbdr6w5FalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse32.02013-11-07 03:55:32+00:00FalseNi!\\n\\n=) Thanks everyone for helping verify a...[(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (...
3?embedplayer=yes broken for videos with width ...> So putting it back to 200px specifically for...1383776933PHID-USER-a5pveeqqwaddgfjiv2fqFalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse43.02013-11-06 22:28:53+00:00False> So putting it back to 200px specifically for...[(>, >, dep, seem, [seem], [>], []), (So, so, ...
4?embedplayer=yes broken for videos with width ...Many thanks to Brian and Mark for their fine w...1383775629PHID-USER-dbudsaorcqut7sg3vvbiFalsePHID-TASK-lfhsyqihbylzxoeftr7mtask_subcommentNaNFalse54.02013-11-06 22:07:09+00:00FalseMany thanks to Brian and Mark for their fine w...[(Many, many, amod, thanks, [thanks], [Many], ...
................................................
46297Add Taiwan in Chinese to the monuments database**romaine.wiki** wrote:\\n\\nhttps://commons.wik...1377925197PHID-USER-ynivjflmc2dcl6w5ut5vFalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4629846297.02013-08-31 04:59:57+00:00Falseromaine.wiki wrote:\\n\\n[( , , dep, romaine.wiki, [romaine.wiki, wr...
46298Add Taiwan in Chinese to the monuments databaseWe're playing with the templates on https://zh...1377632023PHID-USER-bdyms27sdtgdvjm7zfz4FalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4629946298.02013-08-27 19:33:43+00:00FalseWe're playing with the templates on Dennis ...[(We, we, nsubj, playing, [playing, seems], [W...
46299Add Taiwan in Chinese to the monuments databaseThe links are all listed on https://commons.wi...1377427853PHID-USER-bdyms27sdtgdvjm7zfz4FalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4630046299.02013-08-25 10:50:53+00:00FalseThe links are all listed on . The Unique Iden...[(The, the, det, links, [links, listed], [The]...
46300Add Taiwan in Chinese to the monuments databaseLooks like some lists are available, but not i...1376771718PHID-USER-cw4amt4ewxdze5qcjdcaFalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4630146300.02013-08-17 20:35:18+00:00FalseLooks like some lists are available, but not i...[(Looks, look, ROOT, Looks, [], [Looks, like, ...
46301Add Taiwan in Chinese to the monuments databaseWe already have a lot of sources in the monume...1376423842PHID-USER-cw4amt4ewxdze5qcjdcaFalsePHID-TASK-ze253b4m6dtco37373fctask_subcommentNaNFalse4630246301.02013-08-13 19:57:22+00:00FalseWe already have a lot of sources in the monume...[(We, we, nsubj, have, [have], [We], []), (alr...
\n", + "

26300 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " task_title \\\n", + "0 ?embedplayer=yes broken for videos with width ... \n", + "1 ?embedplayer=yes broken for videos with width ... \n", + "2 ?embedplayer=yes broken for videos with width ... \n", + "3 ?embedplayer=yes broken for videos with width ... \n", + "4 ?embedplayer=yes broken for videos with width ... \n", + "... ... \n", + "46297 Add Taiwan in Chinese to the monuments database \n", + "46298 Add Taiwan in Chinese to the monuments database \n", + "46299 Add Taiwan in Chinese to the monuments database \n", + "46300 Add Taiwan in Chinese to the monuments database \n", + "46301 Add Taiwan in Chinese to the monuments database \n", + "\n", + " comment_text date_created \\\n", + "0 Ni!\\n\\nI am experiencing an unresponsive black... 1383189120 \n", + "1 **mdale** wrote:\\n\\n@Ryan, I just mean you wil... 1383856310 \n", + "2 Ni!\\n\\n=) Thanks everyone for helping verify a... 1383796532 \n", + "3 > So putting it back to 200px specifically for... 1383776933 \n", + "4 Many thanks to Brian and Mark for their fine w... 1383775629 \n", + "... ... ... \n", + "46297 **romaine.wiki** wrote:\\n\\nhttps://commons.wik... 1377925197 \n", + "46298 We're playing with the templates on https://zh... 1377632023 \n", + "46299 The links are all listed on https://commons.wi... 1377427853 \n", + "46300 Looks like some lists are available, but not i... 1376771718 \n", + "46301 We already have a lot of sources in the monume... 1376423842 \n", + "\n", + " speaker meta.affil \\\n", + "0 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n", + "1 PHID-USER-ynivjflmc2dcl6w5ut5v False \n", + "2 PHID-USER-wr7prgh3p37xrvbdr6w5 False \n", + "3 PHID-USER-a5pveeqqwaddgfjiv2fq False \n", + "4 PHID-USER-dbudsaorcqut7sg3vvbi False \n", + "... ... ... \n", + "46297 PHID-USER-ynivjflmc2dcl6w5ut5v False \n", + "46298 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n", + "46299 PHID-USER-bdyms27sdtgdvjm7zfz4 False \n", + "46300 PHID-USER-cw4amt4ewxdze5qcjdca False \n", + "46301 PHID-USER-cw4amt4ewxdze5qcjdca False \n", + "\n", + " conversation_id comment_type status \\\n", + "0 PHID-TASK-lfhsyqihbylzxoeftr7m task_description resolved \n", + "1 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "2 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "3 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "4 PHID-TASK-lfhsyqihbylzxoeftr7m task_subcomment NaN \n", + "... ... ... ... \n", + "46297 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46298 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46299 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46300 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "46301 PHID-TASK-ze253b4m6dtco37373fc task_subcomment NaN \n", + "\n", + " meta.gerrit id reply_to timestamp is_relevant \\\n", + "0 False 1 NaN 2013-10-31 03:12:00+00:00 False \n", + "1 False 2 1.0 2013-11-07 20:31:50+00:00 False \n", + "2 False 3 2.0 2013-11-07 03:55:32+00:00 False \n", + "3 False 4 3.0 2013-11-06 22:28:53+00:00 False \n", + "4 False 5 4.0 2013-11-06 22:07:09+00:00 False \n", + "... ... ... ... ... ... \n", + "46297 False 46298 46297.0 2013-08-31 04:59:57+00:00 False \n", + "46298 False 46299 46298.0 2013-08-27 19:33:43+00:00 False \n", + "46299 False 46300 46299.0 2013-08-25 10:50:53+00:00 False \n", + "46300 False 46301 46300.0 2013-08-17 20:35:18+00:00 False \n", + "46301 False 46302 46301.0 2013-08-13 19:57:22+00:00 False \n", + "\n", + " processed_text \\\n", + "0 Ni!\\n\\nI am experiencing an unresponsive black... \n", + "1 mdale wrote:\\n\\n@Ryan, I just mean you wil... \n", + "2 Ni!\\n\\n=) Thanks everyone for helping verify a... \n", + "3 > So putting it back to 200px specifically for... \n", + "4 Many thanks to Brian and Mark for their fine w... \n", + "... ... \n", + "46297 romaine.wiki wrote:\\n\\n \n", + "46298 We're playing with the templates on Dennis ... \n", + "46299 The links are all listed on . The Unique Iden... \n", + "46300 Looks like some lists are available, but not i... \n", + "46301 We already have a lot of sources in the monume... \n", + "\n", + " dependency_tree \n", + "0 [(Ni, Ni, nsubj, experiencing, [experiencing],... \n", + "1 [( , , dep, mdale, [mdale, wrote], [ ], []... \n", + "2 [(Ni, Ni, ROOT, Ni, [], [Ni, !, \\n\\n], [!]), (... \n", + "3 [(>, >, dep, seem, [seem], [>], []), (So, so, ... \n", + "4 [(Many, many, amod, thanks, [thanks], [Many], ... \n", + "... ... \n", + "46297 [( , , dep, romaine.wiki, [romaine.wiki, wr... \n", + "46298 [(We, we, nsubj, playing, [playing, seems], [W... \n", + "46299 [(The, the, det, links, [links, listed], [The]... \n", + "46300 [(Looks, look, ROOT, Looks, [], [Looks, like, ... \n", + "46301 [(We, we, nsubj, have, [have], [We], []), (alr... \n", + "\n", + "[26300 rows x 15 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "comment_phab_df" + ] + }, { "cell_type": "code", "execution_count": 27, @@ -908,7 +1331,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.21" + "version": "3.9.18" } }, "nbformat": 4,