488 lines
20 KiB
Plaintext
488 lines
20 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "fcc726a8-44a4-48cf-a1cd-937b05bd4d08",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "1fceca29-48c1-4ba3-93ba-88724dea22a7",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Input CSVs for the two batches (file names end in _to_2014 and _2014_to_2015).\n",
|
||
"first_resolved_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/051725_coref_rel_phab_comments_to_2014.csv\"\n",
|
||
"second_resolved_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/051725_coref_rel_phab_comments_2014_to_2015.csv\"\n",
|
||
"# Read both files with pandas' default CSV options, one DataFrame per path.\n",
|
||
"first_resolved_df, second_resolved_df = map(pd.read_csv, (first_resolved_path, second_resolved_path))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "f26c31e7-bee1-4100-821f-769e5b1791bd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"8621"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Number of rows loaded into second_resolved_df.\n",
|
||
"second_resolved_df.shape[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "dfa81ca2-4d66-4679-bc3e-192d0cac67fa",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"5007"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Number of rows loaded into first_resolved_df.\n",
|
||
"first_resolved_df.shape[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "6dc11bda-f0f6-4eb6-96f5-02ed9a3492ba",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"13628"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Stack the two frames. ignore_index=True gives the result a fresh 0..n-1 index instead of\n",
|
||
"# repeating each input's own row labels, so label-based lookups stay unambiguous.\n",
|
||
"combined_df = pd.concat([first_resolved_df, second_resolved_df], ignore_index=True)\n",
|
||
"# Drop rows that are identical in every column (index labels are not compared) and renumber.\n",
|
||
"# NOTE(review): the saved outputs show 5007 + 8621 = 13628 rows, i.e. no row was dropped here;\n",
|
||
"# if the two files can overlap with differing text columns, de-duplicating on `id` may be intended - confirm.\n",
|
||
"unique_df = combined_df.drop_duplicates(ignore_index=True)\n",
|
||
"# Row count after de-duplication.\n",
|
||
"len(unique_df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "0c903199-8159-455c-aa7f-e57ef07ce03e",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>task_title</th>\n",
|
||
" <th>comment_text</th>\n",
|
||
" <th>date_created</th>\n",
|
||
" <th>speaker</th>\n",
|
||
" <th>meta.affil</th>\n",
|
||
" <th>conversation_id</th>\n",
|
||
" <th>comment_type</th>\n",
|
||
" <th>status</th>\n",
|
||
" <th>meta.gerrit</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>reply_to</th>\n",
|
||
" <th>timestamp</th>\n",
|
||
" <th>is_relevant</th>\n",
|
||
" <th>is_migrated</th>\n",
|
||
" <th>text</th>\n",
|
||
" <th>resolved_text</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>User with unattached accounts unable to login ...</td>\n",
|
||
" <td>User:NickK reported in IRC that they're gettin...</td>\n",
|
||
" <td>1411541280</td>\n",
|
||
" <td>PHID-USER-v7vgzvvcw7v2umf737ri</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
|
||
" <td>task_description</td>\n",
|
||
" <td>resolved</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>243215</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2014-09-24 06:48:00+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>User:NickK reported in IRC that they're gettin...</td>\n",
|
||
" <td>User:NickK reported in IRC that they're gettin...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>User with unattached accounts unable to login ...</td>\n",
|
||
" <td>Revert has been deployed.</td>\n",
|
||
" <td>1411573104</td>\n",
|
||
" <td>PHID-USER-v7vgzvvcw7v2umf737ri</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>243216</td>\n",
|
||
" <td>243215.0</td>\n",
|
||
" <td>2014-09-24 15:38:24+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Revert has been deployed.</td>\n",
|
||
" <td>Revert has been deployed.</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>User with unattached accounts unable to login ...</td>\n",
|
||
" <td>**gerritadmin** wrote:\\n\\nChange 162550 merged...</td>\n",
|
||
" <td>1411572378</td>\n",
|
||
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>243217</td>\n",
|
||
" <td>243216.0</td>\n",
|
||
" <td>2014-09-24 15:26:18+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>**gerritadmin** wrote:\\n\\nChange 162550 merged...</td>\n",
|
||
" <td>**gerritadmin** wrote:\\n\\nChange 162550 merged...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>User with unattached accounts unable to login ...</td>\n",
|
||
" <td>(In reply to Kunal Mehta (Legoktm) from commen...</td>\n",
|
||
" <td>1411545535</td>\n",
|
||
" <td>PHID-USER-v7bwpq3rs3zdxegibdbh</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>243218</td>\n",
|
||
" <td>243217.0</td>\n",
|
||
" <td>2014-09-24 07:58:55+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>(In reply to Kunal Mehta (Legoktm) from commen...</td>\n",
|
||
" <td>(In reply to Kunal Mehta (Legoktm) from commen...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>User with unattached accounts unable to login ...</td>\n",
|
||
" <td>**gerritadmin** wrote:\\n\\nChange 162549 merged...</td>\n",
|
||
" <td>1411542640</td>\n",
|
||
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>243219</td>\n",
|
||
" <td>243218.0</td>\n",
|
||
" <td>2014-09-24 07:10:40+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>**gerritadmin** wrote:\\n\\nChange 162549 merged...</td>\n",
|
||
" <td>**gerritadmin** wrote:\\n\\nChange 162549 merged...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8616</th>\n",
|
||
" <td>OAuth login refers to mediawiki.org:/ instead ...</td>\n",
|
||
" <td>> When I registered, phabricator linked mediaw...</td>\n",
|
||
" <td>1413205650</td>\n",
|
||
" <td>PHID-USER-hgn5uw2jafgjgfvxibhh</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-yeaxsfxhhtbn26koo5fi</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>378799</td>\n",
|
||
" <td>378798.0</td>\n",
|
||
" <td>2014-10-13 13:07:30+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>> When I registered, phabricator linked mediaw...</td>\n",
|
||
" <td>> When I registered, phabricator linked mediaw...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8617</th>\n",
|
||
" <td>OAuth login refers to mediawiki.org:/ instead ...</td>\n",
|
||
" <td>See {T574} for a related discussion.</td>\n",
|
||
" <td>1412958953</td>\n",
|
||
" <td>PHID-USER-lluzkul4z7us4sxkayss</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-yeaxsfxhhtbn26koo5fi</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>378800</td>\n",
|
||
" <td>378799.0</td>\n",
|
||
" <td>2014-10-10 16:35:53+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>See {T574} for a related discussion.</td>\n",
|
||
" <td>See {T574} for a related discussion.</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8618</th>\n",
|
||
" <td>Improvements to Wikimedia SUL login dialog UI:...</td>\n",
|
||
" <td>Some improvements to the Wikimedia SUL dialog:...</td>\n",
|
||
" <td>1412362816</td>\n",
|
||
" <td>PHID-USER-lluzkul4z7us4sxkayss</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-j6czqxlv5fzcx3tmq23n</td>\n",
|
||
" <td>task_description</td>\n",
|
||
" <td>declined</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>378858</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2014-10-03 19:00:16+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Some improvements to the Wikimedia SUL dialog:...</td>\n",
|
||
" <td>Some improvements to the Wikimedia SUL dialog:...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8619</th>\n",
|
||
" <td>Improvements to Wikimedia SUL login dialog UI:...</td>\n",
|
||
" <td>I guess the same restrictions as in T543 apply...</td>\n",
|
||
" <td>1412415111</td>\n",
|
||
" <td>PHID-USER-lluzkul4z7us4sxkayss</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-j6czqxlv5fzcx3tmq23n</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>378860</td>\n",
|
||
" <td>378859.0</td>\n",
|
||
" <td>2014-10-04 09:31:51+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>I guess the same restrictions as in T543 apply...</td>\n",
|
||
" <td>I guess the same restrictions as in T543 apply...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8620</th>\n",
|
||
" <td>Improvements to Wikimedia SUL login dialog UI:...</td>\n",
|
||
" <td>It's not entirely trivial to change</td>\n",
|
||
" <td>1412366627</td>\n",
|
||
" <td>PHID-USER-fn7qnpccfbitivgtw2rt</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>PHID-TASK-j6czqxlv5fzcx3tmq23n</td>\n",
|
||
" <td>task_subcomment</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>378861</td>\n",
|
||
" <td>378860.0</td>\n",
|
||
" <td>2014-10-03 20:03:47+00:00</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>It's not entirely trivial to change</td>\n",
|
||
" <td>It's not entirely trivial to change</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>13628 rows × 16 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" task_title \\\n",
|
||
"0 User with unattached accounts unable to login ... \n",
|
||
"1 User with unattached accounts unable to login ... \n",
|
||
"2 User with unattached accounts unable to login ... \n",
|
||
"3 User with unattached accounts unable to login ... \n",
|
||
"4 User with unattached accounts unable to login ... \n",
|
||
"... ... \n",
|
||
"8616 OAuth login refers to mediawiki.org:/ instead ... \n",
|
||
"8617 OAuth login refers to mediawiki.org:/ instead ... \n",
|
||
"8618 Improvements to Wikimedia SUL login dialog UI:... \n",
|
||
"8619 Improvements to Wikimedia SUL login dialog UI:... \n",
|
||
"8620 Improvements to Wikimedia SUL login dialog UI:... \n",
|
||
"\n",
|
||
" comment_text date_created \\\n",
|
||
"0 User:NickK reported in IRC that they're gettin... 1411541280 \n",
|
||
"1 Revert has been deployed. 1411573104 \n",
|
||
"2 **gerritadmin** wrote:\\n\\nChange 162550 merged... 1411572378 \n",
|
||
"3 (In reply to Kunal Mehta (Legoktm) from commen... 1411545535 \n",
|
||
"4 **gerritadmin** wrote:\\n\\nChange 162549 merged... 1411542640 \n",
|
||
"... ... ... \n",
|
||
"8616 > When I registered, phabricator linked mediaw... 1413205650 \n",
|
||
"8617 See {T574} for a related discussion. 1412958953 \n",
|
||
"8618 Some improvements to the Wikimedia SUL dialog:... 1412362816 \n",
|
||
"8619 I guess the same restrictions as in T543 apply... 1412415111 \n",
|
||
"8620 It's not entirely trivial to change 1412366627 \n",
|
||
"\n",
|
||
" speaker meta.affil \\\n",
|
||
"0 PHID-USER-v7vgzvvcw7v2umf737ri False \n",
|
||
"1 PHID-USER-v7vgzvvcw7v2umf737ri False \n",
|
||
"2 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
|
||
"3 PHID-USER-v7bwpq3rs3zdxegibdbh False \n",
|
||
"4 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
|
||
"... ... ... \n",
|
||
"8616 PHID-USER-hgn5uw2jafgjgfvxibhh False \n",
|
||
"8617 PHID-USER-lluzkul4z7us4sxkayss False \n",
|
||
"8618 PHID-USER-lluzkul4z7us4sxkayss False \n",
|
||
"8619 PHID-USER-lluzkul4z7us4sxkayss False \n",
|
||
"8620 PHID-USER-fn7qnpccfbitivgtw2rt False \n",
|
||
"\n",
|
||
" conversation_id comment_type status meta.gerrit \\\n",
|
||
"0 PHID-TASK-mio2uq45ny7mms72syut task_description resolved False \n",
|
||
"1 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
|
||
"2 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
|
||
"3 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
|
||
"4 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
|
||
"... ... ... ... ... \n",
|
||
"8616 PHID-TASK-yeaxsfxhhtbn26koo5fi task_subcomment NaN False \n",
|
||
"8617 PHID-TASK-yeaxsfxhhtbn26koo5fi task_subcomment NaN False \n",
|
||
"8618 PHID-TASK-j6czqxlv5fzcx3tmq23n task_description declined False \n",
|
||
"8619 PHID-TASK-j6czqxlv5fzcx3tmq23n task_subcomment NaN False \n",
|
||
"8620 PHID-TASK-j6czqxlv5fzcx3tmq23n task_subcomment NaN False \n",
|
||
"\n",
|
||
" id reply_to timestamp is_relevant is_migrated \\\n",
|
||
"0 243215 NaN 2014-09-24 06:48:00+00:00 True False \n",
|
||
"1 243216 243215.0 2014-09-24 15:38:24+00:00 True False \n",
|
||
"2 243217 243216.0 2014-09-24 15:26:18+00:00 True False \n",
|
||
"3 243218 243217.0 2014-09-24 07:58:55+00:00 True False \n",
|
||
"4 243219 243218.0 2014-09-24 07:10:40+00:00 True False \n",
|
||
"... ... ... ... ... ... \n",
|
||
"8616 378799 378798.0 2014-10-13 13:07:30+00:00 True False \n",
|
||
"8617 378800 378799.0 2014-10-10 16:35:53+00:00 True False \n",
|
||
"8618 378858 NaN 2014-10-03 19:00:16+00:00 True False \n",
|
||
"8619 378860 378859.0 2014-10-04 09:31:51+00:00 True False \n",
|
||
"8620 378861 378860.0 2014-10-03 20:03:47+00:00 True False \n",
|
||
"\n",
|
||
" text \\\n",
|
||
"0 User:NickK reported in IRC that they're gettin... \n",
|
||
"1 Revert has been deployed. \n",
|
||
"2 **gerritadmin** wrote:\\n\\nChange 162550 merged... \n",
|
||
"3 (In reply to Kunal Mehta (Legoktm) from commen... \n",
|
||
"4 **gerritadmin** wrote:\\n\\nChange 162549 merged... \n",
|
||
"... ... \n",
|
||
"8616 > When I registered, phabricator linked mediaw... \n",
|
||
"8617 See {T574} for a related discussion. \n",
|
||
"8618 Some improvements to the Wikimedia SUL dialog:... \n",
|
||
"8619 I guess the same restrictions as in T543 apply... \n",
|
||
"8620 It's not entirely trivial to change \n",
|
||
"\n",
|
||
" resolved_text \n",
|
||
"0 User:NickK reported in IRC that they're gettin... \n",
|
||
"1 Revert has been deployed. \n",
|
||
"2 **gerritadmin** wrote:\\n\\nChange 162550 merged... \n",
|
||
"3 (In reply to Kunal Mehta (Legoktm) from commen... \n",
|
||
"4 **gerritadmin** wrote:\\n\\nChange 162549 merged... \n",
|
||
"... ... \n",
|
||
"8616 > When I registered, phabricator linked mediaw... \n",
|
||
"8617 See {T574} for a related discussion. \n",
|
||
"8618 Some improvements to the Wikimedia SUL dialog:... \n",
|
||
"8619 I guess the same restrictions as in T543 apply... \n",
|
||
"8620 It's not entirely trivial to change \n",
|
||
"\n",
|
||
"[13628 rows x 16 columns]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Show the merged frame; pandas truncates the repr to the first and last rows plus the shape.\n",
|
||
"unique_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "0c392d70-6236-4dfe-b6d4-bbe3f422b09e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Destination file for the merged frame.\n",
|
||
"# NOTE(review): the \"0050825\" prefix has seven digits while the input files use \"051725\" - confirm the name is intended.\n",
|
||
"merged_output_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0050825_coref-rel-first.csv\"\n",
|
||
"# index=False keeps the DataFrame's row labels out of the written CSV.\n",
|
||
"unique_df.to_csv(merged_output_path, index=False)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.11"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|