1
0

Update analysis notebooks and data for new key-term searches across Phabricator tasks

This commit is contained in:
Matthew Gaughan 2025-05-08 13:49:50 -07:00
parent c3ef44a402
commit 7a28e0e079
12 changed files with 4340 additions and 1925 deletions

View File

@ -15,3 +15,4 @@ When done using RStudio Server, terminate the job by:
2. Issue the following command on the login node:
scancel -f 25681892
slurmstepd: error: *** JOB 25681892 ON n3439 CANCELLED AT 2025-05-01T23:08:23 DUE TO TIME LIMIT ***

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,487 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "fcc726a8-44a4-48cf-a1cd-937b05bd4d08",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1fceca29-48c1-4ba3-93ba-88724dea22a7",
"metadata": {},
"outputs": [],
"source": [
"first_resolved_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/051725_coref_rel_phab_comments_to_2014.csv\"\n",
"first_resolved_df = pd.read_csv(first_resolved_path)\n",
"second_resolved_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/051725_coref_rel_phab_comments_2014_to_2015.csv\"\n",
"second_resolved_df = pd.read_csv(second_resolved_path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f26c31e7-bee1-4100-821f-769e5b1791bd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8621"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(second_resolved_df)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dfa81ca2-4d66-4679-bc3e-192d0cac67fa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5007"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(first_resolved_df)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6dc11bda-f0f6-4eb6-96f5-02ed9a3492ba",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13628"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"combined_df = pd.concat([first_resolved_df, second_resolved_df])\n",
"unique_df = combined_df.drop_duplicates()\n",
"len(unique_df)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0c903199-8159-455c-aa7f-e57ef07ce03e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>task_title</th>\n",
" <th>comment_text</th>\n",
" <th>date_created</th>\n",
" <th>speaker</th>\n",
" <th>meta.affil</th>\n",
" <th>conversation_id</th>\n",
" <th>comment_type</th>\n",
" <th>status</th>\n",
" <th>meta.gerrit</th>\n",
" <th>id</th>\n",
" <th>reply_to</th>\n",
" <th>timestamp</th>\n",
" <th>is_relevant</th>\n",
" <th>is_migrated</th>\n",
" <th>text</th>\n",
" <th>resolved_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>User with unattached accounts unable to login ...</td>\n",
" <td>User:NickK reported in IRC that they're gettin...</td>\n",
" <td>1411541280</td>\n",
" <td>PHID-USER-v7vgzvvcw7v2umf737ri</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
" <td>task_description</td>\n",
" <td>resolved</td>\n",
" <td>False</td>\n",
" <td>243215</td>\n",
" <td>NaN</td>\n",
" <td>2014-09-24 06:48:00+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>User:NickK reported in IRC that they're gettin...</td>\n",
" <td>User:NickK reported in IRC that they're gettin...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>User with unattached accounts unable to login ...</td>\n",
" <td>Revert has been deployed.</td>\n",
" <td>1411573104</td>\n",
" <td>PHID-USER-v7vgzvvcw7v2umf737ri</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>243216</td>\n",
" <td>243215.0</td>\n",
" <td>2014-09-24 15:38:24+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>Revert has been deployed.</td>\n",
" <td>Revert has been deployed.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>User with unattached accounts unable to login ...</td>\n",
" <td>**gerritadmin** wrote:\\n\\nChange 162550 merged...</td>\n",
" <td>1411572378</td>\n",
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>243217</td>\n",
" <td>243216.0</td>\n",
" <td>2014-09-24 15:26:18+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>**gerritadmin** wrote:\\n\\nChange 162550 merged...</td>\n",
" <td>**gerritadmin** wrote:\\n\\nChange 162550 merged...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>User with unattached accounts unable to login ...</td>\n",
" <td>(In reply to Kunal Mehta (Legoktm) from commen...</td>\n",
" <td>1411545535</td>\n",
" <td>PHID-USER-v7bwpq3rs3zdxegibdbh</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>243218</td>\n",
" <td>243217.0</td>\n",
" <td>2014-09-24 07:58:55+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>(In reply to Kunal Mehta (Legoktm) from commen...</td>\n",
" <td>(In reply to Kunal Mehta (Legoktm) from commen...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>User with unattached accounts unable to login ...</td>\n",
" <td>**gerritadmin** wrote:\\n\\nChange 162549 merged...</td>\n",
" <td>1411542640</td>\n",
" <td>PHID-USER-ynivjflmc2dcl6w5ut5v</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-mio2uq45ny7mms72syut</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>243219</td>\n",
" <td>243218.0</td>\n",
" <td>2014-09-24 07:10:40+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>**gerritadmin** wrote:\\n\\nChange 162549 merged...</td>\n",
" <td>**gerritadmin** wrote:\\n\\nChange 162549 merged...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8616</th>\n",
" <td>OAuth login refers to mediawiki.org:/ instead ...</td>\n",
" <td>&gt; When I registered, phabricator linked mediaw...</td>\n",
" <td>1413205650</td>\n",
" <td>PHID-USER-hgn5uw2jafgjgfvxibhh</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-yeaxsfxhhtbn26koo5fi</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>378799</td>\n",
" <td>378798.0</td>\n",
" <td>2014-10-13 13:07:30+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>&gt; When I registered, phabricator linked mediaw...</td>\n",
" <td>&gt; When I registered, phabricator linked mediaw...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8617</th>\n",
" <td>OAuth login refers to mediawiki.org:/ instead ...</td>\n",
" <td>See {T574} for a related discussion.</td>\n",
" <td>1412958953</td>\n",
" <td>PHID-USER-lluzkul4z7us4sxkayss</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-yeaxsfxhhtbn26koo5fi</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>378800</td>\n",
" <td>378799.0</td>\n",
" <td>2014-10-10 16:35:53+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>See {T574} for a related discussion.</td>\n",
" <td>See {T574} for a related discussion.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8618</th>\n",
" <td>Improvements to Wikimedia SUL login dialog UI:...</td>\n",
" <td>Some improvements to the Wikimedia SUL dialog:...</td>\n",
" <td>1412362816</td>\n",
" <td>PHID-USER-lluzkul4z7us4sxkayss</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-j6czqxlv5fzcx3tmq23n</td>\n",
" <td>task_description</td>\n",
" <td>declined</td>\n",
" <td>False</td>\n",
" <td>378858</td>\n",
" <td>NaN</td>\n",
" <td>2014-10-03 19:00:16+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>Some improvements to the Wikimedia SUL dialog:...</td>\n",
" <td>Some improvements to the Wikimedia SUL dialog:...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8619</th>\n",
" <td>Improvements to Wikimedia SUL login dialog UI:...</td>\n",
" <td>I guess the same restrictions as in T543 apply...</td>\n",
" <td>1412415111</td>\n",
" <td>PHID-USER-lluzkul4z7us4sxkayss</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-j6czqxlv5fzcx3tmq23n</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>378860</td>\n",
" <td>378859.0</td>\n",
" <td>2014-10-04 09:31:51+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>I guess the same restrictions as in T543 apply...</td>\n",
" <td>I guess the same restrictions as in T543 apply...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8620</th>\n",
" <td>Improvements to Wikimedia SUL login dialog UI:...</td>\n",
" <td>It's not entirely trivial to change</td>\n",
" <td>1412366627</td>\n",
" <td>PHID-USER-fn7qnpccfbitivgtw2rt</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-j6czqxlv5fzcx3tmq23n</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>378861</td>\n",
" <td>378860.0</td>\n",
" <td>2014-10-03 20:03:47+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>It's not entirely trivial to change</td>\n",
" <td>It's not entirely trivial to change</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>13628 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" task_title \\\n",
"0 User with unattached accounts unable to login ... \n",
"1 User with unattached accounts unable to login ... \n",
"2 User with unattached accounts unable to login ... \n",
"3 User with unattached accounts unable to login ... \n",
"4 User with unattached accounts unable to login ... \n",
"... ... \n",
"8616 OAuth login refers to mediawiki.org:/ instead ... \n",
"8617 OAuth login refers to mediawiki.org:/ instead ... \n",
"8618 Improvements to Wikimedia SUL login dialog UI:... \n",
"8619 Improvements to Wikimedia SUL login dialog UI:... \n",
"8620 Improvements to Wikimedia SUL login dialog UI:... \n",
"\n",
" comment_text date_created \\\n",
"0 User:NickK reported in IRC that they're gettin... 1411541280 \n",
"1 Revert has been deployed. 1411573104 \n",
"2 **gerritadmin** wrote:\\n\\nChange 162550 merged... 1411572378 \n",
"3 (In reply to Kunal Mehta (Legoktm) from commen... 1411545535 \n",
"4 **gerritadmin** wrote:\\n\\nChange 162549 merged... 1411542640 \n",
"... ... ... \n",
"8616 > When I registered, phabricator linked mediaw... 1413205650 \n",
"8617 See {T574} for a related discussion. 1412958953 \n",
"8618 Some improvements to the Wikimedia SUL dialog:... 1412362816 \n",
"8619 I guess the same restrictions as in T543 apply... 1412415111 \n",
"8620 It's not entirely trivial to change 1412366627 \n",
"\n",
" speaker meta.affil \\\n",
"0 PHID-USER-v7vgzvvcw7v2umf737ri False \n",
"1 PHID-USER-v7vgzvvcw7v2umf737ri False \n",
"2 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
"3 PHID-USER-v7bwpq3rs3zdxegibdbh False \n",
"4 PHID-USER-ynivjflmc2dcl6w5ut5v False \n",
"... ... ... \n",
"8616 PHID-USER-hgn5uw2jafgjgfvxibhh False \n",
"8617 PHID-USER-lluzkul4z7us4sxkayss False \n",
"8618 PHID-USER-lluzkul4z7us4sxkayss False \n",
"8619 PHID-USER-lluzkul4z7us4sxkayss False \n",
"8620 PHID-USER-fn7qnpccfbitivgtw2rt False \n",
"\n",
" conversation_id comment_type status meta.gerrit \\\n",
"0 PHID-TASK-mio2uq45ny7mms72syut task_description resolved False \n",
"1 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
"2 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
"3 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
"4 PHID-TASK-mio2uq45ny7mms72syut task_subcomment NaN False \n",
"... ... ... ... ... \n",
"8616 PHID-TASK-yeaxsfxhhtbn26koo5fi task_subcomment NaN False \n",
"8617 PHID-TASK-yeaxsfxhhtbn26koo5fi task_subcomment NaN False \n",
"8618 PHID-TASK-j6czqxlv5fzcx3tmq23n task_description declined False \n",
"8619 PHID-TASK-j6czqxlv5fzcx3tmq23n task_subcomment NaN False \n",
"8620 PHID-TASK-j6czqxlv5fzcx3tmq23n task_subcomment NaN False \n",
"\n",
" id reply_to timestamp is_relevant is_migrated \\\n",
"0 243215 NaN 2014-09-24 06:48:00+00:00 True False \n",
"1 243216 243215.0 2014-09-24 15:38:24+00:00 True False \n",
"2 243217 243216.0 2014-09-24 15:26:18+00:00 True False \n",
"3 243218 243217.0 2014-09-24 07:58:55+00:00 True False \n",
"4 243219 243218.0 2014-09-24 07:10:40+00:00 True False \n",
"... ... ... ... ... ... \n",
"8616 378799 378798.0 2014-10-13 13:07:30+00:00 True False \n",
"8617 378800 378799.0 2014-10-10 16:35:53+00:00 True False \n",
"8618 378858 NaN 2014-10-03 19:00:16+00:00 True False \n",
"8619 378860 378859.0 2014-10-04 09:31:51+00:00 True False \n",
"8620 378861 378860.0 2014-10-03 20:03:47+00:00 True False \n",
"\n",
" text \\\n",
"0 User:NickK reported in IRC that they're gettin... \n",
"1 Revert has been deployed. \n",
"2 **gerritadmin** wrote:\\n\\nChange 162550 merged... \n",
"3 (In reply to Kunal Mehta (Legoktm) from commen... \n",
"4 **gerritadmin** wrote:\\n\\nChange 162549 merged... \n",
"... ... \n",
"8616 > When I registered, phabricator linked mediaw... \n",
"8617 See {T574} for a related discussion. \n",
"8618 Some improvements to the Wikimedia SUL dialog:... \n",
"8619 I guess the same restrictions as in T543 apply... \n",
"8620 It's not entirely trivial to change \n",
"\n",
" resolved_text \n",
"0 User:NickK reported in IRC that they're gettin... \n",
"1 Revert has been deployed. \n",
"2 **gerritadmin** wrote:\\n\\nChange 162550 merged... \n",
"3 (In reply to Kunal Mehta (Legoktm) from commen... \n",
"4 **gerritadmin** wrote:\\n\\nChange 162549 merged... \n",
"... ... \n",
"8616 > When I registered, phabricator linked mediaw... \n",
"8617 See {T574} for a related discussion. \n",
"8618 Some improvements to the Wikimedia SUL dialog:... \n",
"8619 I guess the same restrictions as in T543 apply... \n",
"8620 It's not entirely trivial to change \n",
"\n",
"[13628 rows x 16 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unique_df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0c392d70-6236-4dfe-b6d4-bbe3f422b09e",
"metadata": {},
"outputs": [],
"source": [
"unique_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0050825_coref-rel-first.csv\", index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@ -10,9 +10,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML\n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML\n",
" warnings.warn(\"Can't initialize NVML\")\n"
]
}
@ -29,7 +29,7 @@
"metadata": {},
"outputs": [],
"source": [
"phab_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0415_http_phab_comments.csv\"\n",
"phab_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0422_http_phab_comments.csv\"\n",
"phab_df = pd.read_csv(phab_path)"
]
},
@ -61,7 +61,7 @@
" if \"tls\" in word.lower():\n",
" return True\n",
" #cert\n",
" if word.lower().startswith(\"cert\"):\n",
" if word.lower().startswith(\"cert\") and not word.lower().startswith(\"certain\"):\n",
" return True\n",
" return False\n",
"\n",
@ -84,12 +84,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:41: SettingWithCopyWarning: \n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:44: SettingWithCopyWarning: \n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:45: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -114,10 +114,11 @@
" 'isGerrit': 'meta.gerrit'\n",
"})\n",
"\n",
"# after 04-01-2015 before 10-1-2015\n",
"# after 10-01-2014 before 10-01-2015\n",
"phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1443657600) & (phab_df['date_created'] > 1427846400)]\n",
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379975444)]\n",
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1443743999) & (phab_df['date_created'] >= 1412207999)]\n",
"# strictly after 2013-07-01 00:00:00 UTC and strictly before 2015-10-01 23:59:59 UTC (epoch seconds)\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1443743999) & (phab_df['date_created'] > 1372636800)]\n",
"\n",
"#removing headless conversations\n",
"task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n",
@ -151,16 +152,383 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "4241cb0a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>task_title</th>\n",
" <th>comment_text</th>\n",
" <th>date_created</th>\n",
" <th>speaker</th>\n",
" <th>meta.affil</th>\n",
" <th>conversation_id</th>\n",
" <th>comment_type</th>\n",
" <th>status</th>\n",
" <th>meta.gerrit</th>\n",
" <th>id</th>\n",
" <th>reply_to</th>\n",
" <th>timestamp</th>\n",
" <th>is_relevant</th>\n",
" <th>is_migrated</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>After a replace of old instances, it is not po...</td>\n",
" <td>1442753295</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_description</td>\n",
" <td>resolved</td>\n",
" <td>False</td>\n",
" <td>198</td>\n",
" <td>NaN</td>\n",
" <td>2015-09-20 12:48:15+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>Works now.</td>\n",
" <td>1442864673</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>199</td>\n",
" <td>198.0</td>\n",
" <td>2015-09-21 19:44:33+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>Ok, the instances are deleted now, I will recr...</td>\n",
" <td>1442864271</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>200</td>\n",
" <td>199.0</td>\n",
" <td>2015-09-21 19:37:51+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>The new instances have the same names as recen...</td>\n",
" <td>1442854156</td>\n",
" <td>PHID-USER-22bsa5u75jz3ci3wnplu</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>201</td>\n",
" <td>200.0</td>\n",
" <td>2015-09-21 16:49:16+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>201</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>This happens also with jessie and presice inst...</td>\n",
" <td>1442835238</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>202</td>\n",
" <td>201.0</td>\n",
" <td>2015-09-21 11:33:58+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406887</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>*** Bug 49454 has been marked as a duplicate o...</td>\n",
" <td>1379011061</td>\n",
" <td>PHID-USER-2nnm76h4ykalvvref2ye</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406888</td>\n",
" <td>406887.0</td>\n",
" <td>2013-09-12 18:37:41+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406888</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>JFTR, on Tools mosh-server processes eat up to...</td>\n",
" <td>1376245807</td>\n",
" <td>PHID-USER-vk6mlmacfhx77egryy5i</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406889</td>\n",
" <td>406888.0</td>\n",
" <td>2013-08-11 18:30:07+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406889</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>This is supported on tools, but adding it to t...</td>\n",
" <td>1376185312</td>\n",
" <td>PHID-USER-h75guknmwivm6x37iute</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406890</td>\n",
" <td>406889.0</td>\n",
" <td>2013-08-11 01:41:52+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406890</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>Just found out that mosh already works for too...</td>\n",
" <td>1376118400</td>\n",
" <td>PHID-USER-5dqihbanu3caaj7pigif</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406891</td>\n",
" <td>406890.0</td>\n",
" <td>2013-08-10 07:06:40+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406891</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>(In reply to comment #0)\\n&gt; ssh is quite painf...</td>\n",
" <td>1376118251</td>\n",
" <td>PHID-USER-6vzzsmi22zem6yttr6vp</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406892</td>\n",
" <td>406891.0</td>\n",
" <td>2013-08-10 07:04:11+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>14490 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" task_title \\\n",
"197 Creation of instances broken \n",
"198 Creation of instances broken \n",
"199 Creation of instances broken \n",
"200 Creation of instances broken \n",
"201 Creation of instances broken \n",
"... ... \n",
"406887 Allow login using mosh as an alternative to pl... \n",
"406888 Allow login using mosh as an alternative to pl... \n",
"406889 Allow login using mosh as an alternative to pl... \n",
"406890 Allow login using mosh as an alternative to pl... \n",
"406891 Allow login using mosh as an alternative to pl... \n",
"\n",
" comment_text date_created \\\n",
"197 After a replace of old instances, it is not po... 1442753295 \n",
"198 Works now. 1442864673 \n",
"199 Ok, the instances are deleted now, I will recr... 1442864271 \n",
"200 The new instances have the same names as recen... 1442854156 \n",
"201 This happens also with jessie and presice inst... 1442835238 \n",
"... ... ... \n",
"406887 *** Bug 49454 has been marked as a duplicate o... 1379011061 \n",
"406888 JFTR, on Tools mosh-server processes eat up to... 1376245807 \n",
"406889 This is supported on tools, but adding it to t... 1376185312 \n",
"406890 Just found out that mosh already works for too... 1376118400 \n",
"406891 (In reply to comment #0)\\n> ssh is quite painf... 1376118251 \n",
"\n",
" speaker meta.affil \\\n",
"197 PHID-USER-qlodcndtwpolbdhncjis False \n",
"198 PHID-USER-qlodcndtwpolbdhncjis False \n",
"199 PHID-USER-qlodcndtwpolbdhncjis False \n",
"200 PHID-USER-22bsa5u75jz3ci3wnplu False \n",
"201 PHID-USER-qlodcndtwpolbdhncjis False \n",
"... ... ... \n",
"406887 PHID-USER-2nnm76h4ykalvvref2ye False \n",
"406888 PHID-USER-vk6mlmacfhx77egryy5i False \n",
"406889 PHID-USER-h75guknmwivm6x37iute False \n",
"406890 PHID-USER-5dqihbanu3caaj7pigif False \n",
"406891 PHID-USER-6vzzsmi22zem6yttr6vp False \n",
"\n",
" conversation_id comment_type status \\\n",
"197 PHID-TASK-pitdrld6mszruqmc6usf task_description resolved \n",
"198 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"199 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"200 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"201 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"... ... ... ... \n",
"406887 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406888 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406889 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406890 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406891 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"\n",
" meta.gerrit id reply_to timestamp is_relevant \\\n",
"197 False 198 NaN 2015-09-20 12:48:15+00:00 True \n",
"198 False 199 198.0 2015-09-21 19:44:33+00:00 True \n",
"199 False 200 199.0 2015-09-21 19:37:51+00:00 True \n",
"200 False 201 200.0 2015-09-21 16:49:16+00:00 True \n",
"201 False 202 201.0 2015-09-21 11:33:58+00:00 True \n",
"... ... ... ... ... ... \n",
"406887 False 406888 406887.0 2013-09-12 18:37:41+00:00 True \n",
"406888 False 406889 406888.0 2013-08-11 18:30:07+00:00 True \n",
"406889 False 406890 406889.0 2013-08-11 01:41:52+00:00 True \n",
"406890 False 406891 406890.0 2013-08-10 07:06:40+00:00 True \n",
"406891 False 406892 406891.0 2013-08-10 07:04:11+00:00 True \n",
"\n",
" is_migrated \n",
"197 False \n",
"198 False \n",
"199 False \n",
"200 False \n",
"201 False \n",
"... ... \n",
"406887 False \n",
"406888 False \n",
"406889 False \n",
"406890 False \n",
"406891 False \n",
"\n",
"[14490 rows x 14 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"comment_phab_df"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "930c4d9c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
},
{
"data": {
"text/plain": [
"862"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prior_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0050825_coref-rel-first.csv\"\n",
"prior_df = pd.read_csv(prior_path)\n",
"comment_phab_df['timestamp'] = pd.to_datetime(comment_phab_df['timestamp'], utc=True)\n",
"prior_df['timestamp'] = pd.to_datetime(prior_df['timestamp'], utc=True)\n",
"merged_df = comment_phab_df.merge(prior_df, how='outer', indicator=True)\n",
"len(merged_df)\n",
"only_in_comment_phab_df = merged_df[merged_df['_merge'] == 'left_only']\n",
"len(only_in_comment_phab_df)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f32f6eed-3aeb-4b05-8d40-7ed85e7235c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x151b89fd18a0>"
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x154d9952a7c0>"
]
},
"execution_count": 5,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -181,7 +549,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 14,
"id": "a5b062d8-2d26-4a3e-a84c-ba0eaf6eb436",
"metadata": {},
"outputs": [],
@ -195,15 +563,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "424d35e0",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"John is frustrated with the VisualEditor project, he thinks it doesn't work."
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 16,
"id": "999e1656-0036-4ba2-bedf-f54493f67790",
"metadata": {},
"outputs": [],
@ -249,7 +630,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 17,
"id": "be476647-624b-4e95-ab62-9c6b08f85368",
"metadata": {},
"outputs": [],
@ -262,7 +643,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 18,
"id": "a9628b54-a1df-49cd-a365-9cba59de3421",
"metadata": {},
"outputs": [
@ -272,7 +653,7 @@
"'i hate ve.interface, ve.interface always messes up i browser'"
]
},
"execution_count": 13,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -283,7 +664,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 19,
"id": "46873641-8e88-4829-9e24-4dd5e6749bd1",
"metadata": {},
"outputs": [
@ -291,73 +672,76 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"Entry point for launching an IPython kernel.\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (712 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (712 > 512). Running this sequence through the model will result in indexing errors\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_46935/1097216843.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'comment_text'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'resolved_text'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresolving_comment\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, convert_dtype, args, **kwargs)\u001b[0m\n\u001b[1;32m 4355\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat64\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4356\u001b[0m \"\"\"\n\u001b[0;32m-> 4357\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mSeriesApply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert_dtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4358\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4359\u001b[0m def _reduce(\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1043\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1044\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1099\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1100\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# type: ignore[arg-type]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1101\u001b[0;31m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_dtype\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1102\u001b[0m )\n\u001b[1;32m 1103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/lib.pyx\u001b[0m in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/tmp/ipykernel_46935/2183590073.py\u001b[0m in \u001b[0;36mresolving_comment\u001b[0;34m(text)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mresolving_comment\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mresolved_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresolve_references\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresolved_text\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/spacy/language.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, text, disable, component_cfg)\u001b[0m\n\u001b[1;32m 1024\u001b[0m \u001b[0merror_handler\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mproc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_error_handler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1025\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1026\u001b[0;31m \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mproc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcomponent_cfg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# type: ignore[call-arg]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1027\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1028\u001b[0m \u001b[0;31m# This typically happens if a component is not initialized\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/spacy/pipeline/trainable_pipe.pyx\u001b[0m in \u001b[0;36mspacy.pipeline.trainable_pipe.TrainablePipe.__call__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/spacy_experimental/coref/coref_component.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, docs)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 153\u001b[0;31m \u001b[0mscores\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdoc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 154\u001b[0m \u001b[0;31m# idxs is a list of mentions (start / end idxs)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0;31m# each item in scores includes scores and a mapping from scores to mentions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/model.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[0monly\u001b[0m \u001b[0mthe\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minstead\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 333\u001b[0m \"\"\"\n\u001b[0;32m--> 334\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 335\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 336\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfinish_update\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptimizer\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/layers/chain.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(model, X, is_train)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0mcallbacks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlayer\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 54\u001b[0;31m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minc_layer_grad\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 55\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minc_layer_grad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/model.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, X, is_train)\u001b[0m\n\u001b[1;32m 308\u001b[0m \"\"\"Call the model's `forward` function, returning the output and a\n\u001b[1;32m 309\u001b[0m callback to compute the gradients via backpropagation.\"\"\"\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minitialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mInT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mOutT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"Model\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/layers/chain.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(model, X, is_train)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0mcallbacks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlayer\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 54\u001b[0;31m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minc_layer_grad\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 55\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minc_layer_grad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/model.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, X, is_train)\u001b[0m\n\u001b[1;32m 308\u001b[0m \"\"\"Call the model's `forward` function, returning the output and a\n\u001b[1;32m 309\u001b[0m callback to compute the gradients via backpropagation.\"\"\"\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minitialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mInT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mOutT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"Model\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/layers/chain.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(model, X, is_train)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0mcallbacks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlayer\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 54\u001b[0;31m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minc_layer_grad\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlayer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 55\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minc_layer_grad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/model.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, X, is_train)\u001b[0m\n\u001b[1;32m 308\u001b[0m \"\"\"Call the model's `forward` function, returning the output and a\n\u001b[1;32m 309\u001b[0m callback to compute the gradients via backpropagation.\"\"\"\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minitialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mInT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mOutT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"Model\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/spacy_transformers/layers/transformer_model.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(model, docs, is_train)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0mwordpieces\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malign\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_max_length\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m )\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0mmodel_output\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbp_tensors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtransformer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwordpieces\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"logger\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mattrs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0mlog_gpu_memory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mattrs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"logger\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"after forward\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/model.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, X, is_train)\u001b[0m\n\u001b[1;32m 308\u001b[0m \"\"\"Call the model's `forward` function, returning the output and a\n\u001b[1;32m 309\u001b[0m callback to compute the gradients via backpropagation.\"\"\"\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minitialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mInT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mOutT\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m\"Model\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/layers/pytorchwrapper.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(model, X, is_train)\u001b[0m\n\u001b[1;32m 223\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0mXtorch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_dX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 225\u001b[0;31m \u001b[0mYtorch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch_backprop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshims\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXtorch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 226\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mget_dYtorch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconvert_outputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYtorch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 227\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/shims/pytorch.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs, is_train)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin_update\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/thinc/shims/pytorch.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mno_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mamp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautocast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mixed_precision\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 116\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1192\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1195\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1196\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_attentions\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0moutput_hidden_states\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_hidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 863\u001b[0;31m \u001b[0mreturn_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreturn_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 864\u001b[0m )\n\u001b[1;32m 865\u001b[0m \u001b[0msequence_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mencoder_outputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1192\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1195\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1196\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0mencoder_attention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 533\u001b[0m \u001b[0mpast_key_value\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 534\u001b[0;31m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 535\u001b[0m )\n\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1192\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1195\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1196\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 453\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 454\u001b[0m layer_output = apply_chunking_to_forward(\n\u001b[0;32m--> 455\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed_forward_chunk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchunk_size_feed_forward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq_len_dim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattention_output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 456\u001b[0m )\n\u001b[1;32m 457\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlayer_output\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/transformers/pytorch_utils.py\u001b[0m in \u001b[0;36mapply_chunking_to_forward\u001b[0;34m(forward_fn, chunk_size, chunk_dim, *input_tensors)\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_chunks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdim\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunk_dim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 246\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput_tensors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 247\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 248\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py\u001b[0m in \u001b[0;36mfeed_forward_chunk\u001b[0;34m(self, attention_output)\u001b[0m\n\u001b[1;32m 464\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfeed_forward_chunk\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattention_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 466\u001b[0;31m \u001b[0mintermediate_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintermediate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattention_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 467\u001b[0m \u001b[0mlayer_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mintermediate_output\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mattention_output\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 468\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mlayer_output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1192\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1195\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1196\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/transformers/models/roberta/modeling_roberta.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 363\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 364\u001b[0;31m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdense\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 365\u001b[0m \u001b[0mhidden_states\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintermediate_act_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1192\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1193\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1194\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1195\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1196\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"comment_phab_df['text'] = comment_phab_df['comment_text'].apply(str)\n",
"comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)"
"only_in_comment_phab_df['text'] = only_in_comment_phab_df['comment_text'].apply(str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"id": "79e3f7e2",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Token indices sequence length is longer than the specified maximum sequence length for this model (546 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (546 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running this sequence through the model will result in indexing errors\n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"only_in_comment_phab_df['resolved_text'] = only_in_comment_phab_df['text'].apply(resolving_comment)\n",
"only_in_comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/050825_coref_rel_phab_stragglers.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "2b583feb-1c62-4c96-9ba0-2996d72e70d3",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"7423 [Backport was merged into 1.24wmf16 upon a tim...\n",
"7902 I guess this can be closed now as RESOLVED WOR...\n",
"7905 The upstream issue is https://github.com/jcgre...\n",
"7906 An update on this. In Amsterdam we found at th...\n",
"7907 Yes. It's used by people using pywikibot-as-a-...\n",
" ... \n",
"14465 I amended the title to the range IE8-10 becaus...\n",
"14466 If I remember correctly this problem was at le...\n",
"14467 If I remember correctly this problem was at le...\n",
"14468 After a quick test, autocomplete seems to work...\n",
"14478 Still not merged, so we can't really do much.\n",
"Name: resolved_text, Length: 862, dtype: object"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"comment_phab_df['resolved_text'][46088]"
"only_in_comment_phab_df['resolved_text']"
]
},
{
@ -367,7 +751,7 @@
"metadata": {},
"outputs": [],
"source": [
"comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/041525_coref_rel_phab_comments.csv\", index=False)"
"only_in_comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/050825_coref_rel_phab_stragglers.csv\", index=False)"
]
}
],
@ -387,7 +771,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
"version": "3.11.11"
}
},
"nbformat": 4,

View File

@ -518,7 +518,7 @@
"outputs": [],
"source": [
"task_phab_df\n",
"task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv\", index=False)"
"#task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv\", index=False)"
]
},
{
@ -981,7 +981,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.11.11"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -10,9 +10,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML\n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/torch/cuda/__init__.py:497: UserWarning: Can't initialize NVML\n",
" warnings.warn(\"Can't initialize NVML\")\n"
]
}
@ -29,7 +29,7 @@
"metadata": {},
"outputs": [],
"source": [
"phab_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0415_http_phab_comments.csv\"\n",
"phab_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0422_http_phab_comments.csv\"\n",
"phab_df = pd.read_csv(phab_path)"
]
},
@ -61,7 +61,7 @@
" if \"tls\" in word.lower():\n",
" return True\n",
" #cert\n",
" if word.lower().startswith(\"cert\"):\n",
" if word.lower().startswith(\"cert\") and not word.lower().startswith(\"certain\"):\n",
" return True\n",
" return False\n",
"\n",
@ -84,12 +84,12 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:41: SettingWithCopyWarning: \n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:44: SettingWithCopyWarning: \n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:45: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -114,10 +114,11 @@
" 'isGerrit': 'meta.gerrit'\n",
"})\n",
"\n",
"# after 04-01-2015 before 10-1-2015\n",
"# after 10-01-2014 before 10-01-2015\n",
"phab_df['timestamp'] = pd.to_datetime(phab_df['date_created'], unit='s', origin='unix', utc=True)\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1443657600) & (phab_df['date_created'] > 1427846400)]\n",
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1381691276) & (phab_df['date_created'] > 1379975444)]\n",
"#filtered_phab_df = phab_df[(phab_df['date_created'] < 1443743999) & (phab_df['date_created'] >= 1412207999)]\n",
"# after 07-01-2013 before 10-01-2015\n",
"filtered_phab_df = phab_df[(phab_df['date_created'] < 1443743999) & (phab_df['date_created'] > 1372636800)]\n",
"\n",
"#removing headless conversations\n",
"task_phab_df = filtered_phab_df[filtered_phab_df['comment_type']==\"task_description\"]\n",
@ -156,8 +157,314 @@
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>task_title</th>\n",
" <th>comment_text</th>\n",
" <th>date_created</th>\n",
" <th>speaker</th>\n",
" <th>meta.affil</th>\n",
" <th>conversation_id</th>\n",
" <th>comment_type</th>\n",
" <th>status</th>\n",
" <th>meta.gerrit</th>\n",
" <th>id</th>\n",
" <th>reply_to</th>\n",
" <th>timestamp</th>\n",
" <th>is_relevant</th>\n",
" <th>is_migrated</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>197</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>After a replace of old instances, it is not po...</td>\n",
" <td>1442753295</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_description</td>\n",
" <td>resolved</td>\n",
" <td>False</td>\n",
" <td>198</td>\n",
" <td>NaN</td>\n",
" <td>2015-09-20 12:48:15+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>198</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>Works now.</td>\n",
" <td>1442864673</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>199</td>\n",
" <td>198.0</td>\n",
" <td>2015-09-21 19:44:33+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>199</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>Ok, the instances are deleted now, I will recr...</td>\n",
" <td>1442864271</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>200</td>\n",
" <td>199.0</td>\n",
" <td>2015-09-21 19:37:51+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>200</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>The new instances have the same names as recen...</td>\n",
" <td>1442854156</td>\n",
" <td>PHID-USER-22bsa5u75jz3ci3wnplu</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>201</td>\n",
" <td>200.0</td>\n",
" <td>2015-09-21 16:49:16+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>201</th>\n",
" <td>Creation of instances broken</td>\n",
" <td>This happens also with jessie and presice inst...</td>\n",
" <td>1442835238</td>\n",
" <td>PHID-USER-qlodcndtwpolbdhncjis</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-pitdrld6mszruqmc6usf</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>202</td>\n",
" <td>201.0</td>\n",
" <td>2015-09-21 11:33:58+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406887</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>*** Bug 49454 has been marked as a duplicate o...</td>\n",
" <td>1379011061</td>\n",
" <td>PHID-USER-2nnm76h4ykalvvref2ye</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406888</td>\n",
" <td>406887.0</td>\n",
" <td>2013-09-12 18:37:41+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406888</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>JFTR, on Tools mosh-server processes eat up to...</td>\n",
" <td>1376245807</td>\n",
" <td>PHID-USER-vk6mlmacfhx77egryy5i</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406889</td>\n",
" <td>406888.0</td>\n",
" <td>2013-08-11 18:30:07+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406889</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>This is supported on tools, but adding it to t...</td>\n",
" <td>1376185312</td>\n",
" <td>PHID-USER-h75guknmwivm6x37iute</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406890</td>\n",
" <td>406889.0</td>\n",
" <td>2013-08-11 01:41:52+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406890</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>Just found out that mosh already works for too...</td>\n",
" <td>1376118400</td>\n",
" <td>PHID-USER-5dqihbanu3caaj7pigif</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406891</td>\n",
" <td>406890.0</td>\n",
" <td>2013-08-10 07:06:40+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406891</th>\n",
" <td>Allow login using mosh as an alternative to pl...</td>\n",
" <td>(In reply to comment #0)\\n&gt; ssh is quite painf...</td>\n",
" <td>1376118251</td>\n",
" <td>PHID-USER-6vzzsmi22zem6yttr6vp</td>\n",
" <td>False</td>\n",
" <td>PHID-TASK-hnwvtmwgpm2oisoqaozt</td>\n",
" <td>task_subcomment</td>\n",
" <td>NaN</td>\n",
" <td>False</td>\n",
" <td>406892</td>\n",
" <td>406891.0</td>\n",
" <td>2013-08-10 07:04:11+00:00</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>14490 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
"5657"
" task_title \\\n",
"197 Creation of instances broken \n",
"198 Creation of instances broken \n",
"199 Creation of instances broken \n",
"200 Creation of instances broken \n",
"201 Creation of instances broken \n",
"... ... \n",
"406887 Allow login using mosh as an alternative to pl... \n",
"406888 Allow login using mosh as an alternative to pl... \n",
"406889 Allow login using mosh as an alternative to pl... \n",
"406890 Allow login using mosh as an alternative to pl... \n",
"406891 Allow login using mosh as an alternative to pl... \n",
"\n",
" comment_text date_created \\\n",
"197 After a replace of old instances, it is not po... 1442753295 \n",
"198 Works now. 1442864673 \n",
"199 Ok, the instances are deleted now, I will recr... 1442864271 \n",
"200 The new instances have the same names as recen... 1442854156 \n",
"201 This happens also with jessie and presice inst... 1442835238 \n",
"... ... ... \n",
"406887 *** Bug 49454 has been marked as a duplicate o... 1379011061 \n",
"406888 JFTR, on Tools mosh-server processes eat up to... 1376245807 \n",
"406889 This is supported on tools, but adding it to t... 1376185312 \n",
"406890 Just found out that mosh already works for too... 1376118400 \n",
"406891 (In reply to comment #0)\\n> ssh is quite painf... 1376118251 \n",
"\n",
" speaker meta.affil \\\n",
"197 PHID-USER-qlodcndtwpolbdhncjis False \n",
"198 PHID-USER-qlodcndtwpolbdhncjis False \n",
"199 PHID-USER-qlodcndtwpolbdhncjis False \n",
"200 PHID-USER-22bsa5u75jz3ci3wnplu False \n",
"201 PHID-USER-qlodcndtwpolbdhncjis False \n",
"... ... ... \n",
"406887 PHID-USER-2nnm76h4ykalvvref2ye False \n",
"406888 PHID-USER-vk6mlmacfhx77egryy5i False \n",
"406889 PHID-USER-h75guknmwivm6x37iute False \n",
"406890 PHID-USER-5dqihbanu3caaj7pigif False \n",
"406891 PHID-USER-6vzzsmi22zem6yttr6vp False \n",
"\n",
" conversation_id comment_type status \\\n",
"197 PHID-TASK-pitdrld6mszruqmc6usf task_description resolved \n",
"198 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"199 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"200 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"201 PHID-TASK-pitdrld6mszruqmc6usf task_subcomment NaN \n",
"... ... ... ... \n",
"406887 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406888 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406889 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406890 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"406891 PHID-TASK-hnwvtmwgpm2oisoqaozt task_subcomment NaN \n",
"\n",
" meta.gerrit id reply_to timestamp is_relevant \\\n",
"197 False 198 NaN 2015-09-20 12:48:15+00:00 True \n",
"198 False 199 198.0 2015-09-21 19:44:33+00:00 True \n",
"199 False 200 199.0 2015-09-21 19:37:51+00:00 True \n",
"200 False 201 200.0 2015-09-21 16:49:16+00:00 True \n",
"201 False 202 201.0 2015-09-21 11:33:58+00:00 True \n",
"... ... ... ... ... ... \n",
"406887 False 406888 406887.0 2013-09-12 18:37:41+00:00 True \n",
"406888 False 406889 406888.0 2013-08-11 18:30:07+00:00 True \n",
"406889 False 406890 406889.0 2013-08-11 01:41:52+00:00 True \n",
"406890 False 406891 406890.0 2013-08-10 07:06:40+00:00 True \n",
"406891 False 406892 406891.0 2013-08-10 07:04:11+00:00 True \n",
"\n",
" is_migrated \n",
"197 False \n",
"198 False \n",
"199 False \n",
"200 False \n",
"201 False \n",
"... ... \n",
"406887 False \n",
"406888 False \n",
"406889 False \n",
"406890 False \n",
"406891 False \n",
"\n",
"[14490 rows x 14 columns]"
]
},
"execution_count": 5,
@ -166,22 +473,62 @@
}
],
"source": [
"len(comment_phab_df)"
"comment_phab_df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 12,
"id": "930c4d9c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
},
{
"data": {
"text/plain": [
"862"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prior_path = \"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/0050825_coref-rel-first.csv\"\n",
"prior_df = pd.read_csv(prior_path)\n",
"comment_phab_df['timestamp'] = pd.to_datetime(comment_phab_df['timestamp'], utc=True)\n",
"prior_df['timestamp'] = pd.to_datetime(prior_df['timestamp'], utc=True)\n",
"merged_df = comment_phab_df.merge(prior_df, how='outer', indicator=True)\n",
"len(merged_df)\n",
"only_in_comment_phab_df = merged_df[merged_df['_merge'] == 'left_only']\n",
"len(only_in_comment_phab_df)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f32f6eed-3aeb-4b05-8d40-7ed85e7235c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x14e629c449f0>"
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x154d9952a7c0>"
]
},
"execution_count": 6,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -202,7 +549,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "a5b062d8-2d26-4a3e-a84c-ba0eaf6eb436",
"metadata": {},
"outputs": [],
@ -216,15 +563,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "424d35e0",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"John is frustrated with the VisualEditor project, he thinks it doesn't work."
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 16,
"id": "999e1656-0036-4ba2-bedf-f54493f67790",
"metadata": {},
"outputs": [],
@ -270,7 +630,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 17,
"id": "be476647-624b-4e95-ab62-9c6b08f85368",
"metadata": {},
"outputs": [],
@ -283,7 +643,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 18,
"id": "a9628b54-a1df-49cd-a365-9cba59de3421",
"metadata": {},
"outputs": [
@ -293,7 +653,7 @@
"'i hate ve.interface, ve.interface always messes up i browser'"
]
},
"execution_count": 9,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -304,7 +664,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 19,
"id": "46873641-8e88-4829-9e24-4dd5e6749bd1",
"metadata": {},
"outputs": [
@ -312,7 +672,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -322,12 +682,12 @@
}
],
"source": [
"comment_phab_df['text'] = comment_phab_df['comment_text'].apply(str)"
"only_in_comment_phab_df['text'] = only_in_comment_phab_df['comment_text'].apply(str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"id": "79e3f7e2",
"metadata": {},
"outputs": [
@ -335,24 +695,53 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Token indices sequence length is longer than the specified maximum sequence length for this model (712 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (712 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (572 > 512). Running this sequence through the model will result in indexing errors\n"
"Token indices sequence length is longer than the specified maximum sequence length for this model (546 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (546 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (554 > 512). Running this sequence through the model will result in indexing errors\n",
"/gscratch/scrubbed/mjilg/envs/coref-notebook/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)"
"only_in_comment_phab_df['resolved_text'] = only_in_comment_phab_df['text'].apply(resolving_comment)\n",
"only_in_comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/050825_coref_rel_phab_stragglers.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 33,
"id": "2b583feb-1c62-4c96-9ba0-2996d72e70d3",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"7423 [Backport was merged into 1.24wmf16 upon a tim...\n",
"7902 I guess this can be closed now as RESOLVED WOR...\n",
"7905 The upstream issue is https://github.com/jcgre...\n",
"7906 An update on this. In Amsterdam we found at th...\n",
"7907 Yes. It's used by people using pywikibot-as-a-...\n",
" ... \n",
"14465 I amended the title to the range IE8-10 becaus...\n",
"14466 If I remember correctly this problem was at le...\n",
"14467 If I remember correctly this problem was at le...\n",
"14468 After a quick test, autocomplete seems to work...\n",
"14478 Still not merged, so we can't really do much.\n",
"Name: resolved_text, Length: 862, dtype: object"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"comment_phab_df['resolved_text'][46088]"
"only_in_comment_phab_df['resolved_text']"
]
},
{
@ -362,7 +751,7 @@
"metadata": {},
"outputs": [],
"source": [
"comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/041525_coref_rel_phab_comments.csv\", index=False)"
"only_in_comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/050825_coref_rel_phab_stragglers.csv\", index=False)"
]
}
],
@ -382,7 +771,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
"version": "3.11.11"
}
},
"nbformat": 4,