updated work for some m2 writing tomorrow

2025-05-24 16:59:03 -07:00 · 2025-05-24 16:59:03 -07:00 · fd1479775d
commit fd1479775d
parent 3573afbc1a
14 changed files with 1309 additions and 944 deletions
--- a/.sh_history
+++ b/.sh_history
@ -173,3 +173,6 @@ ls ../case3
 cd ..
 ls
 ls case1
+ls
+cd case2
+ls
--- a/commit_analysis/plotting/ww-bots-plot-script.R
+++ b/commit_analysis/plotting/ww-bots-plot-script.R
@ -46,35 +46,35 @@ new_unaff_authors <- new_authors_long_df |>
             fill=commit_seniority)) +
  geom_col(position='dodge') +
  labs(x = "Relative Week", y = "Commits", fill="Contributor Tenure (New contributors <= 5 commits before deployment announcement)") +
-  geom_vline(data = long_df |> filter(source == "c1"), 
+  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -29), 
-             linetype = "dotted", color = "black", linewidth = 1) +
-  geom_vline(data = long_df |> filter(source == "c1"), 
+             linetype = "dotted", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -9), 
-             linetype = "dotted", color = "black", linewidth = 1) +
+             linetype = "dotted", color = "black", linewidth = 0.5) +
  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -4), 
-             linetype = "3313", color = "black", linewidth = 1) +
-  geom_vline(data = long_df |> filter(source == "c2"), 
-             aes(xintercept = -99), 
-             linetype = "dotted", color = "black", linewidth = 1) +
+             linetype = "3313", color = "black", linewidth = 0.5) +
  geom_vline(data = combined_df |> filter(source == "c2"), 
-             aes(xintercept = -4), 
-             linetype = "3313", color = "black", linewidth = 1) +
-  geom_vline(data = long_df |> filter(source == "c3"), 
+             aes(xintercept = -99), 
+             linetype = "dotted", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c2"), 
+             aes(xintercept = -4),   
+             linetype = "3313", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c3"), 
             aes(xintercept = -97), 
-             linetype = "dotted", color = "black", linewidth = 1) +
+             linetype = "dotted", color = "black", linewidth = 0.5) +
  geom_vline(data = combined_df |> filter(source == "c3"), 
             aes(xintercept = -3), 
-             linetype = "3313", color = "black", linewidth = 1) +
-  geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 90), 
+             linetype = "3313", color = "black", linewidth = 0.5) +
+  geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 80), 
            aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"), 
            inherit.aes = FALSE, color = "black", size = 4) +
-  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
-  geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 90), 
+  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
+  geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 80), 
            aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"), 
            inherit.aes = FALSE, color = "black", size = 4) +
-  geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 100), 
+  geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 80), 
            aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"), 
            inherit.aes = FALSE, color = "black", size = 4) +
  scale_fill_manual(values = c("returning_unaff_commit_count" = "#FFC107",  # Color for "Returning Contributors"
@ -96,14 +96,14 @@ new_unaff_authors <- new_authors_long_df |>
    strip.text = element_text(size = 14)# Increase legend title font size
  ) +
  facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
-    "c1" = "VisualEditor",
-    "c2" = "HTTPS-as-default",
-    "c3" = "HTTP-deprecation"
+    "c1" = "VisualEditor (2013)",
+    "c2" = "HTTPS-as-default (2013)",
+    "c3" = "HTTP-deprecation (2015)"
  )))

 new_unaff_authors

-ggsave(filename = "ww-0501-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800)
+ggsave(filename = "d1-m2-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800)

 unaff_authors <- new_authors_long_df |>
  ggplot(aes(x=relative_week,
--- a/commit_analysis/plotting/ww-plot-script.R
+++ b/commit_analysis/plotting/ww-plot-script.R
@ -38,25 +38,37 @@ commit_authors <- long_df |>
             fill=factor(commit_type))) +
  geom_col(position='dodge') +
  labs(x = "Relative Week", y = "Commits", fill="Commit Author") +
-  geom_vline(data = long_df |> filter(source == "c1"), 
+  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -29), 
-             linetype = "dotted", color = "black", linewidth = 1) +
-  geom_vline(data = long_df |> filter(source == "c1"), 
+             linetype = "dotted", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -9), 
-             linetype = "dotted", color = "black", linewidth = 1) +
-  geom_vline(data = long_df |> filter(source == "c2"), 
+             linetype = "dotted", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c1"), 
+             aes(xintercept = -4), 
+             linetype = "3313", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c2"), 
             aes(xintercept = -99), 
-             linetype = "dotted", color = "black", linewidth = 1) +
-  geom_vline(data = long_df |> filter(source == "c3"), 
+             linetype = "dotted", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c2"), 
+             aes(xintercept = -4),   
+             linetype = "3313", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c3"), 
             aes(xintercept = -97), 
-             linetype = "dotted", color = "black", linewidth = 1) +
-  geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 50), 
+             linetype = "dotted", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c3"), 
+             aes(xintercept = -3), 
+             linetype = "3313", color = "black", linewidth = 0.5) +
+  geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 50), 
            aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"), 
            inherit.aes = FALSE, color = "black", size = 4) +
-  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
-  geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 50), 
+  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
+  geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 50), 
            aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"), 
            inherit.aes = FALSE, color = "black", size = 4) +
+  geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 50), 
+            aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"), 
+            inherit.aes = FALSE, color = "black", size = 4) +
  scale_fill_manual(values = affiliationColors,
                    labels = c("unaff_commit_count" = "Unaffiliated",
                               "wikimedia_commit_count" = "WMF-affiliated")) +
@ -74,10 +86,10 @@ commit_authors <- long_df |>
    strip.text = element_text(size = 14)# Increase legend title font size
  ) +
  facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
-    "c1" = "VisualEditor (commits to extensions/visualeditor)",
-    "c2" = "HTTPS-as-default (relevant commits to mediawiki/core)",
-    "c3" = "HTTP-deprecation (relevant commits to mediawiki/core)"
+    "c1" = "VisualEditor (2013) [commits to extensions/visualeditor]",
+    "c2" = "HTTPS-as-default (2013) [relevant commits to mediawiki/core]",
+    "c3" = "HTTP-deprecation (2015) [relevant commits to mediawiki/core]"
  )))
 commit_authors

-ggsave(filename = "ww-0501-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
+ggsave(filename = "d1-m2-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
--- a/m2-figures/d1-m2-bot-commits-faceted.png
+++ b/m2-figures/d1-m2-bot-commits-faceted.png
--- a/m2-figures/d1-m2-commits-faceted.png
+++ b/m2-figures/d1-m2-commits-faceted.png
--- a/m2-figures/d1-m2-tasks-faceted.png
+++ b/m2-figures/d1-m2-tasks-faceted.png
--- a/mgaughan-rstudio-server_26402644.out
+++ b/mgaughan-rstudio-server_26402644.out
@ -1,18 +1,17 @@
 1. SSH tunnel from your workstation using the following command:

-   ssh -N -L 8787:n3439:39175 mjilg@klone.hyak.uw.edu
+   ssh -N -L 8787:n3439:38329 mjilg@klone.hyak.uw.edu

   and point your web browser to http://localhost:8787

 2. log in to RStudio Server using the following credentials:

   user: mjilg
-   password: twImEJor5ex498HTzJjx
+   password: YXXLCjS/064zAiagiRdx

 When done using RStudio Server, terminate the job by:

 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
 2. Issue the following command on the login node:

-      scancel -f 25681892
-slurmstepd: error: *** JOB 25681892 ON n3439 CANCELLED AT 2025-05-01T23:08:23 DUE TO TIME LIMIT ***
+      scancel -f 26402644
--- a/phab_analysis/case2/040425_phab_comments.ipynb
+++ b/phab_analysis/case2/040425_phab_comments.ipynb
@ -80,13 +80,13 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "/tmp/ipykernel_55861/3758790231.py:41: SettingWithCopyWarning: \n",
+      "/tmp/ipykernel_76053/3758790231.py:41: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
-      "/tmp/ipykernel_55861/3758790231.py:44: SettingWithCopyWarning: \n",
+      "/tmp/ipykernel_76053/3758790231.py:44: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
@ -148,7 +148,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
   "id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
   "metadata": {},
   "outputs": [
@ -172,6 +172,29 @@
    "print(f\"Unique speakers: {unique_speakers}\")"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "b9229ca3-afb9-4eec-a173-f30be8c4729b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "given_date = pd.Timestamp(\"2013-08-28\").tz_localize(None)\n",
+    "task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n",
+    "task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "24205386-d18f-4fb7-b37d-e81c0a5ba532",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "task_phab_df\n",
+    "task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv\", index=False)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 7,
@ -1024,7 +1047,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.12"
+   "version": "3.11.11"
  }
 },
 "nbformat": 4,
--- a/phab_analysis/case2/041525_resolved_phab_comments.ipynb
+++ b/phab_analysis/case2/041525_resolved_phab_comments.ipynb
--- a/phab_analysis/case2/c2_resolved_phab.ipynb
+++ b/phab_analysis/case2/c2_resolved_phab.ipynb
--- a/phab_analysis/case2/coref_resolution-https.ipynb
+++ b/phab_analysis/case2/coref_resolution-https.ipynb
@ -24,7 +24,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "id": "f6448c6f-2b5d-45f5-a32e-b3b47c16ef85",
   "metadata": {},
   "outputs": [],
@ -35,7 +35,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "id": "e30e81ad",
   "metadata": {},
   "outputs": [],
@ -75,7 +75,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "id": "f359805f",
   "metadata": {},
   "outputs": [
@ -149,7 +149,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "id": "ffd0b263",
   "metadata": {},
   "outputs": [
@ -175,17 +175,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
   "id": "f32f6eed-3aeb-4b05-8d40-7ed85e7235c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x1495ecba4bb0>"
+       "<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x14cab225fd00>"
      ]
     },
-     "execution_count": 6,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -206,7 +206,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 10,
   "id": "a5b062d8-2d26-4a3e-a84c-ba0eaf6eb436",
   "metadata": {},
   "outputs": [],
@ -220,26 +220,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
-   "id": "424d35e0",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "John is frustrated with the VisualEditor project, he thinks it doesn't work."
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 11,
   "id": "999e1656-0036-4ba2-bedf-f54493f67790",
   "metadata": {},
   "outputs": [],
@ -285,7 +266,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 12,
   "id": "be476647-624b-4e95-ab62-9c6b08f85368",
   "metadata": {},
   "outputs": [],
@ -298,7 +279,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 13,
   "id": "a9628b54-a1df-49cd-a365-9cba59de3421",
   "metadata": {},
   "outputs": [
@ -308,7 +289,7 @@
       "'i hate ve.interface, ve.interface always messes up i browser'"
      ]
     },
-     "execution_count": 10,
+     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -334,54 +315,13 @@
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n",
      "Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n",
-      "Token indices sequence length is longer than the specified maximum sequence length for this model (904 > 512). Running this sequence through the model will result in indexing errors\n"
+      "Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n"
     ]
    }
   ],
   "source": [
    "comment_phab_df['text'] = comment_phab_df['comment_text'].apply(str)\n",
-    "comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "2b583feb-1c62-4c96-9ba0-2996d72e70d3",
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "KeyError",
-     "evalue": "46088",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-      "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3360\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3361\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3362\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
-      "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
-      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
-      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
-      "\u001b[0;31mKeyError\u001b[0m: 46088",
-      "\nThe above exception was the direct cause of the following exception:\n",
-      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-      "\u001b[0;32m/tmp/ipykernel_61233/1116300830.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'resolved_text'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m46088\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    940\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    941\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mkey_is_scalar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 942\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    943\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    944\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mis_hashable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m   1049\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1050\u001b[0m         \u001b[0;31m# Similar to Index.get_value, but we do not fall back to positional\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1051\u001b[0;31m         \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1052\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_values_for_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1053\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3361\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3362\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3363\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3365\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mKeyError\u001b[0m: 46088"
-     ]
-    }
-   ],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "92bf47ae",
-   "metadata": {},
-   "outputs": [],
-   "source": [
+    "comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)\n",
    "comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/051825_coref_rel_phab_comments.csv\", index=False)"
   ]
  }
@ -402,7 +342,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.12"
+   "version": "3.11.11"
  }
 },
 "nbformat": 4,
--- a/phab_analysis/case3/041525_phab_comments.ipynb
+++ b/phab_analysis/case3/041525_phab_comments.ipynb
@ -148,7 +148,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
   "id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
   "metadata": {},
   "outputs": [
@ -172,6 +172,29 @@
    "print(f\"Unique speakers: {unique_speakers}\")"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "0ef35632-ed07-478e-94ab-525169b82783",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "given_date = pd.Timestamp(\"2015-07-02\").tz_localize(None)\n",
+    "task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n",
+    "task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "1e7bda13-4c2d-413e-b3c6-9c4b38e6cb07",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "task_phab_df\n",
+    "task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv\", index=False)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 7,
--- a/phab_analysis/case3/050825_join_resolved_files.ipynb
+++ b/phab_analysis/case3/050825_join_resolved_files.ipynb
@ -1168,7 +1168,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.11"
+   "version": "3.7.12"
  }
 },
 "nbformat": 4,
--- a/ww-task-plot-script.R
+++ b/ww-task-plot-script.R
@ -3,6 +3,9 @@ library(tidyverse)
 c1_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/phab_tasks.csv"
 c1_input_df <- read.csv(c1_count , header = TRUE) 

+c2_count <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv"
+c2_input_df <- read.csv(c2_count , header = TRUE) 
+
 c3_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv"
 c3_input_df <- read.csv(c3_count , header = TRUE) 

@ -10,17 +13,16 @@ c1_unique_counts <- c1_input_df %>%
  group_by(meta.affil, week_bin) %>%
  summarise(unique_count = n_distinct(conversation_id), .groups = "drop")

+c2_unique_counts <- c2_input_df %>%
+  group_by(meta.affil, week_bin) %>%
+  summarise(unique_count = n_distinct(conversation_id), .groups = "drop")
+
 c3_unique_counts <- c3_input_df %>%
  group_by(meta.affil, week_bin) %>%
  summarise(unique_count = n_distinct(conversation_id), .groups = "drop")

 c1_unique_counts <- c1_unique_counts%>% mutate(source = "c1")
-c2_unique_counts <- data.frame(
-  meta.affil = rep("False", 117), 
-  week_bin = -103:13,          
-  unique_count = rep(0, 117),  
-  source = rep("c2", 117)       
-)
+c2_unique_counts <- c2_unique_counts %>% mutate(source = "c2")
 c3_unique_counts <- c3_unique_counts %>% mutate(source = "c3")

 combined_df <- bind_rows(c1_unique_counts, c2_unique_counts, c3_unique_counts)
@ -37,28 +39,34 @@ commit_authors <- combined_df |>
  ggplot(aes(x=week_bin, 
             y=unique_count, 
             fill=factor(meta.affil))) +
-  geom_col(position='dodge') +
-  labs(x = "Relative Week", y = "Tasks", fill="Task Author") +
+  geom_col(position='dodge2') +
+  labs(x = "Relative Week", y = "New Tasks Created", fill="Task Author") +
  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -29), 
-             linetype = "dotted", color = "black", linewidth = 1) +
+             linetype = "dotted", color = "black", linewidth = 0.5) +
  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -9), 
-             linetype = "dotted", color = "black", linewidth = 1) +
+             linetype = "dotted", color = "black", linewidth = 0.5) +
  geom_vline(data = combined_df |> filter(source == "c1"), 
             aes(xintercept = -4), 
-             linetype = "3313", color = "black", linewidth = 1) +
+             linetype = "3313", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c2"), 
+             aes(xintercept = -99), 
+             linetype = "dotted", color = "black", linewidth = 0.5) +
+  geom_vline(data = combined_df |> filter(source == "c2"), 
+             aes(xintercept = -4),   
+             linetype = "3313", color = "black", linewidth = 0.5) +
  geom_vline(data = combined_df |> filter(source == "c3"), 
             aes(xintercept = -97), 
-             linetype = "dotted", color = "black", linewidth = 1) +
+             linetype = "dotted", color = "black", linewidth = 0.5) +
  geom_vline(data = combined_df |> filter(source == "c3"), 
             aes(xintercept = -3), 
-             linetype = "3313", color = "black", linewidth = 1) +
-  geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 130), 
+             linetype = "3313", color = "black", linewidth = 0.5) +
+  geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 130), 
            aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"), 
            inherit.aes = FALSE, color = "black", size = 4) +
-  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
-  geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 130), 
+  geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
+  geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 130), 
            aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"), 
            inherit.aes = FALSE, color = "black", size = 4) +
  geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 130), 
@ -81,10 +89,10 @@ commit_authors <- combined_df |>
    strip.text = element_text(size = 14)# Increase legend title font size
  ) +
  facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
-    "c1" = "VisualEditor",
-    "c2" = "HTTPS-as-default",
-    "c3" = "HTTP-deprecation"
+    "c1" = "VisualEditor (2013)",
+    "c2" = "HTTPS-as-default (2013)",
+    "c3" = "HTTP-deprecation (2015)"
  )))
 commit_authors

-ggsave(filename = "ww-0501-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
+ggsave(filename = "d1-m2-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)