1
0

updated work for some m2 writing tomorrow

This commit is contained in:
Matthew Gaughan 2025-05-24 16:59:03 -07:00
parent 3573afbc1a
commit fd1479775d
14 changed files with 1309 additions and 944 deletions

View File

@ -173,3 +173,6 @@ ls ../case3
cd ..
ls
ls case1
ls
cd case2
ls

View File

@ -46,35 +46,35 @@ new_unaff_authors <- new_authors_long_df |>
fill=commit_seniority)) +
geom_col(position='dodge') +
labs(x = "Relative Week", y = "Commits", fill="Contributor Tenure (New contributors <= 5 commits before deployment announcement)") +
geom_vline(data = long_df |> filter(source == "c1"),
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -29),
linetype = "dotted", color = "black", linewidth = 1) +
geom_vline(data = long_df |> filter(source == "c1"),
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -9),
linetype = "dotted", color = "black", linewidth = 1) +
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -4),
linetype = "3313", color = "black", linewidth = 1) +
geom_vline(data = long_df |> filter(source == "c2"),
aes(xintercept = -99),
linetype = "dotted", color = "black", linewidth = 1) +
linetype = "3313", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c2"),
aes(xintercept = -4),
linetype = "3313", color = "black", linewidth = 1) +
geom_vline(data = long_df |> filter(source == "c3"),
aes(xintercept = -99),
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c2"),
aes(xintercept = -4),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c3"),
aes(xintercept = -97),
linetype = "dotted", color = "black", linewidth = 1) +
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c3"),
aes(xintercept = -3),
linetype = "3313", color = "black", linewidth = 1) +
geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 90),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 80),
aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),
inherit.aes = FALSE, color = "black", size = 4) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 90),
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 80),
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),
inherit.aes = FALSE, color = "black", size = 4) +
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 100),
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 80),
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"),
inherit.aes = FALSE, color = "black", size = 4) +
scale_fill_manual(values = c("returning_unaff_commit_count" = "#FFC107", # Color for "Returning Contributors"
@ -96,14 +96,14 @@ new_unaff_authors <- new_authors_long_df |>
strip.text = element_text(size = 14)# Increase legend title font size
) +
facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
"c1" = "VisualEditor",
"c2" = "HTTPS-as-default",
"c3" = "HTTP-deprecation"
"c1" = "VisualEditor (2013)",
"c2" = "HTTPS-as-default (2013)",
"c3" = "HTTP-deprecation (2015)"
)))
new_unaff_authors
ggsave(filename = "ww-0501-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800)
ggsave(filename = "d1-m2-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800)
unaff_authors <- new_authors_long_df |>
ggplot(aes(x=relative_week,

View File

@ -38,25 +38,37 @@ commit_authors <- long_df |>
fill=factor(commit_type))) +
geom_col(position='dodge') +
labs(x = "Relative Week", y = "Commits", fill="Commit Author") +
geom_vline(data = long_df |> filter(source == "c1"),
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -29),
linetype = "dotted", color = "black", linewidth = 1) +
geom_vline(data = long_df |> filter(source == "c1"),
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -9),
linetype = "dotted", color = "black", linewidth = 1) +
geom_vline(data = long_df |> filter(source == "c2"),
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -4),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c2"),
aes(xintercept = -99),
linetype = "dotted", color = "black", linewidth = 1) +
geom_vline(data = long_df |> filter(source == "c3"),
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c2"),
aes(xintercept = -4),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c3"),
aes(xintercept = -97),
linetype = "dotted", color = "black", linewidth = 1) +
geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 50),
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c3"),
aes(xintercept = -3),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 50),
aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),
inherit.aes = FALSE, color = "black", size = 4) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 50),
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 50),
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),
inherit.aes = FALSE, color = "black", size = 4) +
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 50),
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"),
inherit.aes = FALSE, color = "black", size = 4) +
scale_fill_manual(values = affiliationColors,
labels = c("unaff_commit_count" = "Unaffiliated",
"wikimedia_commit_count" = "WMF-affiliated")) +
@ -74,10 +86,10 @@ commit_authors <- long_df |>
strip.text = element_text(size = 14)# Increase legend title font size
) +
facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
"c1" = "VisualEditor (commits to extensions/visualeditor)",
"c2" = "HTTPS-as-default (relevant commits to mediawiki/core)",
"c3" = "HTTP-deprecation (relevant commits to mediawiki/core)"
"c1" = "VisualEditor (2013) [commits to extensions/visualeditor]",
"c2" = "HTTPS-as-default (2013) [relevant commits to mediawiki/core]",
"c3" = "HTTP-deprecation (2015) [relevant commits to mediawiki/core]"
)))
commit_authors
ggsave(filename = "ww-0501-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
ggsave(filename = "d1-m2-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)

Binary file not shown.

After

Width:  |  Height:  |  Size: 781 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 774 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 743 KiB

View File

@ -1,18 +1,17 @@
1. SSH tunnel from your workstation using the following command:
ssh -N -L 8787:n3439:39175 mjilg@klone.hyak.uw.edu
ssh -N -L 8787:n3439:38329 mjilg@klone.hyak.uw.edu
and point your web browser to http://localhost:8787
2. log in to RStudio Server using the following credentials:
user: mjilg
password: twImEJor5ex498HTzJjx
password: YXXLCjS/064zAiagiRdx
When done using RStudio Server, terminate the job by:
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:
scancel -f 25681892
slurmstepd: error: *** JOB 25681892 ON n3439 CANCELLED AT 2025-05-01T23:08:23 DUE TO TIME LIMIT ***
scancel -f 26402644

View File

@ -80,13 +80,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_55861/3758790231.py:41: SettingWithCopyWarning: \n",
"/tmp/ipykernel_76053/3758790231.py:41: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
"/tmp/ipykernel_55861/3758790231.py:44: SettingWithCopyWarning: \n",
"/tmp/ipykernel_76053/3758790231.py:44: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
@ -148,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
"metadata": {},
"outputs": [
@ -172,6 +172,29 @@
"print(f\"Unique speakers: {unique_speakers}\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b9229ca3-afb9-4eec-a173-f30be8c4729b",
"metadata": {},
"outputs": [],
"source": [
"given_date = pd.Timestamp(\"2013-08-28\").tz_localize(None)\n",
"task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n",
"task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "24205386-d18f-4fb7-b37d-e81c0a5ba532",
"metadata": {},
"outputs": [],
"source": [
"task_phab_df\n",
"task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
@ -1024,7 +1047,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
"version": "3.11.11"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "f6448c6f-2b5d-45f5-a32e-b3b47c16ef85",
"metadata": {},
"outputs": [],
@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "e30e81ad",
"metadata": {},
"outputs": [],
@ -75,7 +75,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "f359805f",
"metadata": {},
"outputs": [
@ -149,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "ffd0b263",
"metadata": {},
"outputs": [
@ -175,17 +175,17 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"id": "f32f6eed-3aeb-4b05-8d40-7ed85e7235c5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x1495ecba4bb0>"
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x14cab225fd00>"
]
},
"execution_count": 6,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -206,7 +206,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"id": "a5b062d8-2d26-4a3e-a84c-ba0eaf6eb436",
"metadata": {},
"outputs": [],
@ -220,26 +220,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"id": "424d35e0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"John is frustrated with the VisualEditor project, he thinks it doesn't work."
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"id": "999e1656-0036-4ba2-bedf-f54493f67790",
"metadata": {},
"outputs": [],
@ -285,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 12,
"id": "be476647-624b-4e95-ab62-9c6b08f85368",
"metadata": {},
"outputs": [],
@ -298,7 +279,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 13,
"id": "a9628b54-a1df-49cd-a365-9cba59de3421",
"metadata": {},
"outputs": [
@ -308,7 +289,7 @@
"'i hate ve.interface, ve.interface always messes up i browser'"
]
},
"execution_count": 10,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -334,54 +315,13 @@
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" \"\"\"Entry point for launching an IPython kernel.\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (904 > 512). Running this sequence through the model will result in indexing errors\n"
"Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n"
]
}
],
"source": [
"comment_phab_df['text'] = comment_phab_df['comment_text'].apply(str)\n",
"comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "2b583feb-1c62-4c96-9ba0-2996d72e70d3",
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "46088",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3360\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 46088",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_61233/1116300830.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'resolved_text'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m46088\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 940\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 941\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mkey_is_scalar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 942\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 943\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 944\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_hashable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1049\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1050\u001b[0m \u001b[0;31m# Similar to Index.get_value, but we do not fall back to positional\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1051\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1052\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_values_for_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1053\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3361\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3363\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3365\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 46088"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "92bf47ae",
"metadata": {},
"outputs": [],
"source": [
"comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)\n",
"comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/051825_coref_rel_phab_comments.csv\", index=False)"
]
}
@ -402,7 +342,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
"version": "3.11.11"
}
},
"nbformat": 4,

View File

@ -148,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
"metadata": {},
"outputs": [
@ -172,6 +172,29 @@
"print(f\"Unique speakers: {unique_speakers}\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0ef35632-ed07-478e-94ab-525169b82783",
"metadata": {},
"outputs": [],
"source": [
"given_date = pd.Timestamp(\"2015-07-02\").tz_localize(None)\n",
"task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n",
"task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1e7bda13-4c2d-413e-b3c6-9c4b38e6cb07",
"metadata": {},
"outputs": [],
"source": [
"task_phab_df\n",
"task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 7,

View File

@ -1168,7 +1168,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.11"
"version": "3.7.12"
}
},
"nbformat": 4,

View File

@ -3,6 +3,9 @@ library(tidyverse)
c1_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/phab_tasks.csv"
c1_input_df <- read.csv(c1_count , header = TRUE)
c2_count <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv"
c2_input_df <- read.csv(c2_count , header = TRUE)
c3_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv"
c3_input_df <- read.csv(c3_count , header = TRUE)
@ -10,17 +13,16 @@ c1_unique_counts <- c1_input_df %>%
group_by(meta.affil, week_bin) %>%
summarise(unique_count = n_distinct(conversation_id), .groups = "drop")
c2_unique_counts <- c2_input_df %>%
group_by(meta.affil, week_bin) %>%
summarise(unique_count = n_distinct(conversation_id), .groups = "drop")
c3_unique_counts <- c3_input_df %>%
group_by(meta.affil, week_bin) %>%
summarise(unique_count = n_distinct(conversation_id), .groups = "drop")
c1_unique_counts <- c1_unique_counts%>% mutate(source = "c1")
c2_unique_counts <- data.frame(
meta.affil = rep("False", 117),
week_bin = -103:13,
unique_count = rep(0, 117),
source = rep("c2", 117)
)
c2_unique_counts <- c2_unique_counts %>% mutate(source = "c2")
c3_unique_counts <- c3_unique_counts %>% mutate(source = "c3")
combined_df <- bind_rows(c1_unique_counts, c2_unique_counts, c3_unique_counts)
@ -37,28 +39,34 @@ commit_authors <- combined_df |>
ggplot(aes(x=week_bin,
y=unique_count,
fill=factor(meta.affil))) +
geom_col(position='dodge') +
labs(x = "Relative Week", y = "Tasks", fill="Task Author") +
geom_col(position='dodge2') +
labs(x = "Relative Week", y = "New Tasks Created", fill="Task Author") +
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -29),
linetype = "dotted", color = "black", linewidth = 1) +
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -9),
linetype = "dotted", color = "black", linewidth = 1) +
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c1"),
aes(xintercept = -4),
linetype = "3313", color = "black", linewidth = 1) +
linetype = "3313", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c2"),
aes(xintercept = -99),
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c2"),
aes(xintercept = -4),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c3"),
aes(xintercept = -97),
linetype = "dotted", color = "black", linewidth = 1) +
linetype = "dotted", color = "black", linewidth = 0.5) +
geom_vline(data = combined_df |> filter(source == "c3"),
aes(xintercept = -3),
linetype = "3313", color = "black", linewidth = 1) +
geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 130),
linetype = "3313", color = "black", linewidth = 0.5) +
geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 130),
aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),
inherit.aes = FALSE, color = "black", size = 4) +
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 130),
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 130),
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),
inherit.aes = FALSE, color = "black", size = 4) +
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 130),
@ -81,10 +89,10 @@ commit_authors <- combined_df |>
strip.text = element_text(size = 14)# Increase legend title font size
) +
facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
"c1" = "VisualEditor",
"c2" = "HTTPS-as-default",
"c3" = "HTTP-deprecation"
"c1" = "VisualEditor (2013)",
"c2" = "HTTPS-as-default (2013)",
"c3" = "HTTP-deprecation (2015)"
)))
commit_authors
ggsave(filename = "ww-0501-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
ggsave(filename = "d1-m2-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)