updated work for some m2 writing tomorrow
This commit is contained in:
parent
3573afbc1a
commit
fd1479775d
@ -173,3 +173,6 @@ ls ../case3
|
||||
cd ..
|
||||
ls
|
||||
ls case1
|
||||
ls
|
||||
cd case2
|
||||
ls
|
||||
|
@ -46,35 +46,35 @@ new_unaff_authors <- new_authors_long_df |>
|
||||
fill=commit_seniority)) +
|
||||
geom_col(position='dodge') +
|
||||
labs(x = "Relative Week", y = "Commits", fill="Contributor Tenure (New contributors <= 5 commits before deployment announcement)") +
|
||||
geom_vline(data = long_df |> filter(source == "c1"),
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -29),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
geom_vline(data = long_df |> filter(source == "c1"),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -9),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 1) +
|
||||
geom_vline(data = long_df |> filter(source == "c2"),
|
||||
aes(xintercept = -99),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c2"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 1) +
|
||||
geom_vline(data = long_df |> filter(source == "c3"),
|
||||
aes(xintercept = -99),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c2"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c3"),
|
||||
aes(xintercept = -97),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c3"),
|
||||
aes(xintercept = -3),
|
||||
linetype = "3313", color = "black", linewidth = 1) +
|
||||
geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 90),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 80),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
|
||||
geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 90),
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
|
||||
geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 80),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 100),
|
||||
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 80),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
scale_fill_manual(values = c("returning_unaff_commit_count" = "#FFC107", # Color for "Returning Contributors"
|
||||
@ -96,14 +96,14 @@ new_unaff_authors <- new_authors_long_df |>
|
||||
strip.text = element_text(size = 14) # Increase facet strip label font size
|
||||
) +
|
||||
facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
|
||||
"c1" = "VisualEditor",
|
||||
"c2" = "HTTPS-as-default",
|
||||
"c3" = "HTTP-deprecation"
|
||||
"c1" = "VisualEditor (2013)",
|
||||
"c2" = "HTTPS-as-default (2013)",
|
||||
"c3" = "HTTP-deprecation (2015)"
|
||||
)))
|
||||
|
||||
new_unaff_authors
|
||||
|
||||
ggsave(filename = "ww-0501-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800)
|
||||
ggsave(filename = "d1-m2-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800)
|
||||
|
||||
unaff_authors <- new_authors_long_df |>
|
||||
ggplot(aes(x=relative_week,
|
||||
|
@ -38,25 +38,37 @@ commit_authors <- long_df |>
|
||||
fill=factor(commit_type))) +
|
||||
geom_col(position='dodge') +
|
||||
labs(x = "Relative Week", y = "Commits", fill="Commit Author") +
|
||||
geom_vline(data = long_df |> filter(source == "c1"),
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -29),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
geom_vline(data = long_df |> filter(source == "c1"),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -9),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
geom_vline(data = long_df |> filter(source == "c2"),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c2"),
|
||||
aes(xintercept = -99),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
geom_vline(data = long_df |> filter(source == "c3"),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c2"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c3"),
|
||||
aes(xintercept = -97),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 50),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c3"),
|
||||
aes(xintercept = -3),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 50),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
|
||||
geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 50),
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
|
||||
geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 50),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 50),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
scale_fill_manual(values = affiliationColors,
|
||||
labels = c("unaff_commit_count" = "Unaffiliated",
|
||||
"wikimedia_commit_count" = "WMF-affiliated")) +
|
||||
@ -74,10 +86,10 @@ commit_authors <- long_df |>
|
||||
strip.text = element_text(size = 14) # Increase facet strip label font size
|
||||
) +
|
||||
facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
|
||||
"c1" = "VisualEditor (commits to extensions/visualeditor)",
|
||||
"c2" = "HTTPS-as-default (relevant commits to mediawiki/core)",
|
||||
"c3" = "HTTP-deprecation (relevant commits to mediawiki/core)"
|
||||
"c1" = "VisualEditor (2013) [commits to extensions/visualeditor]",
|
||||
"c2" = "HTTPS-as-default (2013) [relevant commits to mediawiki/core]",
|
||||
"c3" = "HTTP-deprecation (2015) [relevant commits to mediawiki/core]"
|
||||
)))
|
||||
commit_authors
|
||||
|
||||
ggsave(filename = "ww-0501-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
|
||||
ggsave(filename = "d1-m2-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
|
||||
|
BIN
m2-figures/d1-m2-bot-commits-faceted.png
Normal file
BIN
m2-figures/d1-m2-bot-commits-faceted.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 781 KiB |
BIN
m2-figures/d1-m2-commits-faceted.png
Normal file
BIN
m2-figures/d1-m2-commits-faceted.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 774 KiB |
BIN
m2-figures/d1-m2-tasks-faceted.png
Normal file
BIN
m2-figures/d1-m2-tasks-faceted.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 743 KiB |
@ -1,18 +1,17 @@
|
||||
1. SSH tunnel from your workstation using the following command:
|
||||
|
||||
ssh -N -L 8787:n3439:39175 mjilg@klone.hyak.uw.edu
|
||||
ssh -N -L 8787:n3439:38329 mjilg@klone.hyak.uw.edu
|
||||
|
||||
and point your web browser to http://localhost:8787
|
||||
|
||||
2. Log in to RStudio Server using the following credentials:
|
||||
|
||||
user: mjilg
|
||||
password: twImEJor5ex498HTzJjx
|
||||
password: YXXLCjS/064zAiagiRdx
|
||||
|
||||
When done using RStudio Server, terminate the job by:
|
||||
|
||||
1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
|
||||
2. Issue the following command on the login node:
|
||||
|
||||
scancel -f 25681892
|
||||
slurmstepd: error: *** JOB 25681892 ON n3439 CANCELLED AT 2025-05-01T23:08:23 DUE TO TIME LIMIT ***
|
||||
scancel -f 26402644
|
@ -80,13 +80,13 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_55861/3758790231.py:41: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_76053/3758790231.py:41: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n",
|
||||
"/tmp/ipykernel_55861/3758790231.py:44: SettingWithCopyWarning: \n",
|
||||
"/tmp/ipykernel_76053/3758790231.py:44: SettingWithCopyWarning: \n",
|
||||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||||
"\n",
|
||||
@ -148,7 +148,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -172,6 +172,29 @@
|
||||
"print(f\"Unique speakers: {unique_speakers}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b9229ca3-afb9-4eec-a173-f30be8c4729b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"given_date = pd.Timestamp(\"2013-08-28\").tz_localize(None)\n",
|
||||
"task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n",
|
||||
"task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "24205386-d18f-4fb7-b37d-e81c0a5ba532",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task_phab_df\n",
|
||||
"task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv\", index=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
@ -1024,7 +1047,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.12"
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
File diff suppressed because one or more lines are too long
1160
phab_analysis/case2/c2_resolved_phab.ipynb
Normal file
1160
phab_analysis/case2/c2_resolved_phab.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -24,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "f6448c6f-2b5d-45f5-a32e-b3b47c16ef85",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -35,7 +35,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "e30e81ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -75,7 +75,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "f359805f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -149,7 +149,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "ffd0b263",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -175,17 +175,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"id": "f32f6eed-3aeb-4b05-8d40-7ed85e7235c5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x1495ecba4bb0>"
|
||||
"<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x14cab225fd00>"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -206,7 +206,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"id": "a5b062d8-2d26-4a3e-a84c-ba0eaf6eb436",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -220,26 +220,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "424d35e0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"John is frustrated with the VisualEditor project, he thinks it doesn't work."
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 11,
|
||||
"id": "999e1656-0036-4ba2-bedf-f54493f67790",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -285,7 +266,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 12,
|
||||
"id": "be476647-624b-4e95-ab62-9c6b08f85368",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -298,7 +279,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 13,
|
||||
"id": "a9628b54-a1df-49cd-a365-9cba59de3421",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -308,7 +289,7 @@
|
||||
"'i hate ve.interface, ve.interface always messes up i browser'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -334,54 +315,13 @@
|
||||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||||
" \"\"\"Entry point for launching an IPython kernel.\n",
|
||||
"Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n",
|
||||
"Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n",
|
||||
"Token indices sequence length is longer than the specified maximum sequence length for this model (904 > 512). Running this sequence through the model will result in indexing errors\n"
|
||||
"Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"comment_phab_df['text'] = comment_phab_df['comment_text'].apply(str)\n",
|
||||
"comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "2b583feb-1c62-4c96-9ba0-2996d72e70d3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "KeyError",
|
||||
"evalue": "46088",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3360\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3361\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 46088",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m/tmp/ipykernel_61233/1116300830.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'resolved_text'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m46088\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 940\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 941\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mkey_is_scalar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 942\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 943\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 944\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_hashable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1049\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1050\u001b[0m \u001b[0;31m# Similar to Index.get_value, but we do not fall back to positional\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1051\u001b[0;31m \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1052\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_values_for_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1053\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3361\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3362\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3363\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3365\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 46088"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "92bf47ae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)\n",
|
||||
"comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/051825_coref_rel_phab_comments.csv\", index=False)"
|
||||
]
|
||||
}
|
||||
@ -402,7 +342,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.12"
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -148,7 +148,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"id": "942344db-c8f5-4ed6-a757-c97f8454f18b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -172,6 +172,29 @@
|
||||
"print(f\"Unique speakers: {unique_speakers}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "0ef35632-ed07-478e-94ab-525169b82783",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"given_date = pd.Timestamp(\"2015-07-02\").tz_localize(None)\n",
|
||||
"task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n",
|
||||
"task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "1e7bda13-4c2d-413e-b3c6-9c4b38e6cb07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"task_phab_df\n",
|
||||
"task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv\", index=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
|
@ -1168,7 +1168,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
"version": "3.7.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -3,6 +3,9 @@ library(tidyverse)
|
||||
c1_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/phab_tasks.csv"
|
||||
c1_input_df <- read.csv(c1_count , header = TRUE)
|
||||
|
||||
c2_count <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv"
|
||||
c2_input_df <- read.csv(c2_count , header = TRUE)
|
||||
|
||||
c3_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv"
|
||||
c3_input_df <- read.csv(c3_count , header = TRUE)
|
||||
|
||||
@ -10,17 +13,16 @@ c1_unique_counts <- c1_input_df %>%
|
||||
group_by(meta.affil, week_bin) %>%
|
||||
summarise(unique_count = n_distinct(conversation_id), .groups = "drop")
|
||||
|
||||
c2_unique_counts <- c2_input_df %>%
|
||||
group_by(meta.affil, week_bin) %>%
|
||||
summarise(unique_count = n_distinct(conversation_id), .groups = "drop")
|
||||
|
||||
c3_unique_counts <- c3_input_df %>%
|
||||
group_by(meta.affil, week_bin) %>%
|
||||
summarise(unique_count = n_distinct(conversation_id), .groups = "drop")
|
||||
|
||||
c1_unique_counts <- c1_unique_counts%>% mutate(source = "c1")
|
||||
c2_unique_counts <- data.frame(
|
||||
meta.affil = rep("False", 117),
|
||||
week_bin = -103:13,
|
||||
unique_count = rep(0, 117),
|
||||
source = rep("c2", 117)
|
||||
)
|
||||
c2_unique_counts <- c2_unique_counts %>% mutate(source = "c2")
|
||||
c3_unique_counts <- c3_unique_counts %>% mutate(source = "c3")
|
||||
|
||||
combined_df <- bind_rows(c1_unique_counts, c2_unique_counts, c3_unique_counts)
|
||||
@ -37,28 +39,34 @@ commit_authors <- combined_df |>
|
||||
ggplot(aes(x=week_bin,
|
||||
y=unique_count,
|
||||
fill=factor(meta.affil))) +
|
||||
geom_col(position='dodge') +
|
||||
labs(x = "Relative Week", y = "Tasks", fill="Task Author") +
|
||||
geom_col(position='dodge2') +
|
||||
labs(x = "Relative Week", y = "New Tasks Created", fill="Task Author") +
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -29),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -9),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c1"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 1) +
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c2"),
|
||||
aes(xintercept = -99),
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c2"),
|
||||
aes(xintercept = -4),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c3"),
|
||||
aes(xintercept = -97),
|
||||
linetype = "dotted", color = "black", linewidth = 1) +
|
||||
linetype = "dotted", color = "black", linewidth = 0.5) +
|
||||
geom_vline(data = combined_df |> filter(source == "c3"),
|
||||
aes(xintercept = -3),
|
||||
linetype = "3313", color = "black", linewidth = 1) +
|
||||
geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 130),
|
||||
linetype = "3313", color = "black", linewidth = 0.5) +
|
||||
geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 130),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0
|
||||
geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 130),
|
||||
geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0
|
||||
geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 130),
|
||||
aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),
|
||||
inherit.aes = FALSE, color = "black", size = 4) +
|
||||
geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 130),
|
||||
@ -81,10 +89,10 @@ commit_authors <- combined_df |>
|
||||
strip.text = element_text(size = 14) # Increase facet strip label font size
|
||||
) +
|
||||
facet_wrap(~source, nrow = 3, labeller = labeller(source = c(
|
||||
"c1" = "VisualEditor",
|
||||
"c2" = "HTTPS-as-default",
|
||||
"c3" = "HTTP-deprecation"
|
||||
"c1" = "VisualEditor (2013)",
|
||||
"c2" = "HTTPS-as-default (2013)",
|
||||
"c3" = "HTTP-deprecation (2015)"
|
||||
)))
|
||||
commit_authors
|
||||
|
||||
ggsave(filename = "ww-0501-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
|
||||
ggsave(filename = "d1-m2-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800)
|
||||
|
Loading…
Reference in New Issue
Block a user