updated work for some m2 writing tomorrow
This commit is contained in:
		
							parent
							
								
									3573afbc1a
								
							
						
					
					
						commit
						fd1479775d
					
				| @ -173,3 +173,6 @@ ls ../case3 | ||||
| cd .. | ||||
| ls | ||||
| ls case1 | ||||
| ls | ||||
| cd case2 | ||||
| ls | ||||
|  | ||||
| @ -46,35 +46,35 @@ new_unaff_authors <- new_authors_long_df |> | ||||
|              fill=commit_seniority)) + | ||||
|   geom_col(position='dodge') + | ||||
|   labs(x = "Relative Week", y = "Commits", fill="Contributor Tenure (New contributors <= 5 commits before deployment announcement)") + | ||||
|   geom_vline(data = long_df |> filter(source == "c1"),  | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -29),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|   geom_vline(data = long_df |> filter(source == "c1"),  | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -9),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -4),  | ||||
|              linetype = "3313", color = "black", linewidth = 1) + | ||||
|   geom_vline(data = long_df |> filter(source == "c2"),  | ||||
|              aes(xintercept = -99),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c2"),  | ||||
|              aes(xintercept = -4),  | ||||
|              linetype = "3313", color = "black", linewidth = 1) + | ||||
|   geom_vline(data = long_df |> filter(source == "c3"),  | ||||
|              aes(xintercept = -99),  | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c2"),  | ||||
|              aes(xintercept = -4),    | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c3"),  | ||||
|              aes(xintercept = -97),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c3"),  | ||||
|              aes(xintercept = -3),  | ||||
|              linetype = "3313", color = "black", linewidth = 1) + | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 90),  | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 80),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0 | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 90),  | ||||
|   geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0 | ||||
|   geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 80),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 100),  | ||||
|   geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 80),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   scale_fill_manual(values = c("returning_unaff_commit_count" = "#FFC107",  # Color for "Returning Contributors" | ||||
| @ -96,14 +96,14 @@ new_unaff_authors <- new_authors_long_df |> | ||||
|     strip.text = element_text(size = 14)# Increase legend title font size | ||||
|   ) + | ||||
|   facet_wrap(~source, nrow = 3, labeller = labeller(source = c( | ||||
|     "c1" = "VisualEditor", | ||||
|     "c2" = "HTTPS-as-default", | ||||
|     "c3" = "HTTP-deprecation" | ||||
|     "c1" = "VisualEditor (2013)", | ||||
|     "c2" = "HTTPS-as-default (2013)", | ||||
|     "c3" = "HTTP-deprecation (2015)" | ||||
|   ))) | ||||
| 
 | ||||
| new_unaff_authors | ||||
| 
 | ||||
| ggsave(filename = "ww-0501-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800) | ||||
| ggsave(filename = "d1-m2-bot-commits-faceted.png", plot = new_unaff_authors, width = 15, height = 9, dpi = 800) | ||||
| 
 | ||||
| unaff_authors <- new_authors_long_df |> | ||||
|   ggplot(aes(x=relative_week, | ||||
|  | ||||
| @ -38,25 +38,37 @@ commit_authors <- long_df |> | ||||
|              fill=factor(commit_type))) + | ||||
|   geom_col(position='dodge') + | ||||
|   labs(x = "Relative Week", y = "Commits", fill="Commit Author") + | ||||
|   geom_vline(data = long_df |> filter(source == "c1"),  | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -29),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|   geom_vline(data = long_df |> filter(source == "c1"),  | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -9),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|   geom_vline(data = long_df |> filter(source == "c2"),  | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -4),  | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c2"),  | ||||
|              aes(xintercept = -99),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|   geom_vline(data = long_df |> filter(source == "c3"),  | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c2"),  | ||||
|              aes(xintercept = -4),    | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c3"),  | ||||
|              aes(xintercept = -97),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 50),  | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c3"),  | ||||
|              aes(xintercept = -3),  | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 50),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0 | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 50),  | ||||
|   geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0 | ||||
|   geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 50),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 50),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment Announcement"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   scale_fill_manual(values = affiliationColors, | ||||
|                     labels = c("unaff_commit_count" = "Unaffiliated", | ||||
|                                "wikimedia_commit_count" = "WMF-affiliated")) + | ||||
| @ -74,10 +86,10 @@ commit_authors <- long_df |> | ||||
|     strip.text = element_text(size = 14)# Increase legend title font size | ||||
|   ) + | ||||
|   facet_wrap(~source, nrow = 3, labeller = labeller(source = c( | ||||
|     "c1" = "VisualEditor (commits to extensions/visualeditor)", | ||||
|     "c2" = "HTTPS-as-default (relevant commits to mediawiki/core)", | ||||
|     "c3" = "HTTP-deprecation (relevant commits to mediawiki/core)" | ||||
|     "c1" = "VisualEditor (2013) [commits to extensions/visualeditor]", | ||||
|     "c2" = "HTTPS-as-default (2013) [relevant commits to mediawiki/core]", | ||||
|     "c3" = "HTTP-deprecation (2015) [relevant commits to mediawiki/core]" | ||||
|   ))) | ||||
| commit_authors | ||||
| 
 | ||||
| ggsave(filename = "ww-0501-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800) | ||||
| ggsave(filename = "d1-m2-commits-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800) | ||||
|  | ||||
							
								
								
									
										
											BIN
										
									
								
								m2-figures/d1-m2-bot-commits-faceted.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								m2-figures/d1-m2-bot-commits-faceted.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 781 KiB | 
							
								
								
									
										
											BIN
										
									
								
								m2-figures/d1-m2-commits-faceted.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								m2-figures/d1-m2-commits-faceted.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 774 KiB | 
							
								
								
									
										
											BIN
										
									
								
								m2-figures/d1-m2-tasks-faceted.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								m2-figures/d1-m2-tasks-faceted.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 743 KiB | 
| @ -1,18 +1,17 @@ | ||||
| 1. SSH tunnel from your workstation using the following command: | ||||
| 
 | ||||
|    ssh -N -L 8787:n3439:39175 mjilg@klone.hyak.uw.edu | ||||
|    ssh -N -L 8787:n3439:38329 mjilg@klone.hyak.uw.edu | ||||
| 
 | ||||
|    and point your web browser to http://localhost:8787 | ||||
| 
 | ||||
| 2. log in to RStudio Server using the following credentials: | ||||
| 
 | ||||
|    user: mjilg | ||||
|    password: twImEJor5ex498HTzJjx | ||||
|    password: YXXLCjS/064zAiagiRdx | ||||
| 
 | ||||
| When done using RStudio Server, terminate the job by: | ||||
| 
 | ||||
| 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) | ||||
| 2. Issue the following command on the login node: | ||||
| 
 | ||||
|       scancel -f 25681892 | ||||
| slurmstepd: error: *** JOB 25681892 ON n3439 CANCELLED AT 2025-05-01T23:08:23 DUE TO TIME LIMIT *** | ||||
|       scancel -f 26402644 | ||||
| @ -80,13 +80,13 @@ | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "/tmp/ipykernel_55861/3758790231.py:41: SettingWithCopyWarning: \n", | ||||
|       "/tmp/ipykernel_76053/3758790231.py:41: SettingWithCopyWarning: \n", | ||||
|       "A value is trying to be set on a copy of a slice from a DataFrame.\n", | ||||
|       "Try using .loc[row_indexer,col_indexer] = value instead\n", | ||||
|       "\n", | ||||
|       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | ||||
|       "  mid_comment_phab_df['is_relevant'] = mid_comment_phab_df['conversation_id'].isin(relevant_conversation_ids)\n", | ||||
|       "/tmp/ipykernel_55861/3758790231.py:44: SettingWithCopyWarning: \n", | ||||
|       "/tmp/ipykernel_76053/3758790231.py:44: SettingWithCopyWarning: \n", | ||||
|       "A value is trying to be set on a copy of a slice from a DataFrame.\n", | ||||
|       "Try using .loc[row_indexer,col_indexer] = value instead\n", | ||||
|       "\n", | ||||
| @ -148,7 +148,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "execution_count": 8, | ||||
|    "id": "942344db-c8f5-4ed6-a757-c97f8454f18b", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
| @ -172,6 +172,29 @@ | ||||
|     "print(f\"Unique speakers: {unique_speakers}\")" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "id": "b9229ca3-afb9-4eec-a173-f30be8c4729b", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "given_date = pd.Timestamp(\"2013-08-28\").tz_localize(None)\n", | ||||
|     "task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n", | ||||
|     "task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "id": "24205386-d18f-4fb7-b37d-e81c0a5ba532", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "task_phab_df\n", | ||||
|     "task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv\", index=False)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
| @ -1024,7 +1047,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.7.12" | ||||
|    "version": "3.11.11" | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										1160
									
								
								phab_analysis/case2/c2_resolved_phab.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1160
									
								
								phab_analysis/case2/c2_resolved_phab.ipynb
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -24,7 +24,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 2, | ||||
|    "execution_count": 3, | ||||
|    "id": "f6448c6f-2b5d-45f5-a32e-b3b47c16ef85", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
| @ -35,7 +35,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 3, | ||||
|    "execution_count": 4, | ||||
|    "id": "e30e81ad", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
| @ -75,7 +75,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 4, | ||||
|    "execution_count": 5, | ||||
|    "id": "f359805f", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
| @ -149,7 +149,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "execution_count": 6, | ||||
|    "id": "ffd0b263", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
| @ -175,17 +175,17 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "execution_count": 9, | ||||
|    "id": "f32f6eed-3aeb-4b05-8d40-7ed85e7235c5", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x1495ecba4bb0>" | ||||
|        "<spacy_experimental.coref.span_resolver_component.SpanResolver at 0x14cab225fd00>" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 6, | ||||
|      "execution_count": 9, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
| @ -206,7 +206,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|    "execution_count": 10, | ||||
|    "id": "a5b062d8-2d26-4a3e-a84c-ba0eaf6eb436", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
| @ -220,26 +220,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "id": "424d35e0", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "data": { | ||||
|       "text/plain": [ | ||||
|        "John is frustrated with the VisualEditor project, he thinks it doesn't work." | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 6, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
|    ], | ||||
|    "source": [] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 8, | ||||
|    "execution_count": 11, | ||||
|    "id": "999e1656-0036-4ba2-bedf-f54493f67790", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
| @ -285,7 +266,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "execution_count": 12, | ||||
|    "id": "be476647-624b-4e95-ab62-9c6b08f85368", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
| @ -298,7 +279,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "execution_count": 13, | ||||
|    "id": "a9628b54-a1df-49cd-a365-9cba59de3421", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
| @ -308,7 +289,7 @@ | ||||
|        "'i hate ve.interface, ve.interface always messes up i browser'" | ||||
|       ] | ||||
|      }, | ||||
|      "execution_count": 10, | ||||
|      "execution_count": 13, | ||||
|      "metadata": {}, | ||||
|      "output_type": "execute_result" | ||||
|     } | ||||
| @ -334,54 +315,13 @@ | ||||
|       "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | ||||
|       "  \"\"\"Entry point for launching an IPython kernel.\n", | ||||
|       "Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n", | ||||
|       "Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n", | ||||
|       "Token indices sequence length is longer than the specified maximum sequence length for this model (904 > 512). Running this sequence through the model will result in indexing errors\n" | ||||
|       "Token indices sequence length is longer than the specified maximum sequence length for this model (911 > 512). Running this sequence through the model will result in indexing errors\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "comment_phab_df['text'] = comment_phab_df['comment_text'].apply(str)\n", | ||||
|     "comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 15, | ||||
|    "id": "2b583feb-1c62-4c96-9ba0-2996d72e70d3", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "ename": "KeyError", | ||||
|      "evalue": "46088", | ||||
|      "output_type": "error", | ||||
|      "traceback": [ | ||||
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||||
|       "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)", | ||||
|       "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3360\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3361\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3362\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | ||||
|       "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", | ||||
|       "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", | ||||
|       "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n", | ||||
|       "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[0;34m()\u001b[0m\n", | ||||
|       "\u001b[0;31mKeyError\u001b[0m: 46088", | ||||
|       "\nThe above exception was the direct cause of the following exception:\n", | ||||
|       "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)", | ||||
|       "\u001b[0;32m/tmp/ipykernel_61233/1116300830.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcomment_phab_df\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'resolved_text'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m46088\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | ||||
|       "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    940\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    941\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mkey_is_scalar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 942\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    943\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    944\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mis_hashable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | ||||
|       "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/series.py\u001b[0m in \u001b[0;36m_get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m   1049\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1050\u001b[0m         \u001b[0;31m# Similar to Index.get_value, but we do not fall back to positional\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1051\u001b[0;31m         \u001b[0mloc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1052\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_values_for_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1053\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | ||||
|       "\u001b[0;32m/gscratch/scrubbed/mjilg/envs/coref2-notebook/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3361\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3362\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3363\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3364\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3365\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0misna\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhasnans\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | ||||
|       "\u001b[0;31mKeyError\u001b[0m: 46088" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "id": "92bf47ae", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "comment_phab_df['resolved_text'] = comment_phab_df['text'].apply(resolving_comment)\n", | ||||
|     "comment_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/051825_coref_rel_phab_comments.csv\", index=False)" | ||||
|    ] | ||||
|   } | ||||
| @ -402,7 +342,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.7.12" | ||||
|    "version": "3.11.11" | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  | ||||
| @ -148,7 +148,7 @@ | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "execution_count": 8, | ||||
|    "id": "942344db-c8f5-4ed6-a757-c97f8454f18b", | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
| @ -172,6 +172,29 @@ | ||||
|     "print(f\"Unique speakers: {unique_speakers}\")" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "id": "0ef35632-ed07-478e-94ab-525169b82783", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "given_date = pd.Timestamp(\"2015-07-02\").tz_localize(None)\n", | ||||
|     "task_phab_df['timestamp'] = pd.to_datetime(task_phab_df['timestamp'], unit='s').dt.tz_localize(None)\n", | ||||
|     "task_phab_df['week_bin'] = ((task_phab_df['timestamp'] - given_date).dt.days // 7)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 10, | ||||
|    "id": "1e7bda13-4c2d-413e-b3c6-9c4b38e6cb07", | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "task_phab_df\n", | ||||
|     "task_phab_df.to_csv(\"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv\", index=False)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 7, | ||||
|  | ||||
| @ -1168,7 +1168,7 @@ | ||||
|    "name": "python", | ||||
|    "nbconvert_exporter": "python", | ||||
|    "pygments_lexer": "ipython3", | ||||
|    "version": "3.11.11" | ||||
|    "version": "3.7.12" | ||||
|   } | ||||
|  }, | ||||
|  "nbformat": 4, | ||||
|  | ||||
| @ -3,6 +3,9 @@ library(tidyverse) | ||||
| c1_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case1/phab_tasks.csv" | ||||
| c1_input_df <- read.csv(c1_count , header = TRUE)  | ||||
| 
 | ||||
| c2_count <- "/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case2/phab_tasks.csv" | ||||
| c2_input_df <- read.csv(c2_count , header = TRUE)  | ||||
| 
 | ||||
| c3_count <-"/mmfs1/gscratch/comdata/users/mjilg/mw-repo-lifecycles/case3/phab_tasks.csv" | ||||
| c3_input_df <- read.csv(c3_count , header = TRUE)  | ||||
| 
 | ||||
| @ -10,17 +13,16 @@ c1_unique_counts <- c1_input_df %>% | ||||
|   group_by(meta.affil, week_bin) %>% | ||||
|   summarise(unique_count = n_distinct(conversation_id), .groups = "drop") | ||||
| 
 | ||||
| c2_unique_counts <- c2_input_df %>% | ||||
|   group_by(meta.affil, week_bin) %>% | ||||
|   summarise(unique_count = n_distinct(conversation_id), .groups = "drop") | ||||
| 
 | ||||
| c3_unique_counts <- c3_input_df %>% | ||||
|   group_by(meta.affil, week_bin) %>% | ||||
|   summarise(unique_count = n_distinct(conversation_id), .groups = "drop") | ||||
| 
 | ||||
| c1_unique_counts <- c1_unique_counts%>% mutate(source = "c1") | ||||
| c2_unique_counts <- data.frame( | ||||
|   meta.affil = rep("False", 117),  | ||||
|   week_bin = -103:13,           | ||||
|   unique_count = rep(0, 117),   | ||||
|   source = rep("c2", 117)        | ||||
| ) | ||||
| c2_unique_counts <- c2_unique_counts %>% mutate(source = "c2") | ||||
| c3_unique_counts <- c3_unique_counts %>% mutate(source = "c3") | ||||
| 
 | ||||
| combined_df <- bind_rows(c1_unique_counts, c2_unique_counts, c3_unique_counts) | ||||
| @ -37,28 +39,34 @@ commit_authors <- combined_df |> | ||||
|   ggplot(aes(x=week_bin,  | ||||
|              y=unique_count,  | ||||
|              fill=factor(meta.affil))) + | ||||
|   geom_col(position='dodge') + | ||||
|   labs(x = "Relative Week", y = "Tasks", fill="Task Author") + | ||||
|   geom_col(position='dodge2') + | ||||
|   labs(x = "Relative Week", y = "New Tasks Created", fill="Task Author") + | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -29),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -9),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c1"),  | ||||
|              aes(xintercept = -4),  | ||||
|              linetype = "3313", color = "black", linewidth = 1) + | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c2"),  | ||||
|              aes(xintercept = -99),  | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c2"),  | ||||
|              aes(xintercept = -4),    | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c3"),  | ||||
|              aes(xintercept = -97),  | ||||
|              linetype = "dotted", color = "black", linewidth = 1) + | ||||
|              linetype = "dotted", color = "black", linewidth = 0.5) + | ||||
|   geom_vline(data = combined_df |> filter(source == "c3"),  | ||||
|              aes(xintercept = -3),  | ||||
|              linetype = "3313", color = "black", linewidth = 1) + | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = -40, lengthened_commit_count = 130),  | ||||
|              linetype = "3313", color = "black", linewidth = 0.5) + | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = -39, lengthened_commit_count = 130),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Opt-In Testing Deployment"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 1) + # Add vertical line at week 0 | ||||
|   geom_text(data = data.frame(source = "c1", relative_week = 7, lengthened_commit_count = 130),  | ||||
|   geom_vline(xintercept = 0, linetype = "dashed", color = "black", linewidth = 0.5) + # Add vertical line at week 0 | ||||
|   geom_text(data = data.frame(source = "c2", relative_week = 7, lengthened_commit_count = 130),  | ||||
|             aes(x = relative_week, y = lengthened_commit_count, label = "Wide Deployment"),  | ||||
|             inherit.aes = FALSE, color = "black", size = 4) + | ||||
|   geom_text(data = data.frame(source = "c3", relative_week = -15, lengthened_commit_count = 130),  | ||||
| @ -81,10 +89,10 @@ commit_authors <- combined_df |> | ||||
|     strip.text = element_text(size = 14)# Increase legend title font size | ||||
|   ) + | ||||
|   facet_wrap(~source, nrow = 3, labeller = labeller(source = c( | ||||
|     "c1" = "VisualEditor", | ||||
|     "c2" = "HTTPS-as-default", | ||||
|     "c3" = "HTTP-deprecation" | ||||
|     "c1" = "VisualEditor (2013)", | ||||
|     "c2" = "HTTPS-as-default (2013)", | ||||
|     "c3" = "HTTP-deprecation (2015)" | ||||
|   ))) | ||||
| commit_authors | ||||
| 
 | ||||
| ggsave(filename = "ww-0501-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800) | ||||
| ggsave(filename = "d1-m2-tasks-faceted.png", plot = commit_authors, width = 15, height = 9, dpi = 800) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user