contributing_model_done
This commit is contained in:
		
							parent
							
								
									4917c423c6
								
							
						
					
					
						commit
						f52ee087b7
					
				
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											Binary file not shown.
										
									
								
							@ -16,10 +16,10 @@ library(lme4)
 | 
				
			|||||||
library(optimx)
 | 
					library(optimx)
 | 
				
			||||||
library(lattice)
 | 
					library(lattice)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#all_gmodel <- glmer.nb(log1p_count ~ before_after * week_index + scaled_age + (before_after * week_index | project_id),
 | 
					all_gmodel <- glmer.nb(log1p_count ~ before_after * week_index + scaled_age + (before_after * week_index | project_id),
 | 
				
			||||||
#                       control=glmerControl(optimizer="bobyqa",
 | 
					                       control=glmerControl(optimizer="bobyqa",
 | 
				
			||||||
#                                            optCtrl=list(maxfun=2e5)), nAGQ=0,
 | 
					                                            optCtrl=list(maxfun=2e5)), nAGQ=0,
 | 
				
			||||||
#                       data=readme_df)
 | 
					                       data=readme_df)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
summary(all_gmodel)
 | 
					summary(all_gmodel)
 | 
				
			||||||
#saveRDS(all_gmodel, "020125_readme_model.rda")
 | 
					#saveRDS(all_gmodel, "020125_readme_model.rda")
 | 
				
			||||||
@ -49,5 +49,3 @@ g <- glmer_ranef_Dweek |>
 | 
				
			|||||||
g
 | 
					g
 | 
				
			||||||
 | 
					
 | 
				
			||||||
write.csv(glmer_ranef_Dweek, "0201_readme_dweek_ranefs.csv")
 | 
					write.csv(glmer_ranef_Dweek, "0201_readme_dweek_ranefs.csv")
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -6,6 +6,12 @@ library(rdd)
 | 
				
			|||||||
readme_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
 | 
					readme_df_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
 | 
				
			||||||
df = read.csv(readme_df_filepath, header = TRUE)
 | 
					df = read.csv(readme_df_filepath, header = TRUE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#filtered_df <- df |>
 | 
				
			||||||
 | 
					#  filter(!project_id == "letsencrypt_letsencrypt")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#output_filepath <-"/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
 | 
				
			||||||
 | 
					#write.csv(filtered_df, output_filepath, row.names = FALSE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#EDA 
 | 
					#EDA 
 | 
				
			||||||
var(df$commit_count) # 112.4945
 | 
					var(df$commit_count) # 112.4945
 | 
				
			||||||
mean(df$commit_count) # 2.431342
 | 
					mean(df$commit_count) # 2.431342
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										
											BIN
										
									
								
								topic-outcome-models/020325_CONTRIBUTING_commit_topic_model.rda
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								topic-outcome-models/020325_CONTRIBUTING_commit_topic_model.rda
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										37
									
								
								topic-outcome-models/contributing_topic_outcome_model.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								topic-outcome-models/contributing_topic_outcome_model.R
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,37 @@
 | 
				
			|||||||
 | 
					library(dplyr)
 | 
				
			||||||
 | 
					library(lubridate)
 | 
				
			||||||
 | 
					library(rdd)
 | 
				
			||||||
 | 
					library(stringr)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					contributing_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
 | 
				
			||||||
 | 
					contributing_count_df = read.csv(contributing_count_data_filepath, header = TRUE) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					contributing_topic_dist_filepath <- "text_analysis/020125_CONTRIBUTING_file_topic_distributions.csv"
 | 
				
			||||||
 | 
					contributing_topics_df = read.csv(contributing_topic_dist_filepath, header = TRUE) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					window_num <- 5
 | 
				
			||||||
 | 
					contributing_count_df <- contributing_count_df |>
 | 
				
			||||||
 | 
					  filter(week_index >= (- window_num) & week_index <= (window_num)) |>
 | 
				
			||||||
 | 
					  mutate(scaled_age = scale(age)) |>
 | 
				
			||||||
 | 
					  mutate(scaled_age_at_commit = scale(age_at_commit))|>
 | 
				
			||||||
 | 
					  mutate(log1p_count = log1p(commit_count))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					summed_data <- contributing_count_df |>
 | 
				
			||||||
 | 
					  filter(before_after == 1) |>
 | 
				
			||||||
 | 
					  group_by(project_id) |>
 | 
				
			||||||
 | 
					  summarise_at(vars(commit_count), list(summed_count=sum))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					contributing_topics_df <- contributing_topics_df |>
 | 
				
			||||||
 | 
					  mutate(project_id = sapply(str_split(filename, "_hullabaloo_"), `[`, 1)) |>
 | 
				
			||||||
 | 
					  mutate(project_id = ifelse(filename=="_vcr_vcr_CONTRIBUTING.md", "vcr_vcr", project_id)) |>
 | 
				
			||||||
 | 
					  mutate(project_id = ifelse(filename=="marshmallow-code_marshmallow.git_CONTRIBUTING.rst", "marshmallow-code_marshmallow.git", project_id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					merged_df <- inner_join(summed_data, contributing_topics_df, by="project_id")
 | 
				
			||||||
 | 
					merged_df$logged_commits <- log1p(merged_df$summed_count)
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					library(MASS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4, data=merged_df)
 | 
				
			||||||
 | 
					qqnorm(residuals(commit_outcome_model))
 | 
				
			||||||
 | 
					summary(commit_outcome_model)
 | 
				
			||||||
 | 
					saveRDS(commit_outcome_model, "020325_commit_topic_model.rda")
 | 
				
			||||||
							
								
								
									
										30
									
								
								topic-outcome-models/readme_topic_outcome_model.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								topic-outcome-models/readme_topic_outcome_model.R
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,30 @@
 | 
				
			|||||||
 | 
					library(dplyr)
 | 
				
			||||||
 | 
					library(lubridate)
 | 
				
			||||||
 | 
					library(rdd)
 | 
				
			||||||
 | 
					library(stringr)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
 | 
				
			||||||
 | 
					readme_count_df = read.csv(readme_count_data_filepath, header = TRUE) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					readme_topic_dist_filepath <- "text_analysis/020125_README_file_topic_distributions.csv"
 | 
				
			||||||
 | 
					readme_topics_df = read.csv(readme_topic_dist_filepath, header = TRUE) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					window_num <- 5
 | 
				
			||||||
 | 
					readme_count_df <- readme_count_df |>
 | 
				
			||||||
 | 
					  filter(week_index >= (- window_num) & week_index <= (window_num)) |>
 | 
				
			||||||
 | 
					  mutate(scaled_age = scale(age)) |>
 | 
				
			||||||
 | 
					  mutate(scaled_age_at_commit = scale(age_at_commit))|>
 | 
				
			||||||
 | 
					  mutate(log1p_count = log1p(commit_count))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					summed_data <- readme_count_df |>
 | 
				
			||||||
 | 
					  filter(before_after == 1) |>
 | 
				
			||||||
 | 
					  group_by(project_id) |>
 | 
				
			||||||
 | 
					  summarise_at(vars(commit_count), list(summed_count=sum))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					readme_topics_df <- readme_topics_df |>
 | 
				
			||||||
 | 
					  mutate(project_id = sapply(str_split(filename, "_hullabaloo_"), `[`, 1)) |> 
 | 
				
			||||||
 | 
					  mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
 | 
				
			||||||
 | 
					  mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id)) 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
 | 
				
			||||||
 | 
					merged_df$logged_commits <- log1p(merged_df$summed_count)
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user