finished commit/topic outcome model
This commit is contained in:
		
							parent
							
								
									ef5348013e
								
							
						
					
					
						commit
						6c4ed8461d
					
				
							
								
								
									
										
											BIN
										
									
								
								020325_CONTRIBUTING_commit_topic_model.rda
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								020325_CONTRIBUTING_commit_topic_model.rda
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@ -6,7 +6,7 @@ library(stringr)
 | 
				
			|||||||
readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
 | 
					readme_count_data_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
 | 
				
			||||||
readme_count_df = read.csv(readme_count_data_filepath, header = TRUE) 
 | 
					readme_count_df = read.csv(readme_count_data_filepath, header = TRUE) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
readme_topic_dist_filepath <- "text_analysis/020125_README_file_topic_distributions.csv"
 | 
					readme_topic_dist_filepath <- "text_analysis/020325_README_file_topic_distributions.csv"
 | 
				
			||||||
readme_topics_df = read.csv(readme_topic_dist_filepath, header = TRUE) 
 | 
					readme_topics_df = read.csv(readme_topic_dist_filepath, header = TRUE) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
window_num <- 5
 | 
					window_num <- 5
 | 
				
			||||||
@ -26,14 +26,14 @@ readme_topics_df <- readme_topics_df |>
 | 
				
			|||||||
  mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
 | 
					  mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
 | 
				
			||||||
  mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id)) 
 | 
					  mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id)) 
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
filtered_topics <- readme_topics_df |>
 | 
					#loss of jaraco_keyring, though jaraco keyrings.alt is represented
 | 
				
			||||||
  filter(project_id %in% summed_data$project_id)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
 | 
					merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
multiple_mappings <- merged_df %>%
 | 
					 | 
				
			||||||
  group_by(project_id) %>%
 | 
					 | 
				
			||||||
  filter(n() > 1) %>%
 | 
					 | 
				
			||||||
  ungroup()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
merged_df$logged_commits <- log1p(merged_df$summed_count)
 | 
					merged_df$logged_commits <- log1p(merged_df$summed_count)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					library(MASS)
 | 
				
			||||||
 | 
					commit_outcome_model <- glm.nb(logged_commits ~ 0 + t0 + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8 + t9 + t10, data=merged_df)
 | 
				
			||||||
 | 
					qqnorm(residuals(commit_outcome_model))
 | 
				
			||||||
 | 
					summary(commit_outcome_model)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					saveRDS(commit_outcome_model, "020325_CONTRIBUTING_commit_topic_model.rda")
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user