caught duplicates in documents, re-running README topic model
This commit is contained in:
		
							parent
							
								
									f52ee087b7
								
							
						
					
					
						commit
						5ab4b58542
					
				
							
								
								
									
										
											BIN
										
									
								
								020325_readme_model.rda
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								020325_readme_model.rda
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										4248
									
								
								0203_readme_dweek_ranefs.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4248
									
								
								0203_readme_dweek_ranefs.csv
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										17
									
								
								mg-govdoc-cr_24004290.out
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								mg-govdoc-cr_24004290.out
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,17 @@
 | 
				
			|||||||
 | 
					1. SSH tunnel from your workstation using the following command:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   ssh -N -L 8787:n3434:42289 mjilg@klone.hyak.uw.edu
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   and point your web browser to http://localhost:8787
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. log in to RStudio Server using the following credentials:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   user: mjilg
 | 
				
			||||||
 | 
					   password: K/bHQjx0xRAp26CGpsXM
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When done using RStudio Server, terminate the job by:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
 | 
				
			||||||
 | 
					2. Issue the following command on the login node:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      scancel -f 24004290
 | 
				
			||||||
@ -22,7 +22,7 @@ all_gmodel <- glmer.nb(log1p_count ~ before_after * week_index + scaled_age + (b
 | 
				
			|||||||
                       data=readme_df)
 | 
					                       data=readme_df)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
summary(all_gmodel)
 | 
					summary(all_gmodel)
 | 
				
			||||||
#saveRDS(all_gmodel, "020125_readme_model.rda")
 | 
					#saveRDS(all_gmodel, "020325_readme_model.rda")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
model_residuals <- residuals(all_gmodel)
 | 
					model_residuals <- residuals(all_gmodel)
 | 
				
			||||||
acf(model_residuals)
 | 
					acf(model_residuals)
 | 
				
			||||||
@ -35,7 +35,7 @@ variance_components <- as.data.frame(VarCorr(all_gmodel))
 | 
				
			|||||||
library(broom.mixed)
 | 
					library(broom.mixed)
 | 
				
			||||||
library(ggplot2)
 | 
					library(ggplot2)
 | 
				
			||||||
condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 | 
					condvals <- broom.mixed::tidy(all_gmodel, effects = "ran_vals", conf.int = TRUE)
 | 
				
			||||||
glmer_ranef_Dweek <- test_condvals [which(test_condvals $term == "before_after:week_index"),]
 | 
					glmer_ranef_Dweek <- condvals [which(condvals $term == "before_after:week_index"),]
 | 
				
			||||||
has_zero <- function(estimate, low, high){
 | 
					has_zero <- function(estimate, low, high){
 | 
				
			||||||
  return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 | 
					  return(ifelse((low < 0),ifelse((high > 0), 1, 0), 2))
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -48,4 +48,4 @@ g <- glmer_ranef_Dweek |>
 | 
				
			|||||||
  theme_bw()
 | 
					  theme_bw()
 | 
				
			||||||
g
 | 
					g
 | 
				
			||||||
 | 
					
 | 
				
			||||||
write.csv(glmer_ranef_Dweek, "0201_readme_dweek_ranefs.csv")
 | 
					write.csv(glmer_ranef_Dweek, "0203_readme_dweek_ranefs.csv")
 | 
				
			||||||
 | 
				
			|||||||
@ -26,5 +26,14 @@ readme_topics_df <- readme_topics_df |>
 | 
				
			|||||||
  mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
 | 
					  mutate(project_id = ifelse(filename=="jaraco_keyrings.alt_hullabaloo_README.rst", "jaraco_keyrings.alt", project_id)) |>
 | 
				
			||||||
  mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id)) 
 | 
					  mutate(project_id = ifelse(filename=="_vcr_vcr_README.md", "vcr_vcr", project_id)) 
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
 | 
					filtered_topics <- readme_topics_df |>
 | 
				
			||||||
 | 
					  filter(project_id %in% summed_data$project_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
 | 
					merged_df <- inner_join(summed_data, readme_topics_df, by="project_id")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					multiple_mappings <- merged_df %>%
 | 
				
			||||||
 | 
					  group_by(project_id) %>%
 | 
				
			||||||
 | 
					  filter(n() > 1) %>%
 | 
				
			||||||
 | 
					  ungroup()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
merged_df$logged_commits <- log1p(merged_df$summed_count)
 | 
					merged_df$logged_commits <- log1p(merged_df$summed_count)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user