# Source listing metadata: 20 lines, 834 B, R
library(dplyr)

# Reconcile the weekly CONTRIBUTING count data with the validation manifest:
#   1. read both CSVs,
#   2. find project_ids in the count data that are not repo_ids in the manifest,
#   3. drop those projects from the count data,
#   4. find manifest repo_ids that have no rows left in the filtered count data,
#   5. write the filtered count data to a new CSV.
# Bare top-level expressions below are there to display their value when the
# script is run.

# Paths ----
# Input and output count files live in the same metadata directory.
metadata_dir <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/metadata"
count_path <- file.path(
  metadata_dir,
  "co_0205_CONTRIBUTING_weekly_count_data.csv"
)
# NOTE(review): relative path, so it resolves against the current working
# directory -- confirm the script is always launched from the expected place.
manifest_path <- "validation/020525_CONTRIBUTING_manifest.csv"
output_path <- file.path(
  metadata_dir,
  "final_0207_CONTRIBUTING_weekly_count_data.csv"
)

# Load ----
count_df <- read.csv(count_path)
manifest_df <- read.csv(manifest_path)

# Fail fast when a key column is absent. Without this, a missing `repo_id`
# column evaluates to NULL, every project is treated as missing from the
# manifest, and an empty CSV is written without any error.
if (!"project_id" %in% names(count_df)) {
  stop("`project_id` column not found in ", count_path, call. = FALSE)
}
if (!"repo_id" %in% names(manifest_df)) {
  stop("`repo_id` column not found in ", manifest_path, call. = FALSE)
}

# Projects in the count data that the manifest does not list ----
unique_project_ids <- unique(count_df$project_id)
length(unique_project_ids)

missing_project_ids <- setdiff(unique_project_ids, manifest_df$repo_id)
missing_project_ids

# Keep only rows whose project appears in the manifest ----
count_df_filtered <- count_df %>%
  filter(!project_id %in% missing_project_ids)

length(unique(count_df_filtered$project_id))

# Manifest repos with no rows in the filtered count data ----
# Displayed like `missing_project_ids` so the reverse check is visible too.
missing_repo_ids <- setdiff(manifest_df$repo_id, count_df_filtered$project_id)
missing_repo_ids

# Save ----
write.csv(count_df_filtered, output_path, row.names = FALSE)