# 24_deb_pkg_gov/R/data_updates.R
#
# Repository viewer metadata: 99 lines, 4.2 KiB, R.

library(tidyverse)
# Goals of this script:
# - drop the old age column (`age_of_project`)
# - keep the first-commit timestamp as a proper date-time (`first_commit_dt`)
# - project age in days, measured at the 2024-06-24 snapshot (`age_in_days`)
# - gap in days between the first commit and the document event (`event_gap`)

# Time zone used for every date-time conversion in this script. Without an
# explicit `tz`, as.POSIXct() falls back to the session time zone, so the
# derived columns and the written CSVs would differ from machine to machine.
# NOTE(review): UTC is chosen for reproducibility; confirm that `event_date`
# values stored as text are meant to be read as UTC.
analysis_tz <- "UTC"

# Instant that project ages are measured against (midnight, 2024-06-24).
snapshot_time <- as.POSIXct(
  "2024-06-24 00:00:00",
  format = "%Y-%m-%d %H:%M:%S",
  tz = analysis_tz
)

# Replace the legacy age column with first-commit-based age measures.
#
# Arguments:
#   df             data frame with a `first_commit` column (text matching
#                  "%a %b %d %H:%M:%S %Y %z") and an `event_date` column.
#   reference_time POSIXct instant that `age_in_days` is measured against.
#   tz             time zone used when parsing and displaying date-times.
#
# Returns `df` with three columns added and `age_of_project` removed
# (if present):
#   first_commit_dt  `first_commit` parsed to POSIXct
#   age_in_days      days from `first_commit_dt` to `reference_time`
#   event_gap        days from `first_commit_dt` to `event_date`
#
# NOTE(review): "%a" and "%b" are matched against the current LC_TIME locale,
# so the parse presumably needs an English locale -- confirm on the machine
# that runs this.
add_age_columns <- function(df,
                            reference_time = snapshot_time,
                            tz = analysis_tz) {
  out <- df |>
    mutate(
      # `.env$` makes sure the function arguments are used even if the data
      # happens to contain a column with the same name.
      first_commit_dt = as.POSIXct(
        first_commit,
        format = "%a %b %d %H:%M:%S %Y %z",
        tz = .env$tz
      ),
      age_in_days = as.numeric(
        difftime(.env$reference_time, first_commit_dt, units = "days")
      ),
      event_gap = as.numeric(
        difftime(
          as.POSIXct(event_date, format = "%Y-%m-%d %H:%M:%S", tz = .env$tz),
          first_commit_dt,
          units = "days"
        )
      )
    ) |>
    # any_of() keeps this a no-op when the legacy column is already gone.
    select(-any_of("age_of_project"))

  # A failed parse yields NA without any signal; surface it instead of letting
  # missing ages flow silently into the output files.
  n_unparsed <- sum(is.na(out[["first_commit_dt"]]) &
                      !is.na(out[["first_commit"]]))
  if (n_unparsed > 0) {
    warning(
      n_unparsed, " first_commit value(s) could not be parsed as date-times",
      call. = FALSE
    )
  }

  out
}

# README document updates
# First-commit timestamps for README projects come in two files; stack them
# into a single lookup table.
first_commit_df <- read_csv("../062424_did_first_commit_readme.csv")
first_commit_df_2 <- read_csv("../062424_did_first_commit_readme_2.csv")
first_commit_df <- rbind(first_commit_df, first_commit_df_2)

#### RDD CSV
old_rdd_readme <- read_csv("../final_data/deb_readme_did.csv")
# merge() is an inner join: rows without a first-commit record are dropped.
old_rdd_readme <- merge(old_rdd_readme, first_commit_df,
                        by = "upstream_vcs_link")
new_rm_data <- add_age_columns(old_rdd_readme)
head(new_rm_data)
write.csv(new_rm_data,
          file = "../final_data/deb_readme_did_updated.csv",
          row.names = FALSE)

#### PopChange CSV
old_pop_readme <- read_csv("../final_data/deb_readme_pop_change.csv")
old_pop_readme <- merge(old_pop_readme, first_commit_df,
                        by = "upstream_vcs_link")
new_pop_data <- add_age_columns(old_pop_readme)
head(new_pop_data)
write.csv(new_pop_data,
          file = "../final_data/deb_readme_pop_change_updated.csv",
          row.names = FALSE)

# CONTRIBUTING document updates
first_commit_contrib <- read_csv("../062424_did_first_commit_contrib.csv")

#### RDD CSV
old_rdd_contrib <- read_csv("../final_data/deb_contrib_did.csv")
old_rdd_contrib <- merge(old_rdd_contrib, first_commit_contrib,
                         by = "upstream_vcs_link")
new_rdd_contrib_data <- add_age_columns(old_rdd_contrib)
# NOTE(review): this output name carries a "_change" suffix that the README
# counterpart (deb_readme_did_updated.csv) does not; kept as-is because
# downstream code may read this exact path.
write.csv(new_rdd_contrib_data,
          file = "../final_data/deb_contrib_did_change_updated.csv",
          row.names = FALSE)

#### PopChange CSV
old_pop_contrib <- read_csv("../final_data/deb_contrib_pop_change.csv")
old_pop_contrib <- merge(old_pop_contrib, first_commit_contrib,
                         by = "upstream_vcs_link")
new_pop_contrib_data <- add_age_columns(old_pop_contrib)
write.csv(new_pop_contrib_data,
          file = "../final_data/deb_contrib_pop_change_updated.csv",
          row.names = FALSE)