# (listing header left over from the repository viewer: 99 lines, 4.2 KiB, R)
library(tidyverse)

# Things to get, for every document-event dataset:
# - delete the old age column (age_of_project)
# - the first commit as a proper date-time (first_commit_dt)
# - age of the project in days as of the snapshot date (age_in_days)
# - delta between first commit and document event in days (event_gap)

# Helpers ----

# Date the first-commit data was collected; project age is measured against
# it. Pinned to UTC so the result does not depend on the time zone of the
# machine running the script.
snapshot_dt <- as.POSIXct(
  "2024-06-24 00:00:00",
  format = "%Y-%m-%d %H:%M:%S",
  tz = "UTC"
)

# Parse timestamps of the form "%a %b %d %H:%M:%S %Y %z" into POSIXct (UTC).
#
# %a and %b are matched against the names of the current LC_TIME locale, so
# the parse is done under the "C" locale and the previous locale is restored
# on exit. Values that fail to parse become NA; a warning reports how many.
parse_git_date <- function(x) {
  old_locale <- Sys.getlocale("LC_TIME")
  on.exit(Sys.setlocale("LC_TIME", old_locale), add = TRUE)
  Sys.setlocale("LC_TIME", "C")

  parsed <- as.POSIXct(x, format = "%a %b %d %H:%M:%S %Y %z", tz = "UTC")

  n_failed <- sum(is.na(parsed) & !is.na(x))
  if (n_failed > 0) {
    warning(
      n_failed, " first_commit value(s) could not be parsed as dates",
      call. = FALSE
    )
  }
  parsed
}

# Attach first-commit information to a document-event table.
#
# doc_df          data frame with `upstream_vcs_link` and `event_date`.
# first_commit_df data frame with `upstream_vcs_link` and `first_commit`.
# as_of           POSIXct reference time for `age_in_days`.
#
# Returns the inner join of the two tables on `upstream_vcs_link` with
# `first_commit_dt`, `age_in_days` and `event_gap` added and the old
# `age_of_project` column removed (if present).
add_age_columns <- function(doc_df, first_commit_df, as_of = snapshot_dt) {
  # merge() keeps only links present in both tables; make the loss visible
  # instead of silently shrinking the dataset.
  unmatched <-
    !(doc_df$upstream_vcs_link %in% first_commit_df$upstream_vcs_link)
  if (any(unmatched)) {
    warning(
      sum(unmatched), " row(s) have no first-commit record and are dropped",
      call. = FALSE
    )
  }

  merged <- merge(doc_df, first_commit_df, by = "upstream_vcs_link") |>
    mutate(
      first_commit_dt = parse_git_date(first_commit),
      age_in_days = as.numeric(
        difftime(as_of, first_commit_dt, units = "days")
      ),
      event_gap = as.numeric(
        difftime(
          # `format` only matters when event_date is still character
          as.POSIXct(event_date, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
          first_commit_dt,
          units = "days"
        )
      )
    )

  merged$age_of_project <- NULL
  merged
}

# README document updates ----

# Loading in new ages: the first-commit data is split across two files.
# rbind() stops with an error if their columns differ.
first_commit_df <- rbind(
  read_csv("../062424_did_first_commit_readme.csv"),
  read_csv("../062424_did_first_commit_readme_2.csv")
)

#### RDD CSV
new_rm_data <- read_csv("../final_data/deb_readme_did.csv") |>
  add_age_columns(first_commit_df)
head(new_rm_data)
write.csv(
  new_rm_data,
  file = "../final_data/deb_readme_did_updated.csv",
  row.names = FALSE
)

#### PopChange CSV
new_pop_data <- read_csv("../final_data/deb_readme_pop_change.csv") |>
  add_age_columns(first_commit_df)
head(new_pop_data)
write.csv(
  new_pop_data,
  file = "../final_data/deb_readme_pop_change_updated.csv",
  row.names = FALSE
)

# CONTRIBUTING document updates ----

first_commit_contrib <- read_csv("../062424_did_first_commit_contrib.csv")

#### RDD CSV
new_rdd_contrib_data <- read_csv("../final_data/deb_contrib_did.csv") |>
  add_age_columns(first_commit_contrib)
# NOTE(review): the output name says "did_change_updated" while the README
# counterpart is "did_updated"; kept as-is because other scripts may read
# this exact path. Confirm before renaming.
write.csv(
  new_rdd_contrib_data,
  file = "../final_data/deb_contrib_did_change_updated.csv",
  row.names = FALSE
)

#### PopChange CSV
new_pop_contrib_data <- read_csv("../final_data/deb_contrib_pop_change.csv") |>
  add_age_columns(first_commit_contrib)
write.csv(
  new_pop_contrib_data,
  file = "../final_data/deb_contrib_pop_change_updated.csv",
  row.names = FALSE
)