updating data with new initial commit dates
This commit is contained in:
parent
033a0a2bee
commit
765e18738e
62
R/data_updates.R
Normal file
62
R/data_updates.R
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
library(tidyverse)
|
||||||
|
#things to get:
|
||||||
|
# - delete old age column
|
||||||
|
# - normal age, in date
|
||||||
|
# - age from today in days
|
||||||
|
# - delta between first commit and document in days
|
||||||
|
#README Document updates
|
||||||
|
#loading in new ages
|
||||||
|
####RDD CSV
|
||||||
|
# First-commit timestamps are split across two CSV exports: read each one,
# then stack the second under the first to get a single lookup table.
first_commit_df <- read_csv(file = "../062424_did_first_commit_readme.csv")
first_commit_df_2 <- read_csv(
  file = "../062424_did_first_commit_readme_2.csv"
)
first_commit_df <- first_commit_df |>
  rbind(first_commit_df_2)
# Manual sanity check still to do: inspect head() and the size of the
# combined table.
|
||||||
|
# Rebuild the README RDD table: attach each project's first-commit timestamp
# and derive age columns from it, replacing the old age_of_project column.
old_rdd_readme <- read_csv("../final_data/deb_readme_did.csv")
# NOTE(review): merge() defaults to an inner join, so rows without a matching
# upstream_vcs_link in first_commit_df are dropped and repeated links fan out
# into duplicate rows -- confirm both are acceptable here.
old_rdd_readme <- merge(old_rdd_readme, first_commit_df,
                        by = "upstream_vcs_link")

# Reference instant for age_in_days. The time zone is pinned to UTC so the
# computed ages (and the first_commit_dt text written to the CSV) do not
# change with the time zone of the machine running this script.
readme_rdd_cutoff <- as.POSIXct("2024-06-24 00:00:00",
                                format = "%Y-%m-%d %H:%M:%S", tz = "UTC")

new_rm_data <- old_rdd_readme |>
  mutate(
    # The format expects a numeric UTC offset (%z), so first_commit parses to
    # an absolute instant; tz only controls how that instant is displayed.
    # NOTE(review): %a and %b are matched against the current locale's
    # day/month names -- assumes an English LC_TIME.
    first_commit_dt = as.POSIXct(first_commit,
                                 format = "%a %b %d %H:%M:%S %Y %z",
                                 tz = "UTC"),
    # days from the first commit to the fixed reference instant
    age_in_days = as.numeric(
      difftime(readme_rdd_cutoff, first_commit_dt, units = "days")
    ),
    # days from the first commit to event_date
    # NOTE(review): if event_date is read as text it is now interpreted as
    # UTC rather than local time -- confirm that matches how it was recorded.
    event_gap = as.numeric(
      difftime(as.POSIXct(event_date, format = "%Y-%m-%d %H:%M:%S",
                          tz = "UTC"),
               first_commit_dt, units = "days")
    )
  )
head(new_rm_data)
# drop the old age column now that the new age columns exist
new_rm_data$age_of_project <- NULL
head(new_rm_data)
write.csv(new_rm_data, file = "../final_data/deb_readme_did_updated.csv",
          row.names = FALSE)
|
||||||
|
####PopChange CSV
|
||||||
|
# Rebuild the README population-change table: attach each project's
# first-commit timestamp and derive age columns from it, replacing the old
# age_of_project column.
old_pop_readme <- read_csv("../final_data/deb_readme_pop_change.csv")
# NOTE(review): merge() defaults to an inner join, so rows without a matching
# upstream_vcs_link in first_commit_df are dropped and repeated links fan out
# into duplicate rows -- confirm both are acceptable here.
old_pop_readme <- merge(old_pop_readme, first_commit_df,
                        by = "upstream_vcs_link")

# Reference instant for age_in_days. The time zone is pinned to UTC so the
# computed ages (and the first_commit_dt text written to the CSV) do not
# change with the time zone of the machine running this script.
readme_pop_cutoff <- as.POSIXct("2024-06-24 00:00:00",
                                format = "%Y-%m-%d %H:%M:%S", tz = "UTC")

new_pop_data <- old_pop_readme |>
  mutate(
    # The format expects a numeric UTC offset (%z), so first_commit parses to
    # an absolute instant; tz only controls how that instant is displayed.
    # NOTE(review): %a and %b are matched against the current locale's
    # day/month names -- assumes an English LC_TIME.
    first_commit_dt = as.POSIXct(first_commit,
                                 format = "%a %b %d %H:%M:%S %Y %z",
                                 tz = "UTC"),
    # days from the first commit to the fixed reference instant
    age_in_days = as.numeric(
      difftime(readme_pop_cutoff, first_commit_dt, units = "days")
    ),
    # days from the first commit to event_date
    # NOTE(review): if event_date is read as text it is now interpreted as
    # UTC rather than local time -- confirm that matches how it was recorded.
    event_gap = as.numeric(
      difftime(as.POSIXct(event_date, format = "%Y-%m-%d %H:%M:%S",
                          tz = "UTC"),
               first_commit_dt, units = "days")
    )
  )
# drop the old age column now that the new age columns exist
new_pop_data$age_of_project <- NULL
head(new_pop_data)
write.csv(new_pop_data,
          file = "../final_data/deb_readme_pop_change_updated.csv",
          row.names = FALSE)
|
||||||
|
#CONTRIBUTING Document updates
|
||||||
|
|
||||||
|
####RDD CSV
|
||||||
|
# Load the existing CONTRIBUTING tables. Nothing further is done with them in
# this script yet.
# RDD table
old_rdd_contrib <- read_csv(file = "../final_data/deb_contrib_did.csv")
# population-change table
old_pop_contrib <- read_csv(
  file = "../final_data/deb_contrib_pop_change.csv"
)
|
2280
final_data/deb_readme_did_updated.csv
Normal file
2280
final_data/deb_readme_did_updated.csv
Normal file
File diff suppressed because it is too large
Load Diff
2280
final_data/deb_readme_pop_change_updated.csv
Normal file
2280
final_data/deb_readme_pop_change_updated.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user