81 lines
3.7 KiB
R
81 lines
3.7 KiB
R
|
# ---- CONTRIBUTING weekly commit counts: load, window, derive ----
# Requires the dplyr package to be installed. Its verbs are called with an
# explicit `dplyr::` prefix so this section does not rely on dplyr having been
# attached earlier in the session (without it, `filter` would resolve to
# stats::filter and fail).
contributing_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/CONTRIBUTING_weekly_count_data.csv"
contributing_count_df <- read.csv(contributing_count_filepath, header = TRUE)

# Half-width of the window kept around week_index 0.
window_num <- 5

# Keep rows with -window_num <= week_index <= window_num, then add the derived
# columns. scale() returns a one-column matrix carrying "scaled:center" /
# "scaled:scale" attributes; as.numeric() stores the same z-scores as a plain
# numeric column instead of a matrix column.
contributing_count_df <- contributing_count_df |>
  dplyr::filter(week_index >= -window_num & week_index <= window_num) |>
  dplyr::mutate(
    scaled_age = as.numeric(scale(age)),
    scaled_age_at_commit = as.numeric(scale(age_at_commit)),
    log1p_count = log1p(commit_count)
  )

# Quartiles of both age columns after dividing by 365
# (presumably converting days to years -- TODO confirm the unit of `age`).
quantile(contributing_count_df$age / 365, probs = c(0.25, 0.5, 0.75))
quantile(contributing_count_df$age_at_commit / 365, probs = c(0.25, 0.5, 0.75))
|
||
|
# ---- CONTRIBUTING: per-project summary ----
# One row per project_id with the mean of commit_count and the sum of
# new_author_emails over rows where before_after is 0 and where it is 1
# (presumably the periods before / after the document appeared -- TODO
# confirm the coding of `before_after`).
# dplyr verbs are namespace-qualified so this section does not depend on
# dplyr having been attached earlier in the session.
aggregate_cccd <- contributing_count_df |>
  dplyr::group_by(project_id) |>
  dplyr::summarize(
    avg_weekly_commits = mean(commit_count),
    authors_before = sum(new_author_emails[before_after == 0]),
    authors_after = sum(new_author_emails[before_after == 1])
  )

# Quartiles of each per-project summary column.
quantile(aggregate_cccd$avg_weekly_commits, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_cccd$authors_before, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_cccd$authors_after, probs = c(0.25, 0.5, 0.75))
|
||
|
|
||
|
# ---- README weekly commit counts: load, window, derive ----
# Requires the dplyr package to be installed. Its verbs are called with an
# explicit `dplyr::` prefix so this section does not rely on dplyr having been
# attached earlier in the session (without it, `filter` would resolve to
# stats::filter and fail).
readme_count_filepath <- "/mmfs1/gscratch/comdata/users/mjilg/govdoc-cr-data/final_data/README_weekly_count_data.csv"
readme_count_df <- read.csv(readme_count_filepath, header = TRUE)

# Half-width of the window kept around week_index 0 (same value as used for
# the CONTRIBUTING data earlier in this script).
window_num <- 5

# Keep rows with -window_num <= week_index <= window_num, then add the derived
# columns. scale() returns a one-column matrix carrying "scaled:center" /
# "scaled:scale" attributes; as.numeric() stores the same z-scores as a plain
# numeric column instead of a matrix column.
readme_count_df <- readme_count_df |>
  dplyr::filter(week_index >= -window_num & week_index <= window_num) |>
  dplyr::mutate(
    scaled_age = as.numeric(scale(age)),
    scaled_age_at_commit = as.numeric(scale(age_at_commit)),
    log1p_count = log1p(commit_count)
  )

# Quartiles of both age columns after dividing by 365
# (presumably converting days to years -- TODO confirm the unit of `age`).
quantile(readme_count_df$age / 365, probs = c(0.25, 0.5, 0.75))
quantile(readme_count_df$age_at_commit / 365, probs = c(0.25, 0.5, 0.75))
|
||
|
# ---- README: per-project summary ----
# One row per project_id with the mean of commit_count and the sum of
# new_author_emails over rows where before_after is 0 and where it is 1
# (presumably the periods before / after the document appeared -- TODO
# confirm the coding of `before_after`).
# dplyr verbs are namespace-qualified so this section does not depend on
# dplyr having been attached earlier in the session.
aggregate_rccd <- readme_count_df |>
  dplyr::group_by(project_id) |>
  dplyr::summarize(
    avg_weekly_commits = mean(commit_count),
    authors_before = sum(new_author_emails[before_after == 0]),
    authors_after = sum(new_author_emails[before_after == 1])
  )

# Quartiles of each per-project summary column.
quantile(aggregate_rccd$avg_weekly_commits, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_rccd$authors_before, probs = c(0.25, 0.5, 0.75))
quantile(aggregate_rccd$authors_after, probs = c(0.25, 0.5, 0.75))
|
||
|
|
||
|
# ---- README readability scores ----
# Read the readability table and report the median of three score columns.
readme_readability <- read.csv(
  file = "text_analysis/020325_README_readability.csv",
  header = TRUE
)
median(readme_readability[["mcalpine_eflaw"]])
median(readme_readability[["linsear_write_formula"]])
median(readme_readability[["flesch_reading_ease"]])
|
||
|
|
||
|
# ---- CONTRIBUTING readability scores ----
# Read the readability table and report the median of three score columns.
contributing_readability <- read.csv(
  file = "text_analysis/020125_CONTRIBUTING_readability.csv",
  header = TRUE
)
median(contributing_readability[["mcalpine_eflaw"]])
median(contributing_readability[["linsear_write_formula"]])
median(contributing_readability[["flesch_reading_ease"]])
|
||
|
|
||
|
|
||
|
# ---- CONTRIBUTING topic distributions ----
# Read the per-file topic distribution table and report the quartiles of each
# topic column t0 through t4.
contributing_topic <- read.csv(
  file = "text_analysis/020125_CONTRIBUTING_file_topic_distributions.csv",
  header = TRUE
)
quantile(contributing_topic[["t0"]], probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic[["t1"]], probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic[["t2"]], probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic[["t3"]], probs = c(0.25, 0.5, 0.75))
quantile(contributing_topic[["t4"]], probs = c(0.25, 0.5, 0.75))
|
||
|
|
||
|
# ---- README topic distributions ----
# Read the per-file topic distribution table and report the quartiles of each
# topic column t0 through t10.
readme_topic <- read.csv(
  file = "text_analysis/020325_README_file_topic_distributions.csv",
  header = TRUE
)
quantile(readme_topic[["t0"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t1"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t2"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t3"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t4"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t5"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t6"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t7"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t8"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t9"]], probs = c(0.25, 0.5, 0.75))
quantile(readme_topic[["t10"]], probs = c(0.25, 0.5, 0.75))
|
||
|
|
||
|
|
||
|
|
||
|
|