1
0
mw-lifecycle-analysis/p2/df_consolidation.R
2025-09-16 11:37:46 -07:00

29 lines
1.0 KiB
R

library(tidyverse)
library(jsonlite)
# Input locations (paths are relative to the user's home directory).
main_csv <- "~/p2/071425_master_discussion_data.csv"
gerrit_csv <- "~/p2/080425_gerrit_filled_df.csv"

# Read both CSV files into data frames; the first row of each file supplies
# the column names.
main_df <- read.csv(file = main_csv, header = TRUE)
gerrit_df <- read.csv(file = gerrit_csv, header = TRUE)
library(dplyr)
# Add one column per field extracted from the text column
# `selected_gerrit_results`. Every field is taken from the first capture group
# of its regex (column 2 of the `str_match()` matrix), so a row whose text does
# not match a pattern gets NA for that field. The three count fields are
# captured as digit strings and then converted to integers.
gerrit_extracted_df <- mutate(
  gerrit_df,
  # The status pattern only matches a 'status' entry that is immediately
  # followed by a key starting with 'reviewer.
  gerrit_status = str_match(
    selected_gerrit_results,
    "'status':\\s*'([^']*)',\\s*'reviewer"
  )[, 2],
  owner_email = str_match(
    selected_gerrit_results,
    "'owner_email':\\s*'([^']*)'"
  )[, 2],
  written_url_in_message = str_match(
    selected_gerrit_results,
    "'written_url_in_message':\\s*'([^']*)'"
  )[, 2],
  code_insertions = as.integer(
    str_match(selected_gerrit_results, "'code_insertions':\\s*(\\d+)")[, 2]
  ),
  code_deletions = as.integer(
    str_match(selected_gerrit_results, "'code_deletions':\\s*(\\d+)")[, 2]
  ),
  reviewer_count = as.integer(
    str_match(selected_gerrit_results, "'reviewer_count':\\s*(\\d+)")[, 2]
  )
)
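# TODO: select the columns to keep, with their intended types:
#   gerrit status:    categorical
#   owner_wikimedia:  boolean
#   LOCa:             number (presumably lines of code added -- confirm)
#   LOCd:             number (presumably lines of code deleted -- confirm)
#   count_reviewers:  number
#   url:              URL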