1
0
mw-lifecycle-analysis/p2/df_consolidation.R
2025-09-14 09:11:33 -07:00

27 lines
775 B
R

library(tidyverse)
library(jsonlite)
# Load the master discussion data and the Gerrit-enriched data from CSV.
main_csv <- "~/p2/071425_master_discussion_data.csv"
main_df <- read.csv(main_csv, header = TRUE)
gerrit_csv <- "~/p2/080425_gerrit_filled_df.csv"
gerrit_df <- read.csv(gerrit_csv, header = TRUE)
library(dplyr)

# Parse one repaired JSON string with jsonlite::fromJSON().
#
# Returns NULL for NA / blank input, and NULL plus a warning when the string
# still is not valid JSON after the quote repair below, so that a single bad
# row does not abort the whole mutate() call.
parse_gerrit_json <- function(txt) {
  if (is.na(txt) || !nzchar(trimws(txt))) {
    return(NULL)
  }
  tryCatch(
    fromJSON(txt),
    error = function(e) {
      warning(
        "Could not parse selected_gerrit_results entry as JSON: ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Rewrite the single-quoted strings in `selected_gerrit_results` as
# double-quoted JSON strings, then parse each row into a list column.
# NOTE(review): presumably the column holds Python-style dict/list reprs;
# literals such as True/False/None are not converted here and would end up
# as NULL (with a warning) -- confirm against the data.
gerrit_extracted_df <- gerrit_df |>
  mutate(
    # Pass 1: keys -- a single-quoted word that follows `{`, `[` or `, `
    # and is followed by a colon.
    jsonfixed = gsub(
      "(?<=\\{|\\[|, )'(\\w+?)'(?=\\s*:)", '"\\1"',
      selected_gerrit_results,
      perl = TRUE
    ),
    # Pass 2: values -- a single-quoted string after a colon that is
    # directly followed by `}`, `]` or `,`.
    jsonfixed = gsub(
      ":(\\s*)'(.*?)'(?=[}\\],])", ':\\1"\\2"',
      jsonfixed,
      perl = TRUE
    ),
    # Pass 3: any remaining single-quoted string (with no inner quote) that
    # sits between `:`/`[`/`,`/`{` and `,`/`}`/`]`, e.g. list elements.
    jsonfixed = gsub(
      "(?<=[:\\[,\\{])\\s*'([^']*)'\\s*(?=[,\\}\\]])", '"\\1"',
      jsonfixed,
      perl = TRUE
    ),
    # One parsed object per row; NULL where the row was empty or unparsable.
    expandedjsonlist = lapply(jsonfixed, parse_gerrit_json)
  )
# TODO: select the following fields (name: type):
# gerrit status: categorical
# owner_wikimedia: BOOL
# LOCa: #
# LOCd: #
# count_reviewers: #
# url: url