updating with search for relevant commits
This commit is contained in:
parent
c086203934
commit
5f39c6f3cf
3025
121325_work/012926_possibly_relevant_sample.csv
Normal file
3025
121325_work/012926_possibly_relevant_sample.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
library(dplyr)
|
library(dplyr)
|
||||||
library(lubridate)
|
library(lubridate)
|
||||||
|
library(stringr)
|
||||||
c1_event_date <- as.Date("2013-07-01")
|
c1_event_date <- as.Date("2013-07-01")
|
||||||
c2_event_date <- as.Date("2013-08-28")
|
c2_event_date <- as.Date("2013-08-28")
|
||||||
c3_event_date <- as.Date("2015-07-02")
|
c3_event_date <- as.Date("2015-07-02")
|
||||||
@ -30,6 +31,7 @@ active_names<- c("Timo Tijhof", "Krinkle", "Roan Kattouw", "Catrope",
|
|||||||
"Trevor Parscal", "Ed Sanders", "Moriel Schottlender", "Gabriel Wicke", "C. Scott Ananian")
|
"Trevor Parscal", "Ed Sanders", "Moriel Schottlender", "Gabriel Wicke", "C. Scott Ananian")
|
||||||
core_df <- core_df |>
|
core_df <- core_df |>
|
||||||
mutate(commit_date = ymd_hms(commit_date)) |>
|
mutate(commit_date = ymd_hms(commit_date)) |>
|
||||||
|
mutate(is_http = ifelse(contains_http_but_not_url(message), TRUE, FALSE))|>
|
||||||
mutate(isAuthorWMF = case_when(
|
mutate(isAuthorWMF = case_when(
|
||||||
author_name %in% active_names ~ "NAMES",
|
author_name %in% active_names ~ "NAMES",
|
||||||
grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) ~ "TRUE",
|
grepl("@wikimedia\\.org", author_email, ignore.case = TRUE) ~ "TRUE",
|
||||||
@ -54,8 +56,28 @@ c1_relevant <- core_df |>
|
|||||||
mutate(week_index = relative_week(commit_date, c1_event_date)) |>
|
mutate(week_index = relative_week(commit_date, c1_event_date)) |>
|
||||||
filter(week_index >= -33 & week_index <= 13) |>
|
filter(week_index >= -33 & week_index <= 13) |>
|
||||||
filter(author_email != "jenkins-bot@gerrit.wikimedia.org") |>
|
filter(author_email != "jenkins-bot@gerrit.wikimedia.org") |>
|
||||||
filter(isVE == TRUE)
|
filter(isVE == TRUE)|>
|
||||||
|
mutate(case = "c1") |>
|
||||||
|
select(authored_date, commit_date, author_name, author_email, message)
|
||||||
|
|
||||||
|
# Case 2 sample: commits flagged by `is_http`, limited to weeks -104..13
# relative to `c2_event_date`, with jenkins-bot commits removed.
c2_relevant <- core_df |>
  mutate(week_index = relative_week(commit_date, c2_event_date)) |>
  filter(week_index >= -104 & week_index <= 13) |>
  filter(author_email != "jenkins-bot@gerrit.wikimedia.org") |>
  filter(is_http) |>
  # Label rows with this pipeline's own case id (previously mislabelled "c1").
  mutate(case = "c2") |>
  # NOTE(review): `case` is not listed in this select(), so the label is
  # dropped from the result. Add it here (and in the sibling pipelines that
  # get row-bound with this one) if the sample should record its case.
  select(authored_date, commit_date, author_name, author_email, message)
|
||||||
|
|
||||||
|
# Case 3 sample: commits flagged by `is_http`, limited to weeks -83..13
# relative to `c3_event_date`, with jenkins-bot commits removed.
c3_relevant <- core_df |>
  mutate(week_index = relative_week(commit_date, c3_event_date)) |>
  filter(week_index >= -83 & week_index <= 13) |>
  filter(author_email != "jenkins-bot@gerrit.wikimedia.org") |>
  filter(is_http) |>
  # Label rows with this pipeline's own case id (previously mislabelled "c1").
  mutate(case = "c3") |>
  # NOTE(review): `case` is not listed in this select(), so the label is
  # dropped from the result. Add it here (and in the sibling pipelines that
  # get row-bound with this one) if the sample should record its case.
  select(authored_date, commit_date, author_name, author_email, message)
|
||||||
|
|
||||||
|
# Stack the three per-case samples row-wise, then save the combined frame
# as a CSV without row names.
possibly_relevant <- rbind(
  c1_relevant,
  c2_relevant,
  c3_relevant
)
write.csv(
  possibly_relevant,
  file = "012926_possibly_relevant_sample.csv",
  row.names = FALSE
)
|
||||||
|
|
||||||
total_share <- core_df |>
|
total_share <- core_df |>
|
||||||
filter(commit_date >= as.Date('2012-11-11') & commit_date <= as.Date("2015-10-02"))|>
|
filter(commit_date >= as.Date('2012-11-11') & commit_date <= as.Date("2015-10-02"))|>
|
||||||
@ -3,6 +3,8 @@ library(dplyr)
|
|||||||
library(stringr)
|
library(stringr)
|
||||||
main_csv <-"~/analysis_data/121625_unified.csv"
|
main_csv <-"~/analysis_data/121625_unified.csv"
|
||||||
main_df <- read.csv(main_csv, header = TRUE)
|
main_df <- read.csv(main_csv, header = TRUE)
|
||||||
|
# Path to the DSL frame CSV, read into a data frame with a header row.
dsl_csv <- "~/dsl/121625_DSL_frame.csv"
dsl_df <- read.csv(file = dsl_csv, header = TRUE)
|
||||||
|
|
||||||
#01-10-26 look for affil rosters
|
#01-10-26 look for affil rosters
|
||||||
affils_ <- main_df |>
|
affils_ <- main_df |>
|
||||||
@ -87,7 +89,7 @@ tasks_flagged <- main_df %>%
|
|||||||
filter(comment_type == "task_description") |>
|
filter(comment_type == "task_description") |>
|
||||||
left_join(first_task, by = c("source", "AuthorPHID")) %>%
|
left_join(first_task, by = c("source", "AuthorPHID")) %>%
|
||||||
mutate(is_first_time_author = week_index == first_task_week)
|
mutate(is_first_time_author = week_index == first_task_week)
|
||||||
|
# Results for RQ2
|
||||||
summary_df <- tasks_flagged %>%
|
summary_df <- tasks_flagged %>%
|
||||||
mutate(
|
mutate(
|
||||||
period = case_when(
|
period = case_when(
|
||||||
@ -96,7 +98,7 @@ summary_df <- tasks_flagged %>%
|
|||||||
TRUE ~ NA
|
TRUE ~ NA
|
||||||
)
|
)
|
||||||
) |>
|
) |>
|
||||||
group_by(period, source) %>%
|
group_by(period, source, isAuthorWMF) %>%
|
||||||
summarize(
|
summarize(
|
||||||
total_tasks = n(),
|
total_tasks = n(),
|
||||||
first_time_tasks = sum(is_first_time_author),
|
first_time_tasks = sum(is_first_time_author),
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user