updating with new olmo labels
This commit is contained in:
parent
cec9d82d41
commit
108b8aacd6
BIN
120725_logit_dsl.RDS
Normal file
BIN
120725_logit_dsl.RDS
Normal file
Binary file not shown.
392641
analysis_data/120725_unified.csv
Normal file
392641
analysis_data/120725_unified.csv
Normal file
File diff suppressed because one or more lines are too long
146015
analysis_data/all_120525_olmo_batched_categorized.csv
Normal file
146015
analysis_data/all_120525_olmo_batched_categorized.csv
Normal file
File diff suppressed because one or more lines are too long
@ -31,10 +31,21 @@ desc_info <- main_df %>%
|
||||
task_desc_dateClosed = as.POSIXct(date_closed, origin = "1970-01-01", tz = "UTC")
|
||||
)
|
||||
|
||||
old_csv <- "~/analysis_data/100625_constituent_dfs/071425_master_discussion_data.csv"
|
||||
old_df <- read.csv(old_csv, header = TRUE)
|
||||
old_task_status <- old_df |>
|
||||
filter(comment_type == "task_description") |>
|
||||
select(TaskPHID, status)
|
||||
|
||||
new_desc_info <- desc_info |>
|
||||
left_join(
|
||||
old_task_status,
|
||||
by= "TaskPHID"
|
||||
)
|
||||
#identifying comments in ADAC set
|
||||
main_df <- main_df |>
|
||||
mutate(created = as.POSIXct(date_created, origin = "1970-01-01", tz = "UTC")) |>
|
||||
left_join(desc_info, by = "TaskPHID") |>
|
||||
left_join(new_desc_info, by = "TaskPHID") |>
|
||||
mutate(
|
||||
ADAC = as.integer(
|
||||
!is.na(task_desc_author) &
|
||||
@ -73,7 +84,7 @@ first_join <- main_df|>
|
||||
by = "id"
|
||||
)
|
||||
|
||||
olmo_csv <- "~/analysis_data/102125_constituent_dfs/110525_olmo_batched_categorized.csv"
|
||||
olmo_csv <- "~/analysis_data/all_120525_olmo_batched_categorized.csv"
|
||||
olmo_df <- read.csv(olmo_csv, header = TRUE)
|
||||
|
||||
olmo_df <- olmo_df |>
|
||||
@ -135,6 +146,13 @@ pulling <- unified_df |>
|
||||
pulling <- unified_df |>
|
||||
filter(id == "23366" | id == "20846" | id == "20847")
|
||||
|
||||
write.csv(unified_df, "110925_unified.csv", row.names = FALSE)
|
||||
# [ x ] get the focal repo for gerrit code changes
|
||||
unified_df <- unified_df |>
|
||||
mutate(
|
||||
gerrit_repo = str_extract(selected_gerrit_results, "(?<='project': ')[^']+"),
|
||||
task_status = status.y
|
||||
)
|
||||
|
||||
write.csv(unified_df, "120725_unified.csv", row.names = FALSE)
|
||||
|
||||
|
||||
|
||||
Binary file not shown.
3236
dsl/120725_DSL_frame.csv
Normal file
3236
dsl/120725_DSL_frame.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
||||
library(tidyverse)
|
||||
library(dsl)
|
||||
|
||||
dsl_csv <-"~/dsl/111725_DSL_frame.csv"
|
||||
dsl_csv <-"~/dsl/120725_DSL_frame.csv"
|
||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
|
||||
dsl_df <- dsl_df |>
|
||||
@ -81,7 +81,7 @@ dev_model <- dsl(
|
||||
data=dsl_df
|
||||
)
|
||||
summary(dev_model)
|
||||
saveRDS(dev_model, "120225_logit_dsl.RDS")
|
||||
saveRDS(dev_model, "120725_logit_dsl.RDS")
|
||||
|
||||
library(broom)
|
||||
library(dplyr)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
library(tidyverse)
|
||||
|
||||
unified_csv <-"~/analysis_data/110925_unified.csv"
|
||||
unified_csv <-"~/analysis_data/120725_unified.csv"
|
||||
unified_df <- read.csv(unified_csv, header = TRUE)
|
||||
|
||||
# 1. aggregate to the task level
|
||||
@ -220,7 +220,8 @@ task_level_variables <- unified_df |>
|
||||
descriptions <- unified_df |>
|
||||
filter(comment_type == "task_description")|>
|
||||
select(TaskPHID, task_title, date_created, date_closed, isAuthorWMF,
|
||||
source, phase, week_index, author_closer, resolution_outcome, priority )
|
||||
source, phase, week_index, author_closer, resolution_outcome, priority,
|
||||
gerrit_repo, task_status)
|
||||
|
||||
task_level_variables <- task_level_variables |>
|
||||
left_join(
|
||||
@ -290,4 +291,4 @@ ggplot(task_level_variables,
|
||||
theme_minimal()
|
||||
|
||||
# 4. save
|
||||
write.csv(task_level_variables, "111725_DSL_frame.csv", row.names = FALSE)
|
||||
write.csv(task_level_variables, "120725_DSL_frame.csv", row.names = FALSE)
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
library(tidyverse)
|
||||
#library(dsl)
|
||||
library(dplyr)
|
||||
dsl_csv <-"~/dsl/111725_DSL_frame.csv"
|
||||
dsl_csv <-"~/dsl/120725_DSL_frame.csv"
|
||||
dsl_df <- read.csv(dsl_csv, header = TRUE)
|
||||
|
||||
dsl_df <- dsl_df |>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user