updating human sampling
This commit is contained in:
parent
a14b08cfd8
commit
b982973f37
File diff suppressed because one or more lines are too long
@ -3,11 +3,11 @@ library(tidyverse)
|
|||||||
main_csv <-"~/analysis_data/100325_unified_phab.csv"
|
main_csv <-"~/analysis_data/100325_unified_phab.csv"
|
||||||
main_df <- read.csv(main_csv, header = TRUE)
|
main_df <- read.csv(main_csv, header = TRUE)
|
||||||
|
|
||||||
set.seed(1871) # For reproducibility
|
set.seed(1893) # For reproducibility
|
||||||
|
|
||||||
sampled_df <- main_df %>%
|
sampled_df <- main_df %>%
|
||||||
group_by(source) %>%
|
group_by(source) %>%
|
||||||
mutate(sampled_TaskPHID = TaskPHID %in% sample(unique(TaskPHID), 35)) %>%
|
mutate(sampled_TaskPHID = TaskPHID %in% sample(unique(TaskPHID), 40)) %>%
|
||||||
ungroup() %>%
|
ungroup() %>%
|
||||||
filter(sampled_TaskPHID) %>%
|
filter(sampled_TaskPHID) %>%
|
||||||
select(-sampled_TaskPHID)
|
select(-sampled_TaskPHID)
|
||||||
@ -16,7 +16,7 @@ labeling_sampled_df <- sampled_df %>%
|
|||||||
group_by(source) %>%
|
group_by(source) %>%
|
||||||
mutate(
|
mutate(
|
||||||
verification_sample = if_else(
|
verification_sample = if_else(
|
||||||
TaskPHID %in% sample(unique(TaskPHID), min(7, length(unique(TaskPHID)))), 1L, 0L
|
TaskPHID %in% sample(unique(TaskPHID), min(8, length(unique(TaskPHID)))), 1L, 0L
|
||||||
)
|
)
|
||||||
) %>%
|
) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
@ -41,5 +41,6 @@ sentence_level_sample <- labeling_sampled_df |>
|
|||||||
|
|
||||||
table(sentence_level_sample$verification_sample)
|
table(sentence_level_sample$verification_sample)
|
||||||
(nrow(sentence_level_sample) / 293) * 1.5
|
(nrow(sentence_level_sample) / 293) * 1.5
|
||||||
|
length(unique(sentence_level_sample$TaskPHID))
|
||||||
|
|
||||||
write.csv(sentence_level_sample, "100625_human_info_sample.csv", row.names = FALSE)
|
write.csv(sentence_level_sample, "100625_human_info_sample.csv", row.names = FALSE)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user