diff --git a/R/.Rhistory b/R/.Rhistory index 9187d92..f484971 100644 --- a/R/.Rhistory +++ b/R/.Rhistory @@ -1,271 +1,3 @@ -qqnorm(octo_data$issue_mmt) -qqnorm(log(octo_data$issue_mmt)) -qqnorm(residuals(octo_data$issue_mmt)) -qqnorm(octo_data$issue_mmt) -qqnorm(log(octo_data$issue_mmt)) -qqnorm(octo_data$issue_mmt) -hist(log(octo_data$issue_mmt)) -hist(sqrt(octo_data$issue_mmt)) -#below are the models for the octo data, there should be analysis for each one -octo_mmtmodel1 <- lm(underproduction_mean ~ mmt + new.age.factor, data=octo_data) -summary(octo_mmtmodel1) -#below are the models for the octo data, there should be analysis for each one -octo_mmtmodel1 <- lm(underproduction_mean ~ mmt + new.age.factor, data=octo_data) -summary(octo_mmtmodel1) -# below this is the analysis for the octo data -octo_data$new.age <- as.numeric(cut(octo_data$age_of_project/365, breaks=c(0,7.524197,10.323056,13.649367,17), labels=c(1,2,3,4))) -table(octo_data$new.age) -octo_data$new.age.factor <- as.factor(octo_data$new.age) -hist(octo_data$new.age) -#below are the models for the octo data, there should be analysis for each one -octo_mmtmodel1 <- lm(underproduction_mean ~ mmt + new.age.factor, data=octo_data) -summary(octo_mmtmodel1) -hist(sqrt(octo_data$issue_mmt)) -hist(sqrt(octo_data$issue_mmt)) -hist(octo_data$issue_mmt) -#right skewed data, need to transform -library(rcompanion) -install.packages(rcompanion) -hist(sqrt(octo_data$issue_mmt)) -qqnorm(1/octo_data$issue_mmt) -hist(1/octo_data$issue_mmt) -hist(log(octo_data$issue_mmt)) -hist(sqrt(octo_data$issue_mmt)) -hist(log(octo_data$issue_mmt)) -octo_data$sqrt_issue_mmt <- sqrt(octo_data$issue_mmt) -sqrt_issue_mmtmodel1 <- lm(underproduction_mean ~ sqrt_issue_mmt + new.age.factor, data=octo_data) -summary(sqrt_issue_mmtmodel1) -summary(issue_mmtmodel1) -octo_data$wiki_mmt <- ((octo_data$wiki_contrib_count * 2) + (octo_data$total_contrib - octo_data$wiki_contrib_count)) / (octo_data$total_contrib) -hist(octo_data$wiki_mmt) -wiki_mmtmodel1 <- lm(underproduction_mean ~ wiki_mmt + new.age.factor, data=octo_data) -summary(wiki_mmtmodel1) -g3 <- ggplot(octo_data, aes(wiki_mmt)) + geom_histogram(binwidth = 5) -g3 -g3 <- ggplot(octo_data, aes(wiki_mmt)) + geom_histogram(binwidth = 0.05) -g3 -g3 <- ggplot(octo_data, aes(wiki_mmt)) + geom_histogram(binwidth = 0.05) + theme_bw() -g3 -g3 <- ggplot(octo_data, aes(wiki_mmt)) + geom_histogram(binwidth = 0.01) + theme_bw() -g3 -g2 <- ggplot(octo_data, aes(issue_mmt)) + geom_histogram(binwidth = 0.01) + theme_bw() -g2 -g1 <- ggplot(octo_data, aes(sqrt_issue_mmt)) + geom_histogram(binwidth = 0.01) + theme_bw() -g1 -g3 <- ggplot(octo_data, aes(wiki_mmt)) + geom_histogram(binwidth = 0.01) + theme_bw() -g3 -g2 <- ggplot(octo_data, aes(issue_mmt)) + geom_histogram(binwidth = 0.01) + theme_bw() -g2 -texreg(list(octo_mmtmodel1, issue_mmtmodel1, wiki_mmtmodel1), stars=NULL, digits=2, -custom.model.names=c( 'M1: augm. formality','M2: MMT', 'M3: milestones' ), -custom.coef.names=c('(Intercept)', 'Augmented formality', 'MMT', 'Age-2', 'Age-3', 'Age-4', 'Milestones'), -use.packages=FALSE, table=FALSE, ci.force = TRUE) -source('powerAnalysis.R') #my little "lib" -texreg(list(octo_mmtmodel1, issue_mmtmodel1, wiki_mmtmodel1), stars=NULL, digits=2, -custom.model.names=c( 'M1: augm. formality','M2: MMT', 'M3: milestones' ), -custom.coef.names=c('(Intercept)', 'Augmented formality', 'MMT', 'Age-2', 'Age-3', 'Age-4', 'Milestones'), -use.packages=FALSE, table=FALSE, ci.force = TRUE) -library(texreg) #my little "lib" -texreg(list(octo_mmtmodel1, issue_mmtmodel1, wiki_mmtmodel1), stars=NULL, digits=2, -custom.model.names=c( 'M1: augm. formality','M2: MMT', 'M3: milestones' ), -custom.coef.names=c('(Intercept)', 'Augmented formality', 'MMT', 'Age-2', 'Age-3', 'Age-4', 'Milestones'), -use.packages=FALSE, table=FALSE, ci.force = TRUE) -texreg(list(octo_mmtmodel1, issue_mmtmodel1, wiki_mmtmodel1), stars=NULL, digits=2, -custom.model.names=c( 'M1: MMT','M2: issue contrib.', 'M3: wiki_contrib.' ), -custom.coef.names=c('(Intercept)', 'MMT', 'Issues', 'Age-2', 'Age-3', 'Age-4', 'Wiki'), -use.packages=FALSE, table=FALSE, ci.force = TRUE) -glimpse(readme_df) -library(tidyverse) -#set wd, read in data -try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) -readme_df <- read_csv("../final_data/deb_readme_did.csv") -contributing_df <- read_csv("../final_data/deb_contrib_did.csv") -glimpse(readme_df) -head(readme_df) -colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_cnt", "before_mrg_cnt", "after_all_cnt", "after_mrg_cnt", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new") -glimpse(readme_df) -col_order <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_cnt", "after_all_cnt", "before_mrg_cnt", "after_mrg_cnt", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new") -readme_df <- readme_df[,col_order] -glimpse(readme_df) -#TODO: turn character type into vector of numbers -str_split(test, ", ") -test <- "[0, 0, 0, 0]" -#TODO: turn character type into vector of numbers -str_split(test, ", ") -#TODO: turn character type into vector of numbers -str_split(gsub("[][]","", test), ", ") -readme_df %>% add_column(cnt_before_all = str_split(gsub("[][]","", before_all_count), ", ")) -readme_df %>% mutate(cnt_before_all = str_split(gsub("[][]","", before_all_count), ", ")) -readme_df %>% mutate("cnt_before_all" = str_split(gsub("[][]","", "before_all_count"), ", ")) -head(readme_df$before_all_cnt) -head(readme_df$cnt_before_all) -readme_df %>% mutate(cnt_before_all = str_split(gsub("[][]","", "before_all_count"), ", ")) -head(readme_df$cnt_before_all) -View(readme_df) -View(readme_df) -readme_df$cnt_before_all -readme_df %>% mutate(cnt_before_all = str_split(gsub("[][]","", "before_all_count"), ", ")) -str_split(gsub("[][]","", readme_df$before_all_count), ", ") -#str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -readme_df %>% mutate(cnt_before_all = str_split(gsub("[][]","", "before_all_cnt"), ", ")) -readme_df$cnt_before_all -#str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -readme_df %>% mutate("cnt_before_all" = str_split(gsub("[][]","", "before_all_cnt"), ", ")) -readme_df$cnt_before_all -str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -readme_df$cnt_before_all <- str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -readme_df$cnt_before_all -readme_df$cnt_after_all <- str_split(gsub("[][]","", readme_df$after_all_cnt), ", ") -readme_df$cnt_after_all -readme_df$cnt_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_cnt), ", ") -readme_df$cnt_before_mrg -readme_df$cnt_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_cnt), ", ") -readme_df$cnt_after_mrg -#TODO: figure out if one needs to expand the data into a different dataframe, and if so how -readme_df <- subset(readme_df, select = -c("before_all_cnt", "before_mrg_cnt", "after_all_cnt", "after_mrg_cnt")) -drop <- c("before_all_cnt", "before_mrg_cnt", "after_all_cnt", "after_mrg_cnt") -readme_df = readme_df[,!(names(readme_df) %in% drop)] -View(readme_df) -library(tidyverse) -#set wd, read in data -try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) -readme_df <- read_csv("../final_data/deb_readme_did.csv") -colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_cnt", "before_mrg_cnt", "after_all_cnt", "after_mrg_cnt", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new") -col_order <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_cnt", "after_all_cnt", "before_mrg_cnt", "after_mrg_cnt", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new") -readme_df <- readme_df[,col_order] -glimpse(readme_df) -readme_df$cnt_before_all <- str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -readme_df$cnt_before_all <- as.numeric(readme_df$cnt_before_all) -View(readme_df) -readme_df$cnt_before_all -type(readme_df$cnt_before_all) -typeof(readme_df$cnt_before_all) -typeof(readme_df$cnt_before_all[0]) -readme_df$cnt_before_all <- unlist(str_split(gsub("[][]","", readme_df$before_all_cnt), ", ")) -readme_df$cnt_before_all <- str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -typeof(readme_df$cnt_before_all) -typeof(readme_df$cnt_before_all[[0]]) -typeof(readme_df$cnt_before_all[0]) -sapply(readme_df, class) -readme_df[,lapply(readme_df, unlist)] -readme_df[,lapply(readme_df$cnt, unlist)] -readme_df[,lapply(readme_df$cnt_before_all, unlist)] -typeof(readme_df$cnt_before_all[0]) -View(readme_df) -View(readme_df) -readme_df$cnt_before_all <- as.numeric(str_split(gsub("[][]","", readme_df$before_all_cnt), ", ")) -readme_df$cnt_before_all <- as.numeric(str_split(gsub("[][]","", readme_df$before_all_cnt), ", ")[[1]]) -readme_df$cnt_before_all <- str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -typeof(readme_df$cnt_before_all[0]) -typeof(readme_df$cnt_before_all[0][0]) -readme_df$cnt_before_all[0] -unlist(readme_df$cnt_before_all[0]) -readme_df$cnt_before_all[0] -readme_df$cnt_before_all -test <- readme_df$cnt_before_all -test -as.numeric(test) -test[0] -test[1] -as.numeric(test[1]) -unlist(test[1]) -as.numeric(unlist(test[1])) -test2 <- as.numeric(unlist(test)) -test2 -print(entry) -for (entry in test) { -print(entry) -} -print(as.numeric(unlist(entry))) -for (entry in test) { -print(as.numeric(unlist(entry))) -} -test_two <- append(test_two, as.numeric(unlist(entry))) -print(as.numeric(unlist(entry))) -for (entry in test) { -test_two <- append(test_two, as.numeric(unlist(entry))) -print(as.numeric(unlist(entry))) -} -test_two <- c() -for (entry in test) { -test_two <- append(test_two, as.numeric(unlist(entry))) -print(as.numeric(unlist(entry))) -} -readme_df$cnt_before_all <- as.numeric(readme_df$cnt_before_all) -test_two -readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) -iterator <- 0 -for (entry in test) { -readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) -print(as.numeric(unlist(entry))) -iterator <- iterator + 1 -} -View(readme_df) -library(tidyverse) -#set wd, read in data -try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) -readme_df <- read_csv("../final_data/deb_readme_did.csv") -colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_cnt", "before_mrg_cnt", "after_all_cnt", "after_mrg_cnt", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new") -col_order <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_cnt", "after_all_cnt", "before_mrg_cnt", "after_mrg_cnt", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new") -readme_df <- readme_df[,col_order] -glimpse(readme_df) -head(readme_df) -#this has to happen on the analysis side of things for a given row, it cannot happen on the storage side -#this is a conversation of whether or not the data should be saved in terms of -readme_df$cnt_before_all <- str_split(gsub("[][]","", readme_df$before_all_cnt), ", ") -# test <- readme_df$cnt_before_all -# as.numeric(unlist(test[1])) -# test_two <- c() -# iterator <- 0 -# for (entry in test) { -# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) -# print(as.numeric(unlist(entry))) -# iterator <- iterator + 1 -# } -# test_two -readme_df$cnt_after_all <- str_split(gsub("[][]","", readme_df$after_all_cnt), ", ") -readme_df$cnt_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_cnt), ", ") -readme_df$cnt_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_cnt), ", ") -drop <- c("before_all_cnt", "before_mrg_cnt", "after_all_cnt", "after_mrg_cnt") -readme_df = readme_df[,!(names(readme_df) %in% drop)] -#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step -new_test <- head(readme_df, 1) -View(new_test) -write.csv(readme_df, "r_readme_did.csv", row.names=FALSE) -# as.numeric(unlist(test[1])) -# test_two <- c() -# iterator <- 0 -# for (entry in test) { -# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) -# print(as.numeric(unlist(entry))) -# iterator <- iterator + 1 -# } -# test_two -#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step -new_test <- head(readme_df, 1) -View(new_test) -longer <- new_test |> -pivot_longer(cols = starts_with("cnt"), -names_to = "window", -values_to = "count")) -longer <- new_test |> -pivot_longer(cols = starts_with("cnt"), -names_to = "window", -values_to = "count") -longer -View(longer) -longer |> unnest(count) -new_longer <- longer |> unnest(count) -View(new_longer) -longer -View(new_longer) -longer <- new_test |> -pivot_longer(cols = starts_with("cnt"), -names_to = "window", -values_to = "count") |> -unnest(as.numeric(unlist(count))) -longer -longer <- new_test |> pivot_longer(cols = starts_with("cnt"), names_to = "window", values_to = "count") |> @@ -510,3 +242,271 @@ longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, le View(longer) head(longer) sapply(longer, class) +library(plyr) +library(tidyverse) +#set wd, read in data +try(setwd(dirname(rstudioapi::getActiveDocumentContext()$path))) +readme_df <- read_csv("../final_data/deb_readme_did.csv") +contributing_df <- read_csv("../final_data/deb_contrib_did.csv") +#preprocessing for readme_df +colnames(readme_df) <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct", "before_auth_new", "after_commit_new", "after_auth_new", "before_commit_new") +col_order <- c("upstream_vcs_link", "event_date", "event_hash", "before_all_ct", "after_all_ct", "before_mrg_ct", "after_mrg_ct", "before_auth_new", "after_auth_new", "before_commit_new", "after_commit_new") +readme_df <- readme_df[,col_order] +readme_df$ct_before_all <- str_split(gsub("[][]","", readme_df$before_all_ct), ", ") +readme_df$ct_after_all <- str_split(gsub("[][]","", readme_df$after_all_ct), ", ") +readme_df$ct_before_mrg <- str_split(gsub("[][]","", readme_df$before_mrg_ct), ", ") +readme_df$ct_after_mrg <- str_split(gsub("[][]","", readme_df$after_mrg_ct), ", ") +drop <- c("before_all_ct", "before_mrg_ct", "after_all_ct", "after_mrg_ct") +readme_df = readme_df[,!(names(readme_df) %in% drop)] +# as.numeric(unlist(test[1])) +# test_two <- c() +# iterator <- 0 +# for (entry in test) { +# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) +# print(as.numeric(unlist(entry))) +# iterator <- iterator + 1 +# } +# test_two +#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step +new_test <- head(readme_df, 1) +longer <- new_test |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +View(longer) +#testing out analysis below +longer[which(longer$observation_type == all)] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +#testing out analysis below +longer[which(longer$observation_type == "all")] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +View(readme_df) +# as.numeric(unlist(test[1])) +# test_two <- c() +# iterator <- 0 +# for (entry in test) { +# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) +# print(as.numeric(unlist(entry))) +# iterator <- iterator + 1 +# } +# test_two +#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step +new_test <- readme_df[5,] +longer <- new_test |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +View(readme_df) +# as.numeric(unlist(test[1])) +# test_two <- c() +# iterator <- 0 +# for (entry in test) { +# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) +# print(as.numeric(unlist(entry))) +# iterator <- iterator + 1 +# } +# test_two +#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step +new_test <- readme_df[76,] +longer <- new_test |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +# as.numeric(unlist(test[1])) +# test_two <- c() +# iterator <- 0 +# for (entry in test) { +# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) +# print(as.numeric(unlist(entry))) +# iterator <- iterator + 1 +# } +# test_two +#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step +new_test <- readme_df[77,] +longer <- new_test |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +# as.numeric(unlist(test[1])) +# test_two <- c() +# iterator <- 0 +# for (entry in test) { +# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) +# print(as.numeric(unlist(entry))) +# iterator <- iterator + 1 +# } +# test_two +#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step +new_test <- readme_df[143,] +longer <- new_test |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +# as.numeric(unlist(test[1])) +# test_two <- c() +# iterator <- 0 +# for (entry in test) { +# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) +# print(as.numeric(unlist(entry))) +# iterator <- iterator + 1 +# } +# test_two +#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step +new_test <- readme_df[185,] +longer <- new_test |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +# as.numeric(unlist(test[1])) +# test_two <- c() +# iterator <- 0 +# for (entry in test) { +# readme_df$cnt_before_all[iterator] <- as.numeric(unlist(entry)) +# print(as.numeric(unlist(entry))) +# iterator <- iterator + 1 +# } +# test_two +#Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step +new_test <- readme_df[231,] +longer <- new_test |> +pivot_longer(cols = starts_with("ct"), +names_to = "window", +values_to = "count") |> +unnest(count) +longer$observation_type <- gsub("^.*_", "", longer$window) +longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +longer[which(longer$observation_type == "all"),] |> +mutate(D = ifelse(week >= 26, 1, 0)) |> +lm(count ~ D + I(week - 26)) |> +summary() +longer[which(longer$observation_type == "all"),] |> +mutate(D = ifelse(week >= 26, 1, 0)) |> +lm(count ~ D * I(week - 26)) |> +summary() +longer[which(longer$observation_type == "all"),] |> +mutate(D = ifelse(week >= 26, 1, 0)) |> +lm(formula = count ~ D * I(week - 26)) |> +summary() +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = ifelse(week >= 26, 1, 0)) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth(method = "lm") +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth(method = "lm") +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth() +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth() +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth(aes(x = week, y = count, color = D)) +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth() +sapply(longer, class) +longer$count <- as.numeric(longer$count) +sapply(longer, class) +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> +ggplot(aes(x = week, y = count)) + +geom_point() + +geom_vline(xintercept = 26) +longer[which(longer$observation_type == "all"),] |> +mutate(D = ifelse(week >= 26, 1, 0)) |> +lm(formula = count ~ D * I(week - 26)) |> +summary() +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth() +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth(se = False) +longer[which(longer$observation_type == "all"),] |> +select(count, week) |> +mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> +ggplot(aes(x = week, y = count, color = D)) + +geom_point() + +geom_smooth(se = FALSE) diff --git a/R/didAnalysis.R b/R/didAnalysis.R new file mode 100644 index 0000000..e69de29 diff --git a/R/didCleaning.R b/R/didCleaning.R index 2722467..2c15fc2 100644 --- a/R/didCleaning.R +++ b/R/didCleaning.R @@ -32,7 +32,7 @@ readme_df = readme_df[,!(names(readme_df) %in% drop)] # } # test_two #Yes, need to expand the dataframe, but again, for the sake of clarity, do not want to until analysis step -new_test <- head(readme_df, 1) +new_test <- readme_df[231,] longer <- new_test |> pivot_longer(cols = starts_with("ct"), names_to = "window", @@ -40,3 +40,24 @@ longer <- new_test |> unnest(count) longer$observation_type <- gsub("^.*_", "", longer$window) longer <- ddply(longer, "observation_type", transform, week=seq(from=0, by=1, length.out=length(observation_type))) +longer$count <- as.numeric(longer$count) + +#sapply(longer, class) + +#testing out analysis below +longer[which(longer$observation_type == "all"),] |> + ggplot(aes(x = week, y = count)) + + geom_point() + + geom_vline(xintercept = 26) + +longer[which(longer$observation_type == "all"),] |> + mutate(D = ifelse(week >= 26, 1, 0)) |> + lm(formula = count ~ D * I(week - 26)) |> + summary() + +longer[which(longer$observation_type == "all"),] |> + select(count, week) |> + mutate(D = as.factor(ifelse(week >= 26, 1, 0))) |> + ggplot(aes(x = week, y = count, color = D)) + + geom_point() + + geom_smooth(se = FALSE)