Review note: the hunks below were edited after this patch was generated, so the
post-image blob ids on the `index` lines no longer match the resulting files.

diff --git a/R/calculatePower.R b/R/calculatePower.R
index 2fc06fd..4e1668f 100644
--- a/R/calculatePower.R
+++ b/R/calculatePower.R
@@ -14,14 +14,19 @@
 rm(list=ls())
 set.seed(424242)
 
+library(readr)
+
 # (1) - Get the pilot data and clean it
 #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
-source ('/data/users/mgaughan/kkex_data_110823_3')
-d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
-levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
+#source ('/data/users/mgaughan/kkex_data_110823_3')
+data1 <- read_csv('/data/users/mgaughan/power_data_110923.csv',show_col_types = FALSE)
+#d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
+#levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
 
 # (2) - Run the model on the pilot data
-pilotM <- glm(nd ~ source, family=binomial(link="logit"), data=d)
+# NOTE(review): power_data_110923.csv is written by consolidate_data.py with columns
+# repo, age, contributors, collaborators, milestones -- no 'nd' or 'source'; confirm this formula.
+pilotM <- glm(nd ~ source, family=binomial(link="logit"), data=data1)
 summary(pilotM)
 #we expect effect sizes on this order
 pilot.b0 <- coef(summary(pilotM))[1,1]
diff --git a/consolidate_data.py b/consolidate_data.py
new file mode 100644
index 0000000..5648d4f
--- /dev/null
+++ b/consolidate_data.py
@@ -0,0 +1,41 @@
+"""Consolidate per-repository JSON records into one CSV for the power analysis."""
+
+import csv
+import json
+import os
+
+directory = '/data/users/mgaughan/kkex_data_110823_3/'
+csv_path = '/data/users/mgaughan/power_data_110923.csv'
+csv_header = ['repo', 'age', 'contributors', 'collaborators', 'milestones']
+
+
+def consolidate(source_dir, out_path):
+    """Write one CSV row per regular file in source_dir to out_path.
+
+    Each file is parsed as JSON and must provide the 'perceval_obj' and
+    'gha_obj' keys read below; a missing key raises KeyError.
+    """
+    # newline='' lets the csv module control line endings itself.
+    with open(out_path, "w", newline="") as csv_file:
+        writer = csv.writer(csv_file, delimiter=',')
+        writer.writerow(csv_header)
+        # Sort so the row order is reproducible across runs.
+        for filename in sorted(os.listdir(source_dir)):
+            f = os.path.join(source_dir, filename)
+            if not os.path.isfile(f):
+                continue
+            print(f)
+            # Close each JSON file promptly instead of leaking the handle.
+            with open(f) as json_file:
+                data = json.load(json_file)
+            writer.writerow([
+                filename,
+                data['perceval_obj']['age_of_project'],
+                data['perceval_obj']['contributors'],
+                data['perceval_obj']['collaborators'],
+                data['gha_obj']['milestone_count'],
+            ])
+
+
+if __name__ == "__main__":
+    consolidate(directory, csv_path)