loading in csv data

2023-11-09 10:45:16 -06:00 · 2023-11-09 10:45:16 -06:00 · f1fbbfb27e
commit f1fbbfb27e
parent a108587eaa
2 changed files with 31 additions and 4 deletions
--- a/R/calculatePower.R
+++ b/R/calculatePower.R
@ -14,14 +14,17 @@
 rm(list=ls())
 set.seed(424242) 
 library(readr)
 # (1) - Get the pilot data and clean it
 #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
-source ('/data/users/mgaughan/kkex_data_110823_3')
+#source ('/data/users/mgaughan/kkex_data_110823_3')
-d$nd <- to_logical(d$not.damaging, custom_true=c("Y")) 
+data1 <- read_csv('/data/users/mgaughan/power_data_110923.csv',show_col_types = FALSE)
-levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
+#d$nd <- to_logical(d$not.damaging, custom_true=c("Y")) 
 #levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
 # (2) - Run the model on the pilot data
-pilotM <- glm(nd ~ source, family=binomial(link="logit"), data=d)
+pilotM <- glm(nd ~ source, family=binomial(link="logit"), data=data1)
 summary(pilotM) #we expect effect sizes on this order
 pilot.b0 <- coef(summary(pilotM))[1,1] 
--- a/consolidate_data.py
+++ b/consolidate_data.py
@ -0,0 +1,24 @@
 import json
 import os
 import csv
 # Consolidate the per-repository JSON files found in `directory` into a
 # single CSV with one row per file.
 directory = '/data/users/mgaughan/kkex_data_110823_3/'
 # newline='' hands line-ending control to the csv module, as its
 # documentation requires; without it extra blank rows can appear on
 # platforms that translate "\n" on write.
 with open('/data/users/mgaughan/power_data_110923.csv', "w", newline='') as csv_file:
    csv_header = ['repo', 'age', 'contributors', 'collaborators', 'milestones']
    writer = csv.writer(csv_file, delimiter=',')
    writer.writerow(csv_header)
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # Only regular files are processed; anything else in the directory
        # (e.g. a subdirectory) is skipped.
        if not os.path.isfile(f):
            continue
        print(f)
        # Close each input as soon as it has been parsed so a large
        # directory does not accumulate open file handles.
        with open(f) as open_f:
            data = json.load(open_f)
        perceval = data['perceval_obj']
        # The 'repo' column is the bare file name of the JSON input.
        writer.writerow([
            filename,
            perceval['age_of_project'],
            perceval['contributors'],
            perceval['collaborators'],
            data['gha_obj']['milestone_count'],
        ])