loading in csv data

This commit is contained in:
Matthew Gaughan 2023-11-09 10:45:16 -06:00
parent a108587eaa
commit f1fbbfb27e
2 changed files with 31 additions and 4 deletions

View File

@ -14,14 +14,17 @@
rm(list=ls()) rm(list=ls())
set.seed(424242) set.seed(424242)
library(readr)
# (1) - Get the pilot data and clean it # (1) - Get the pilot data and clean it
#source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R') #source('~/Research/tor_wikipedia_edits/handcoded_edits/inter_coder_reliability_ns0.R')
source ('/data/users/mgaughan/kkex_data_110823_3') #source ('/data/users/mgaughan/kkex_data_110823_3')
d$nd <- to_logical(d$not.damaging, custom_true=c("Y")) data1 <- read_csv('/data/users/mgaughan/power_data_110923.csv',show_col_types = FALSE)
levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors") #d$nd <- to_logical(d$not.damaging, custom_true=c("Y"))
#levels(d$source) <- c("IP-based Editors", "New Editors", "Registered Editors", "Tor-based Editors")
# (2) - Run the model on the pilot data # (2) - Run the model on the pilot data
pilotM <- glm(nd ~ source, family=binomial(link="logit"), data=d) pilotM <- glm(nd ~ source, family=binomial(link="logit"), data=data1)
summary(pilotM) #we expect effect sizes on this order summary(pilotM) #we expect effect sizes on this order
pilot.b0 <- coef(summary(pilotM))[1,1] pilot.b0 <- coef(summary(pilotM))[1,1]

24
consolidate_data.py Normal file
View File

@ -0,0 +1,24 @@
import json
import os
import csv
# Consolidate the per-repository JSON files found in `directory` into a single
# CSV with one row per file: repo (the file name), age, contributors,
# collaborators, milestones.
directory = '/data/users/mgaughan/kkex_data_110823_3/'
# newline='' lets the csv module control line endings itself, as its
# documentation requires; without it some platforms emit blank rows.
with open('/data/users/mgaughan/power_data_110923.csv', "w", newline='') as csv_file:
    csv_header = ['repo', 'age', 'contributors', 'collaborators', 'milestones']
    writer = csv.writer(csv_file, delimiter=',')
    writer.writerow(csv_header)
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # Only regular files are read; sub-directories are skipped.
        if not os.path.isfile(f):
            continue
        print(f)
        # Context manager closes each JSON file as soon as it is parsed,
        # instead of leaking one open handle per input file.
        with open(f) as open_f:
            data = json.load(open_f)
        # A file lacking any of these keys raises KeyError and stops the run.
        perceval = data['perceval_obj']
        writer.writerow([
            f.split("/")[-1],
            perceval['age_of_project'],
            perceval['contributors'],
            perceval['collaborators'],
            data['gha_obj']['milestone_count'],
        ])