# 24_deb_pkg_gov/consolidate_data.py
#
# 54 lines
# 1.8 KiB
# Python

import json
import os
import csv
import pandas
#directory = '/data/users/mgaughan/kkex_data_111023/'
# CSV of per-repository measurements. It is both the input and the default
# output, i.e. the file is updated in place with the derived `pkg` column.
path_to_file = 'power_data_111023_mmt.csv'
# CSV of per-package results; must provide a `pkg` column (and `up.fac.mean`
# when that column is requested).
path_to_packages = '/data_ext/users/kcz1100/kaylea_dissertation/collabnetXS/raw_data/inst_all_packages_full_results.csv'

# Number of trailing characters dropped from every `repo` value before the
# package name is parsed out of it.
_REPO_SUFFIX_LEN = 12
_GIT_SUFFIX = '.git'


def extract_pkg_name(repo):
    """Derive a package name from a single ``repo`` value.

    The last 12 characters of ``repo`` are dropped, the remainder is split on
    ``'_'`` and the second field is taken; a trailing ``'.git'`` on that field
    is removed.

    Args:
        repo: The ``repo`` string of one row of the power-data CSV.

    Returns:
        The package name as a string.

    Raises:
        ValueError: If ``repo`` has no second ``'_'``-separated field once
            the trailing characters are removed.
    """
    fields = repo[:-_REPO_SUFFIX_LEN].split('_')
    if len(fields) < 2:
        raise ValueError(
            "cannot derive a package name from repo value {!r}".format(repo)
        )
    name = fields[1]
    # Strip only a genuine trailing ".git". A substring test would also cut
    # four characters off names that merely contain it (e.g. "foo.github").
    if name.endswith(_GIT_SUFFIX):
        name = name[:-len(_GIT_SUFFIX)]
    return name


def add_package_columns(power_data, packages=None, include_up_fac_mean=False):
    """Return a copy of ``power_data`` with a derived ``pkg`` column.

    Args:
        power_data: DataFrame with a ``repo`` column of strings.
        packages: DataFrame with ``pkg`` and ``up.fac.mean`` columns. Only
            consulted when ``include_up_fac_mean`` is true.
        include_up_fac_mean: When true, also add an ``up.fac.mean`` column
            looked up by package name. Off by default so the output has the
            same columns the script has always written.

    Returns:
        A new DataFrame; ``power_data`` itself is left unmodified.
    """
    consolidated = power_data.copy()
    consolidated['pkg'] = consolidated['repo'].map(extract_pkg_name)
    if include_up_fac_mean:
        # One scalar per package: the first matching row wins, and packages
        # without a match get NaN. Mapping through a keyed Series keeps the
        # column aligned row-for-row with `consolidated`.
        lookup = (
            packages.drop_duplicates(subset='pkg')
            .set_index('pkg')['up.fac.mean']
        )
        consolidated['up.fac.mean'] = consolidated['pkg'].map(lookup)
    return consolidated


def consolidate(power_path=path_to_file, packages_path=path_to_packages,
                output_path=None, include_up_fac_mean=False):
    """Read both CSVs, add the package columns and write the result.

    Args:
        power_path: Path of the power-data CSV to read.
        packages_path: Path of the per-package results CSV to read.
        output_path: Where to write the result; defaults to ``power_path``
            (in-place update).
        include_up_fac_mean: Forwarded to :func:`add_package_columns`.

    Returns:
        The consolidated DataFrame that was written.
    """
    power_data = pandas.read_csv(power_path)
    print(power_data)
    packages = pandas.read_csv(packages_path)
    print(packages['pkg'])

    consolidated = add_package_columns(
        power_data, packages, include_up_fac_mean=include_up_fac_mean
    )
    print(consolidated)

    # index=False: the row index carries no information, and because the
    # output is normally the input file, writing it would add one more
    # "Unnamed: 0" column on every run.
    destination = power_path if output_path is None else output_path
    consolidated.to_csv(destination, index=False)
    return consolidated


if __name__ == "__main__":
    consolidate()
# Disabled code, kept as a bare string literal: nothing below is executed.
# As written, the snippet opens /data/users/mgaughan/power_data_111023.csv for
# writing, emits a header row (repo, age, contributors, collaborators,
# milestones) and then, for each regular file found in `directory`, loads it
# as JSON and writes one row: the file's base name followed by values taken
# from its 'perceval_obj' and 'gha_obj' entries.
# NOTE(review): `directory` is not assigned by any live statement visible in
# this file (only by a commented-out line), the snippet's indentation is
# missing, and the handle from `open(f)` is never closed -- all of which would
# need attention before this code is revived.
'''
with open('/data/users/mgaughan/power_data_111023.csv', "w") as csv_file:
csv_header = ['repo', 'age', 'contributors', 'collaborators', 'milestones']
writer = csv.writer(csv_file, delimiter=',')
writer.writerow(csv_header)
for filename in os.listdir(directory):
new_row = []
f = os.path.join(directory, filename)
if os.path.isfile(f):
print(f)
new_row.append(f.split("/")[-1])
open_f = open(f)
data = json.load(open_f)
new_row.append(data['perceval_obj']['age_of_project'])
new_row.append(data['perceval_obj']['contributors'])
new_row.append(data['perceval_obj']['collaborators'])
new_row.append(data['gha_obj']['milestone_count'])
writer.writerow(new_row)
'''