24_deb_pkg_gov/consolidate_data.py

66 lines
2.1 KiB
Python
Raw Normal View History

2023-11-09 16:45:16 +00:00
import json
import os
import csv
2024-03-07 00:25:27 +00:00
import pandas as pd
2023-11-09 16:45:16 +00:00
2023-11-10 21:45:09 +00:00
#directory = '/data/users/mgaughan/kkex_data_111023/'
2024-03-07 00:25:27 +00:00
# Disabled legacy code: everything between the triple quotes below is a bare
# string literal, so none of it executes. It preserves two earlier steps:
#   1. reading 'power_data_111023_mmt.csv', deriving a package name from each
#      row's 'repo' value, and writing the file back with an added 'pkg' column;
#   2. writing '/data/users/mgaughan/power_data_111023.csv' with one row per
#      regular file found in `directory`, each loaded as JSON.
# NOTE(review): the string refers to `pandas` and `directory`, neither of which
# is defined in the visible part of this file (pandas is imported as `pd`, and
# the `directory` assignment is commented out) -- fix both before re-enabling.
'''
2023-11-10 21:45:09 +00:00
path_to_file = 'power_data_111023_mmt.csv'
2023-11-09 16:45:16 +00:00
csv_1 = pandas.read_csv(path_to_file)
print(csv_1)
csv_2 = pandas.read_csv('/data_ext/users/kcz1100/kaylea_dissertation/collabnetXS/raw_data/inst_all_packages_full_results.csv')
print(csv_2['pkg'])
up_fac_mean = []
2023-11-10 17:31:43 +00:00
pkg_name = []
for index, row in csv_1.iterrows():
temporary_proj_name = row['repo'][:-12].split('_')[1]
proj_name = ""
if '.git' in temporary_proj_name:
proj_name = temporary_proj_name[:-4]
else:
proj_name = temporary_proj_name
2023-11-10 17:31:43 +00:00
row = csv_2.loc[proj_name == csv_2['pkg']]
if proj_name == "hacking":
print("HACKING HACKING HACKING")
print(proj_name + row['pkg'])
print(row['up.fac.mean'])
up_fac_mean.append(row['up.fac.mean'])
2023-11-10 17:31:43 +00:00
pkg_name.append(proj_name)
#csv_1['up.fac.mean'] = up_fac_mean
csv_1['pkg'] = pkg_name
print(csv_1)
2023-11-10 21:45:09 +00:00
csv_1.to_csv('power_data_111023_mmt.csv')
2024-03-07 00:25:27 +00:00
2023-11-10 21:38:49 +00:00
with open('/data/users/mgaughan/power_data_111023.csv', "w") as csv_file:
2023-11-09 16:45:16 +00:00
csv_header = ['repo', 'age', 'contributors', 'collaborators', 'milestones']
writer = csv.writer(csv_file, delimiter=',')
writer.writerow(csv_header)
for filename in os.listdir(directory):
new_row = []
f = os.path.join(directory, filename)
if os.path.isfile(f):
print(f)
new_row.append(f.split("/")[-1])
open_f = open(f)
data = json.load(open_f)
new_row.append(data['perceval_obj']['age_of_project'])
new_row.append(data['perceval_obj']['contributors'])
new_row.append(data['perceval_obj']['collaborators'])
new_row.append(data['gha_obj']['milestone_count'])
writer.writerow(new_row)
2024-03-07 00:25:27 +00:00
'''
def concat_csv(
    input_paths=(
        'c_013124_octo_data.csv',
        'c_020524_octo_data.csv',
        'c_021824_octo_data.csv',
    ),
    output_path='kk_final_octo.csv',
):
    """Outer-merge several CSV files into one CSV and print its row count.

    The inputs are read with pandas and folded left to right with
    ``DataFrame.merge(how='outer')``. No ``on=`` is given, so the join keys
    are left to pandas' default for ``merge``. Called with no arguments, the
    function reads the three ``c_*_octo_data.csv`` files and writes
    ``kk_final_octo.csv``, exactly as the hard-coded version did.

    Args:
        input_paths: Iterable of CSV paths to merge, in merge order.
        output_path: Destination CSV path; written without the index column.

    Raises:
        ValueError: If ``input_paths`` is empty.
    """
    frames = [pd.read_csv(path) for path in input_paths]
    if not frames:
        raise ValueError("concat_csv needs at least one input CSV path")

    # Fold the frames pairwise so any number of inputs is supported.
    merged = frames[0]
    for frame in frames[1:]:
        merged = merged.merge(frame, how='outer')

    merged.to_csv(output_path, index=False)
    # Report how many rows ended up in the consolidated file.
    print(merged.shape[0])
# Run the CSV consolidation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    concat_csv()