66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
import json
|
|
import os
|
|
import csv
|
|
import pandas as pd
|
|
|
|
# Input directory for the disabled JSON-to-CSV export kept in the string literal below.
#directory = '/data/users/mgaughan/kkex_data_111023/'
|
|
# Disabled legacy code: everything between the triple quotes below is a bare
# string literal, so none of it is executed. From its text, it held two
# earlier steps: adding a 'pkg' column to power_data_111023_mmt.csv, and
# building power_data_111023.csv from the JSON files in `directory`.
# NOTE(review): it cannot be re-enabled as-is -- it calls `pandas.read_csv`
# although the module is imported as `pd`, `directory` is only defined in a
# commented-out line above, and its indentation has been lost.
'''
|
|
path_to_file = 'power_data_111023_mmt.csv'
|
|
|
|
csv_1 = pandas.read_csv(path_to_file)
|
|
print(csv_1)
|
|
csv_2 = pandas.read_csv('/data_ext/users/kcz1100/kaylea_dissertation/collabnetXS/raw_data/inst_all_packages_full_results.csv')
|
|
print(csv_2['pkg'])
|
|
|
|
up_fac_mean = []
|
|
pkg_name = []
|
|
for index, row in csv_1.iterrows():
|
|
temporary_proj_name = row['repo'][:-12].split('_')[1]
|
|
proj_name = ""
|
|
if '.git' in temporary_proj_name:
|
|
proj_name = temporary_proj_name[:-4]
|
|
else:
|
|
proj_name = temporary_proj_name
|
|
row = csv_2.loc[proj_name == csv_2['pkg']]
|
|
if proj_name == "hacking":
|
|
print("HACKING HACKING HACKING")
|
|
print(proj_name + row['pkg'])
|
|
print(row['up.fac.mean'])
|
|
up_fac_mean.append(row['up.fac.mean'])
|
|
pkg_name.append(proj_name)
|
|
#csv_1['up.fac.mean'] = up_fac_mean
|
|
csv_1['pkg'] = pkg_name
|
|
print(csv_1)
|
|
csv_1.to_csv('power_data_111023_mmt.csv')
|
|
|
|
|
|
with open('/data/users/mgaughan/power_data_111023.csv', "w") as csv_file:
|
|
csv_header = ['repo', 'age', 'contributors', 'collaborators', 'milestones']
|
|
writer = csv.writer(csv_file, delimiter=',')
|
|
writer.writerow(csv_header)
|
|
for filename in os.listdir(directory):
|
|
new_row = []
|
|
f = os.path.join(directory, filename)
|
|
if os.path.isfile(f):
|
|
print(f)
|
|
new_row.append(f.split("/")[-1])
|
|
open_f = open(f)
|
|
data = json.load(open_f)
|
|
new_row.append(data['perceval_obj']['age_of_project'])
|
|
new_row.append(data['perceval_obj']['contributors'])
|
|
new_row.append(data['perceval_obj']['collaborators'])
|
|
new_row.append(data['gha_obj']['milestone_count'])
|
|
writer.writerow(new_row)
|
|
'''
|
|
|
|
def concat_csv(
    input_paths=(
        'c_013124_octo_data.csv',
        'c_020524_octo_data.csv',
        'c_021824_octo_data.csv',
    ),
    output_path='kk_final_octo.csv',
):
    """Outer-merge several CSV files into one and write the result to disk.

    Each file is read with ``pd.read_csv`` and folded, left to right, into a
    running frame with ``DataFrame.merge(how='outer')``. No ``on`` is given,
    so pandas joins on the columns the two frames have in common, and rows
    found in either frame are kept.

    Args:
        input_paths: CSV files to combine, in merge order. A single path is
            also accepted. Defaults to the three octo data files this script
            originally hard-coded.
        output_path: Destination of the merged CSV (written without the
            index column).

    Returns:
        The merged DataFrame. Its row count is also printed.

    Raises:
        ValueError: If ``input_paths`` is empty.
    """
    # A lone path would otherwise be iterated character by character.
    if isinstance(input_paths, (str, os.PathLike)):
        paths = [input_paths]
    else:
        paths = list(input_paths)
    if not paths:
        raise ValueError("concat_csv needs at least one input CSV path")

    merged = pd.read_csv(paths[0])
    for path in paths[1:]:
        merged = merged.merge(pd.read_csv(path), how='outer')

    merged.to_csv(output_path, index=False)
    print(merged.shape[0])
    return merged
|
|
|
|
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    concat_csv()