"""Collect octohatrack contributor counts for GitHub-hosted projects.

``main`` reads project rows from ``input_csv_path``, runs
``python3 -m octohatrack <owner>/<repo> --wait-for-reset`` for every row whose
upstream link points at GitHub, and writes one CSV row per project with the
contributor counts.  Rows whose octohatrack run times out are copied to a
second CSV.  ``consolidate_rosters`` matches rows of an earlier results CSV
against roster JSON files on disk and prints the merged rows.
"""
import pexpect
import os
import json
import sys
import subprocess
import csv
import ast
import time

csv_path = "c_021824_octo_data.csv"
csv_013024_path = "c_021824_te_rows.csv"
input_csv_path = "c_020524_te_rows.csv"

# Columns shared by both output files.  Everything except "project_name" is
# copied from input columns 1-9 in this order.
_BASE_KEYS = [
    "project_name",
    "underproduction_mean",
    "underproduction_low",
    "underproduction_high",
    "debian_vcs_link",
    "upstream_vcs_link",
    "age_of_project",
    "contributors",
    "collaborators",
    "milestone_count",
]

# (output column, key in the octohatrack roster dict) pairs.
_COUNT_KEYS = [
    ("api_contrib_count", "api_contributors"),
    ("issue_contrib_count", "issue_pr_contributors"),
    ("file_contrib_count", "file_contributors"),
    ("wiki_contrib_count", "wiki_contributors"),
]

# Seconds to wait for a single octohatrack run before giving up on the row.
_OCTOHATRACK_TIMEOUT = 900


def _github_slug(url):
    """Return ``(owner, repo)`` parsed from a GitHub URL, or ``None``.

    A trailing ``.git`` on the repository component is removed.  ``None`` is
    returned when the URL does not contain ``github.com`` or does not have
    both an owner and a repository component.
    """
    _, marker, tail = url.strip().partition("github.com")
    if not marker:
        return None
    parts = [part for part in tail.lstrip("/:").split("/") if part]
    if len(parts) < 2:
        return None
    owner, repo = parts[0], parts[1]
    if repo.endswith(".git"):
        repo = repo[:-4]
    if not repo:
        return None
    return owner, repo


def _parse_roster(stdout):
    """Return the first roster dict found in octohatrack's output, or ``None``.

    The roster is looked for on a line containing ``{'api_contributors'`` and
    parsed with ``ast.literal_eval``; lines that fail to parse are skipped.
    """
    for entry in stdout.split("\n"):
        if "{'api_contributors'" not in entry:
            continue
        try:
            roster = ast.literal_eval(entry)
        except (ValueError, SyntaxError):
            continue
        if isinstance(roster, dict):
            return roster
    return None


def _roster_counts(roster):
    """Map each count column to the length of the matching roster list."""
    return {column: len(roster[key]) for column, key in _COUNT_KEYS}


def main():
    """Run octohatrack for every GitHub project in ``input_csv_path``.

    Successful projects get a row (base columns plus contributor counts) in
    ``csv_path`` and their raw roster dumped as JSON.  Projects whose run
    times out get their base columns written to ``csv_013024_path``.  Rows
    that are too short or whose upstream link is not a parsable GitHub URL
    are skipped.
    """
    wd = os.getcwd()
    # octohatrack is run from a sibling checkout.  Passing ``cwd=`` to the
    # subprocess keeps this process's own working directory untouched, even
    # when a run times out.
    octohatrack_dir = os.path.join(wd, "..", "octohatrack")
    roster_dir = '/data/users/mgaughan/d_kkex_contrib_uni_013124/'

    with open(csv_path, 'w', newline='') as results_file, \
            open(csv_013024_path, 'w', newline='') as timeout_file, \
            open(input_csv_path, "r", newline='') as input_file:
        results_writer = csv.DictWriter(
            results_file, _BASE_KEYS + [column for column, _ in _COUNT_KEYS]
        )
        results_writer.writeheader()
        timeout_writer = csv.DictWriter(timeout_file, _BASE_KEYS)
        timeout_writer.writeheader()

        for line in csv.reader(input_file):
            # Columns 1-9 are read below; anything shorter cannot be used.
            if len(line) < len(_BASE_KEYS):
                continue
            slug = _github_slug(line[5])
            if slug is None:
                # octohatrack only works for GitHub-hosted projects.
                continue
            repo_name = "/".join(slug)
            print(repo_name)

            project_dict = dict(zip(_BASE_KEYS[1:], line[1:len(_BASE_KEYS)]))
            project_dict["project_name"] = slug[1]

            try:
                octohatrack_results = subprocess.run(
                    ['python3', '-m', 'octohatrack', repo_name,
                     '--wait-for-reset'],
                    capture_output=True,
                    text=True,
                    timeout=_OCTOHATRACK_TIMEOUT,
                    cwd=octohatrack_dir,
                ).stdout
            except (subprocess.TimeoutExpired, TypeError) as e:
                timeout_writer.writerow(project_dict)
                print(e)
                continue

            roster = _parse_roster(octohatrack_results)
            if roster is None:
                # Write nothing, so a project can never be paired with a
                # roster that belongs to a previously processed project.
                print("no contributor roster in octohatrack output for "
                      + repo_name)
                continue

            project_dict.update(_roster_counts(roster))
            print(project_dict)
            results_writer.writerow(project_dict)
            roster_path = os.path.join(
                roster_dir,
                'contrib_roster_' + project_dict["project_name"] + '.json',
            )
            with open(roster_path, 'w') as data_path:
                json.dump(roster, data_path)


def consolidate_rosters():
    """Print rows of ``013024_octo_data.csv`` merged with roster counts.

    For every row whose ``upstream_vcs_link`` yields a repository name, the
    file ``contrib_roster_<repo>.json`` is looked up in the rosters
    directory; when present, the four contributor counts are added to the
    row and the result is printed.  Nothing is written back to disk.
    """
    # NOTE(review): main() writes rosters under d_kkex_contrib_uni_013124 but
    # this reads from c_kkex_contrib_uni_013124 -- confirm this is intended.
    rosters_dir = "/data/users/mgaughan/c_kkex_contrib_uni_013124/"
    with open("013024_octo_data.csv", 'r', newline='') as octo_file:
        # List the directory once instead of once per CSV row.
        available = set(os.listdir(rosters_dir))
        for row in csv.DictReader(octo_file):
            slug = _github_slug(row['upstream_vcs_link'] or "")
            if slug is None:
                continue
            filename = 'contrib_roster_' + slug[1] + '.json'
            if filename not in available:
                continue
            with open(os.path.join(rosters_dir, filename), "r") as roster_file:
                roster = json.load(roster_file)
            counts = _roster_counts(roster)
            print(" | ".join(str(counts[column]) for column, _ in _COUNT_KEYS)
                  + " | ")
            row.update(counts)
            print("match!")
            print(filename)
            print(row)


if __name__ == "__main__":
    main()