"""Collect Debian package metadata and repository statistics into a CSV file.

For each row read from ``inst_all_packages_full_results.csv`` the script looks
up the package's VCS browser link through the sources.debian.org API, hands
that link to ``perceval_tasks.main`` and writes the combined results to
``test_csv_path``.
"""
import csv
import datetime as dt
import os

import requests

import perceval_tasks as pt
import github_api_req as gha
import gh_gsql_req as ghs

# Read from the KKEXKEY environment variable; not referenced elsewhere in
# this file.
key = os.environ.get('KKEXKEY')

test_csv_path = "120523_data_test.csv"

# Number of input rows (counting the first row) that main() processes before
# it stops reading.
_MAX_ROWS = 10

# Seconds to wait for sources.debian.org before giving up on a request, so a
# stalled connection cannot hang the whole run.
_REQUEST_TIMEOUT = 30


def main():
    """Build the per-project data set and write it to ``test_csv_path``.

    Reads up to ``_MAX_ROWS`` rows from ``inst_all_packages_full_results.csv``.
    For every row it resolves the VCS link with :func:`debian_query` and
    fetches repository statistics with ``perceval_tasks.main``; rows for which
    either step yields nothing are skipped.  A summary line with the success
    rate is printed once all rows have been handled.

    Nothing is written when no project could be collected.
    """
    early_cutoff = dt.datetime(2008, 2, 8)
    array_of_projects = []
    index = 0
    successful_count = 0
    with open('inst_all_packages_full_results.csv', newline='') as csvfile:
        for row in csv.reader(csvfile):
            index += 1
            if index > _MAX_ROWS:
                break
            # Columns 0 and 16-18 are read below; blank or truncated rows
            # would otherwise raise IndexError.
            if len(row) < 19:
                print('skipping malformed row ' + str(index))
                continue
            project_dict = {
                "project_name": row[0],
                "underproduction_mean": row[16],
                "underproduction_low": row[17],
                "underproduction_high": row[18],
            }
            project_dict["vcs_link"] = debian_query(project_dict["project_name"])
            if project_dict["vcs_link"] == "":
                continue
            perceval_data = pt.main(project_dict["vcs_link"], early_cutoff)
            if perceval_data == {}:
                continue
            project_dict['age_of_project'] = perceval_data['age_of_project']
            project_dict['contributors'] = perceval_data['contributors']
            project_dict['collaborators'] = perceval_data['collaborators']
            successful_count += 1
            # The first row is never exported (presumably it is the CSV
            # header row -- confirm against the input file).
            if index > 1:
                array_of_projects.append(project_dict)

    # index stays 0 for an empty input file; avoid dividing by zero.
    if index:
        print("success rate: " + str(successful_count/index) + "; total success count: " + str(successful_count))

    if not array_of_projects:
        print('no projects collected; nothing written to ' + test_csv_path)
        return

    keys = array_of_projects[0].keys()
    with open(test_csv_path, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(array_of_projects)


def _get_json(url, headers, failure_message):
    """Return the decoded JSON body of a GET request to *url*, or None.

    Network errors and undecodable bodies are reported by printing
    *failure_message*; the HTTP status code is not inspected, so JSON error
    payloads are returned to the caller as-is.
    """
    try:
        response = requests.get(url=url, headers=headers, timeout=_REQUEST_TIMEOUT)
        return response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers JSON decoding failures raised by older versions
        # of requests that do not wrap them in RequestException.
        print(failure_message)
        return None


def debian_query(package_name):
    """Return the VCS browser link Debian records for *package_name*.

    Two requests are made to sources.debian.org: the first lists the
    package's versions, the second fetches the package info for the first
    listed version (presumably the most recent one -- confirm against the
    API documentation).

    Returns:
        The ``vcs_browser`` value from the package info, or ``""`` when a
        request fails, the package is unknown, or no link is recorded.
    """
    headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}

    first_api_url = "https://sources.debian.org/api/src/" + package_name
    first_response_dict = _get_json(
        first_api_url, headers, 'error with the first debian request')
    if first_response_dict is None:
        return ""
    if first_response_dict == {'error': 404}:
        print('not found in debian system')
        return ""
    try:
        most_recent_package_version = first_response_dict['versions'][0]['version']
    except (KeyError, IndexError, TypeError):
        # Any other error payload or an empty version list ends up here.
        print('no version listed in the first debian response')
        return ""

    second_api_url = (
        "https://sources.debian.org/api/info/package/"
        + package_name + "/" + most_recent_package_version + "/"
    )
    second_response_dict = _get_json(
        second_api_url, headers, 'error with the second debian request')
    if not isinstance(second_response_dict, dict):
        return ""
    pkg_infos = second_response_dict.get('pkg_infos')
    if not isinstance(pkg_infos, dict) or 'vcs_browser' not in pkg_infos:
        print('no vcs link')
        return ""

    print(second_response_dict)
    print(pkg_infos['vcs_browser'])
    return pkg_infos['vcs_browser']


if __name__ == "__main__":
    main()
    # debian_query("zurl")