2023-12-05 18:36:07 +00:00
|
|
|
import csv
|
|
|
|
import requests
|
|
|
|
import os
|
|
|
|
import datetime as dt
|
2023-12-06 00:39:10 +00:00
|
|
|
import wget
|
2023-12-05 18:36:07 +00:00
|
|
|
|
|
|
|
import perceval_tasks as pt
|
|
|
|
import github_api_req as gha
|
|
|
|
import gh_gsql_req as ghs
|
2023-12-06 00:39:10 +00:00
|
|
|
import debian_queries as dqs
|
2023-12-05 18:36:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Value of the KKEXKEY environment variable; None when the variable is unset.
key = os.getenv('KKEXKEY')

# Path of the CSV file that main() writes its collected rows to.
test_csv_path = "120523_data_test.csv"
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Collect per-project metadata for the first rows of the package list.

    Reads ``inst_all_packages_full_results.csv`` and, for each of the first
    20 rows, looks up a Debian VCS link with ``dqs.debian_query``, derives an
    upstream VCS link (trimming GitHub links locally, otherwise asking
    ``dqs.debian_vcs_query``) and fetches ``age_of_project``,
    ``contributors`` and ``collaborators`` from ``pt.main``. A row is dropped
    as soon as one of those lookups returns an empty result. The surviving
    rows are written to ``test_csv_path``.

    The first row read is never written out (presumably it is the CSV
    header -- confirm against the input file).

    NOTE(review): the nesting here was reconstructed from a listing that had
    lost its indentation; the summary ``print`` is assumed to run once after
    the loop -- confirm against the original file.
    """
    early_cutoff = dt.datetime(2008, 2, 8)
    row_limit = 20

    array_of_projects = []
    rows_read = 0
    successful_count = 0

    with open('inst_all_packages_full_results.csv', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Check the limit before counting, so rows_read is the number of
            # rows actually processed (not one more) once the loop stops.
            if rows_read >= row_limit:
                break
            rows_read += 1

            project_dict = {
                "project_name": row[0],
                "underproduction_mean": row[16],
                "underproduction_low": row[17],
                "underproduction_high": row[18],
            }

            project_dict["debian_vcs_link"] = dqs.debian_query(project_dict["project_name"])
            if project_dict["debian_vcs_link"] == "":
                continue

            if "github" in project_dict["debian_vcs_link"]:
                project_dict["upstream_vcs_link"] = clean_gh_vcs_link(project_dict["debian_vcs_link"])
            else:
                project_dict["upstream_vcs_link"] = dqs.debian_vcs_query(project_dict["debian_vcs_link"])
            if project_dict["upstream_vcs_link"] == "":
                continue

            perceval_data = pt.main(project_dict["upstream_vcs_link"], early_cutoff)
            if perceval_data == {}:
                continue

            for field in ('age_of_project', 'contributors', 'collaborators'):
                project_dict[field] = perceval_data[field]
            successful_count += 1

            # The first row is counted but never kept for output.
            if rows_read > 1:
                array_of_projects.append(project_dict)

    if rows_read:
        print(f"success rate: {successful_count / rows_read}; total success count: {successful_count}")
    else:
        # Empty input file: there is no rate to report (avoids dividing by zero).
        print("success rate: n/a (no rows read); total success count: 0")

    if not array_of_projects:
        # Nothing survived the lookups; without a first row there are no
        # column names to take, so skip writing instead of raising IndexError.
        print("no projects collected; nothing written to " + test_csv_path)
        return

    keys = array_of_projects[0].keys()
    with open(test_csv_path, 'w', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(array_of_projects)
|
|
|
|
|
|
|
|
|
2023-12-06 00:39:10 +00:00
|
|
|
def clean_gh_vcs_link(debian_vcs_link):
    """Return the link cut down to its first five "/"-separated segments.

    For a link shaped like ``https://github.com/owner/repo/tree/debian/``
    this yields ``https://github.com/owner/repo``. Links with five or fewer
    segments come back unchanged.
    """
    # Only the first five pieces are needed, so stop splitting after them.
    leading_segments = debian_vcs_link.split("/", 5)[:5]
    return "/".join(leading_segments)
|
2023-12-05 18:36:07 +00:00
|
|
|
|
|
|
|
# Run the collection pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
2023-12-06 00:39:10 +00:00
|
|
|
#clean_gh_vcs_link("https://github.com/kilobyte/3270font/tree/debian/")
|
|
|
|
#debian_vcs_query("https://salsa.debian.org/debian/0xffff/")
|