diff --git a/github_api_req.py b/github_api_req.py
new file mode 100644
index 0000000..485bdac
--- /dev/null
+++ b/github_api_req.py
@@ -0,0 +1,47 @@
+import requests
+import datetime as dt
+
+def main(vcs, begin_date):
+    """Collect GitHub milestone data for the repository named in vcs.
+
+    vcs is a list whose first element is the repository URL; begin_date is
+    the datetime cutoff. Returns a dict holding the raw 'milestones'
+    response and the 'milestone_count' of entries created after begin_date.
+    """
+    gha_info = {}
+    #this is the entire list of Github 'milestones' grabbed from the API
+    gha_info['milestones'] = get_milestone_information(vcs)
+    #this is the count of milestones that occur after the cutoff date
+    gha_info['milestone_count'] = parse_milestones(gha_info['milestones'], begin_date)
+    return gha_info
+
+#this simple API call has been working for now but may need to be updated as more information is desired
+#NOTE(review): no query parameters are sent, so the API's default state filter and page size apply -- confirm that is intended
+def get_milestone_information(vcs_path):
+    """Return the decoded JSON body of the repository's /milestones endpoint."""
+    #drop any trailing '/' so the last path segment is the repo name rather than ''
+    repo_uri = vcs_path[0].rstrip('/')
+    repo_uri_list = repo_uri.split('/')
+    api_url = "https://api.github.com/repos/" + repo_uri_list[-2] + "/" + repo_uri_list[-1] + "/milestones"
+    #a timeout keeps a stalled connection from hanging the whole run
+    response = requests.get(api_url, timeout=30)
+    #raise on HTTP errors rather than handing an error payload to parse_milestones
+    response.raise_for_status()
+    response_dict = response.json()
+    return response_dict
+
+def parse_milestones(milestones, earliest_date):
+    """Count the entries whose 'created_at' is later than earliest_date."""
+    count_of_milestones = 0
+    for entry in milestones:
+        #if entry date is more recent than the earliest date we're looking at
+        #the last character (presumably a trailing 'Z') is dropped before parsing, giving a naive datetime
+        if dt.datetime.fromisoformat(entry['created_at'][:-1]) > earliest_date:
+            count_of_milestones += 1
+    return count_of_milestones
+
+
+if __name__ == "__main__":
+    vcs = ['https://github.com/fabiangreffrath/woof']
+    #main() needs a cutoff date; use the same one main.py passes
+    main(vcs, dt.datetime(2015, 3, 17))
\ No newline at end of file
diff --git a/main.py b/main.py
index 6553819..846118c 100644
--- a/main.py
+++ b/main.py
@@ -1,2 +1,35 @@
 import perceval
-import os
\ No newline at end of file
+import os
+import yaml
+import perceval_tasks as pt
+import github_api_req as gha
+import datetime as dt
+
+
+#TODO: get lists of authors -> get lists of contributors -> compute 'Mean Membership Type'
+
+#TODO: compute Formality level metric
+
+def main():
+    """Load the manifest and print the two denominator inputs of the formality measure."""
+    # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window
+    early_cutoff = dt.datetime(2015,3, 17)
+    print("earliest date examined: " + str(early_cutoff))
+    #placeholder for now
+    manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
+    with open(manifest, 'r') as stream:
+        try:
+            config = yaml.safe_load(stream)
+            #below lines will probably need to be refactored as tasks expand
+            vcs_path = config['Upstream_VCS']
+            perceval_obj = pt.main(vcs_path, early_cutoff)
+            gha_obj = gha.main(vcs_path, early_cutoff)
+            #these are the two variables in the denominator of the formality measure
+            print(perceval_obj['age_of_project'])
+            print(gha_obj['milestone_count'])
+        except yaml.YAMLError as err:
+            print(err)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/perceval_tasks.py b/perceval_tasks.py
index 4ff73ca..e5574eb 100644
--- a/perceval_tasks.py
+++ b/perceval_tasks.py
@@ -1,24 +1,43 @@
-import perceval
-import venv
-import yaml
-import subprocess
+import datetime as dt
+from perceval.backends.core.git import Git
 
-def main(manifest):
-    with open(manifest, 'r') as stream:
-        #try:
-        config = yaml.safe_load(stream)
-        get_perceval_log(config['Upstream_VCS'])
-        print(config['Upstream_VCS'])
-        #except yaml.YAMLOError as err:
-        #    print(err)
+#globals
+repo_dir = '/tmp/perceval.git'
 
-def get_perceval_log(vcs_path):
-    print(vcs_path)
-    perceval_output = subprocess.run(['perceval', 'git', vcs_path[0], '--from-date', '2023-08-08'], capture_output=True)
-    print(type(perceval_output.__str__()))
-    with open('test_perceval_output.txt', 'w') as f:
-        f.write(perceval_output.__str__())
-    print('COMPLETE')
+#main function for all subsequent tasks using perceval
+def main(vcs_path, begin_date):
+    """Return a dict with the 'list_of_commits' fetched from begin_date and the 'age_of_project'."""
+    perceval_info = {}
+    perceval_info['list_of_commits'] = get_perceval_log(vcs_path, begin_date)
+    perceval_info['age_of_project'] = get_repo_age(perceval_info['list_of_commits'] )
+    return perceval_info
+
+
+# this is the primary function for getting the list of commits from perceval
+def get_perceval_log(vcs_path, begin_date):
+    """Fetch the commits of vcs_path[0] from begin_date onward and return them as a list."""
+    repo = Git(uri=vcs_path[0], gitpath=repo_dir)
+    # this is a temporary date_from, will need to be more inclusive in the future
+    fetched_commits = repo.fetch(from_date=begin_date)
+    return list(fetched_commits)
+
+#this function is just to evaluate the repository age, as defined by Tamburri and used by van Meijel
+def get_repo_age(all_commits):
+    """Return the timedelta between the first and last entries of all_commits."""
+    #an empty fetch (no commits since the cutoff) has no span; return zero instead of raising IndexError
+    if not all_commits:
+        return dt.timedelta(0)
+    first_commit = all_commits[0]
+    last_commit = all_commits[-1]
+    first_date = dt.datetime.strptime(first_commit['data']["CommitDate"], '%c %z')
+    last_date = dt.datetime.strptime(last_commit['data']["CommitDate"], '%c %z')
+    print(first_date)
+    print("---------------------")
+    print(last_date)
+    #project life, as defined in YOSHI, unit is days
+    project_life = last_date - first_date
+    print(project_life)
+    return project_life
 
 if __name__ == "__main__":