From 0711641ab9f2a10665dcc41524b1910b3951a1b7 Mon Sep 17 00:00:00 2001 From: mjgaughan Date: Mon, 23 Oct 2023 20:11:51 -0500 Subject: [PATCH] prototype replication done --- github_api_req.py | 35 ++++++++++++++++++++++++++++------- main.py | 35 ++++++++++++++++++++++++++--------- perceval_tasks.py | 34 ++++++++++++++++++++++++---------- 3 files changed, 78 insertions(+), 26 deletions(-) diff --git a/github_api_req.py b/github_api_req.py index 485bdac..fcf0d0c 100644 --- a/github_api_req.py +++ b/github_api_req.py @@ -2,17 +2,20 @@ import requests import datetime as dt def main(vcs, begin_date): + repo_uri=vcs[0] gha_info = {} #this is the entire list of Github 'milestones' grabbed from the API - gha_info['milestones'] = get_milestone_information(vcs) + gha_info['milestones'] = get_milestone_information(repo_uri) #this is the count of milestones that occur after the cutoff date gha_info['milestone_count'] = parse_milestones(gha_info['milestones'], begin_date) + #split_actors(repo_uri, actors_list) return gha_info + #this simple API call has been working for now but may need to be updated as more information is desired -def get_milestone_information(vcs_path): - repo_uri=vcs_path[0] +def get_milestone_information(repo_uri): repo_uri_list = repo_uri.split('/') + print(repo_uri_list) api_url = "https://api.github.com/repos/" + repo_uri_list[-2] + "/" + repo_uri_list[-1] + "/milestones" response = requests.get(api_url) response_dict = response.json() @@ -22,11 +25,29 @@ def parse_milestones(milestones, earliest_date): count_of_milestones = 0 for entry in milestones: #if entry date is more recent than the earliest date we're looking at - if dt.datetime.fromisoformat(entry['created_at'][:-1]) > earliest_date: + # TODO: decide whether to use created_at or updated_at or closed_at + if dt.datetime.fromisoformat(entry['updated_at'][:-1]) > earliest_date: count_of_milestones += 1 return count_of_milestones +''' +#using the github API to identify who is a collaborator on the project and who is just a contributor +def split_actors(repo_uri, actors_list): + call_sheet = {'collaborator' : [], 'contributor' : []} + repo_uri_list = repo_uri.split('/') + api_url = "https://api.github.com/repos/" + repo_uri_list[-2] + "/" + repo_uri_list[-1] + "/collaborators/" + for actor in actors_list[:2]: + actor_email = actor.split('<')[1][:-1] + print(actor_email) + actor_user = get_gh_un(actor_email) + response_dict = response.json() + print(response_dict) -if __name__ == "__main__": - vcs = ['https://github.com/fabiangreffrath/woof'] - main(vcs) \ No newline at end of file +#this function grabs the Github username from an associated email +def get_gh_un(email): + api_url = 'https://api.github.com/search/users?q=' + email + response = requests.get(api_url) + response_dict = response.json() + gh_username = response_dict['items'][0]['login'] + return gh_username +''' diff --git a/main.py b/main.py index 846118c..e9d2d83 100644 --- a/main.py +++ b/main.py @@ -5,15 +5,10 @@ import perceval_tasks as pt import github_api_req as gha import datetime as dt - -#TODO: get lists of authors -> get lists of contributors -> compute 'Mean Membership Type' - -#TODO: compute Formality level metric - def main(): # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window - early_cutoff = dt.datetime(2015,3, 17) - print("earliest date examined: " + str(early_cutoff)) + early_cutoff = dt.datetime(2023,6, 17) + print("Earliest date examined: " + str(early_cutoff)) #placeholder for now manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml' with open(manifest, 'r') as stream: @@ -21,14 +16,36 @@ def main(): config = yaml.safe_load(stream) #below lines will probably need to be refactored as tasks expand vcs_path = config['Upstream_VCS'] + print("------------------") + print(vcs_path) perceval_obj = pt.main(vcs_path, early_cutoff) gha_obj = gha.main(vcs_path, early_cutoff) #these are the two variables in the denominator of the formality measure - print(perceval_obj['age_of_project']) - print(gha_obj['milestone_count']) + print("Age of Project: " + str(perceval_obj['age_of_project'])) + print('Contributor Count: ' + str(len(perceval_obj['contributors']))) + print('Collaborator Count: ' + str(len(perceval_obj['collaborators']))) + print('Number of Milestones: ' + str(gha_obj['milestone_count'])) + new_mmt = compute_new_mmt(len(perceval_obj['contributors']), len(perceval_obj['collaborators'])) + print('New MMT: ' + str(new_mmt)) + old_mmt = compute_old_mmt(len(perceval_obj['contributors']), len(perceval_obj['collaborators'])) + print('Old MMT: ' + str(old_mmt)) + #new mmt formality score + new_formality = compute_formality_score(new_mmt, gha_obj['milestone_count'], perceval_obj['age_of_project']) + print(new_formality) except yaml.YAMLOError as err: print(err) +#this is Yoshi 2 MMT per van Meijel +def compute_new_mmt(contrib_count, collab_count): + return (contrib_count + collab_count * 2) / (contrib_count + collab_count) + +#this is Yoshi 1 mmt per Tamburri +def compute_old_mmt(contrib_count, collab_count): + return (contrib_count) / (contrib_count + collab_count) + +#formality score +def compute_formality_score(mmt, milestones, lifetime): + return mmt / (milestones / lifetime) if __name__ == "__main__": main() \ No newline at end of file diff --git a/perceval_tasks.py b/perceval_tasks.py index e5574eb..b57000a 100644 --- a/perceval_tasks.py +++ b/perceval_tasks.py @@ -1,19 +1,23 @@ import datetime as dt from perceval.backends.core.git import Git +import argparse #globals -repo_dir = '/tmp/perceval.git' +repo_dir = '/tmp/' #main function for all subsequent tasks using perceval def main(vcs_path, begin_date): perceval_info = {} perceval_info['list_of_commits'] = get_perceval_log(vcs_path, begin_date) - perceval_info['age_of_project'] = get_repo_age(perceval_info['list_of_commits'] ) + perceval_info['age_of_project'] = get_repo_age(perceval_info['list_of_commits']) + perceval_info['contributors'], perceval_info['collaborators'] = get_all_actors(perceval_info['list_of_commits']) return perceval_info # this is the primary function for getting the list of commits from perceval def get_perceval_log(vcs_path, begin_date): + print(vcs_path) + repo_dir = '/tmp/' + str(vcs_path[0].split('/')[-1]) + ".git" repo = Git(uri=vcs_path[0], gitpath=repo_dir) # this is a temporary date_from, will need to be more inclusive in the future fetched_commits = repo.fetch(from_date=begin_date) @@ -25,15 +29,25 @@ def get_repo_age(all_commits): last_commit = all_commits[-1] first_date = dt.datetime.strptime(first_commit['data']["CommitDate"], '%c %z') last_date = dt.datetime.strptime(last_commit['data']["CommitDate"], '%c %z') - print(first_date) - print("---------------------") - print(last_date) #project life, as defined in YOSHI, unit is days project_life = last_date - first_date - print(project_life) - return project_life + return project_life.total_seconds() / 86400 +#attempt at getting the rosters, though need to make sure that we can get the MR +def get_all_actors(all_commits): + #collaborators are more senior than contributors, doing it by author/commit + authors = [] + committers = [] + for commit in all_commits: + author = commit['data']['Author'] + committer = commit['data']['Commit'] + if committer not in committers: + committers.append(committer) + if author not in authors: + authors.append(author) + # now cleaning lists + for committer in committers: + if committer in authors: + authors.remove(committer) + return authors, committers -if __name__ == "__main__": - manifest = '../kaylea_dissertation/lifecycle/package_metadata/woof_manifest.yaml' - main(manifest) \ No newline at end of file