prototype replication done
This commit is contained in:
parent
5e6a08471a
commit
0711641ab9
@ -2,17 +2,20 @@ import requests
|
|||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
|
||||||
def main(vcs, begin_date):
|
def main(vcs, begin_date):
|
||||||
|
repo_uri=vcs[0]
|
||||||
gha_info = {}
|
gha_info = {}
|
||||||
#this is the entire list of Github 'milestones' grabbed from the API
|
#this is the entire list of Github 'milestones' grabbed from the API
|
||||||
gha_info['milestones'] = get_milestone_information(vcs)
|
gha_info['milestones'] = get_milestone_information(repo_uri)
|
||||||
#this is the count of milestones that occur after the cutoff date
|
#this is the count of milestones that occur after the cutoff date
|
||||||
gha_info['milestone_count'] = parse_milestones(gha_info['milestones'], begin_date)
|
gha_info['milestone_count'] = parse_milestones(gha_info['milestones'], begin_date)
|
||||||
|
#split_actors(repo_uri, actors_list)
|
||||||
return gha_info
|
return gha_info
|
||||||
|
|
||||||
|
|
||||||
#this simple API call has been working for now but may need to be updated as more information is desired
|
#this simple API call has been working for now but may need to be updated as more information is desired
|
||||||
def get_milestone_information(vcs_path):
|
def get_milestone_information(repo_uri):
|
||||||
repo_uri=vcs_path[0]
|
|
||||||
repo_uri_list = repo_uri.split('/')
|
repo_uri_list = repo_uri.split('/')
|
||||||
|
print(repo_uri_list)
|
||||||
api_url = "https://api.github.com/repos/" + repo_uri_list[-2] + "/" + repo_uri_list[-1] + "/milestones"
|
api_url = "https://api.github.com/repos/" + repo_uri_list[-2] + "/" + repo_uri_list[-1] + "/milestones"
|
||||||
response = requests.get(api_url)
|
response = requests.get(api_url)
|
||||||
response_dict = response.json()
|
response_dict = response.json()
|
||||||
@ -22,11 +25,29 @@ def parse_milestones(milestones, earliest_date):
|
|||||||
count_of_milestones = 0
|
count_of_milestones = 0
|
||||||
for entry in milestones:
|
for entry in milestones:
|
||||||
#if entry date is more recent than the earliest date we're looking at
|
#if entry date is more recent than the earliest date we're looking at
|
||||||
if dt.datetime.fromisoformat(entry['created_at'][:-1]) > earliest_date:
|
# TODO: decide whether to use created_at or updated_at or closed_at
|
||||||
|
if dt.datetime.fromisoformat(entry['updated_at'][:-1]) > earliest_date:
|
||||||
count_of_milestones += 1
|
count_of_milestones += 1
|
||||||
return count_of_milestones
|
return count_of_milestones
|
||||||
|
|
||||||
|
'''
|
||||||
|
#using the github API to identify who is a collaborator on the project and who is just a contributor
|
||||||
|
def split_actors(repo_uri, actors_list):
|
||||||
|
call_sheet = {'collaborator' : [], 'contributor' : []}
|
||||||
|
repo_uri_list = repo_uri.split('/')
|
||||||
|
api_url = "https://api.github.com/repos/" + repo_uri_list[-2] + "/" + repo_uri_list[-1] + "/collaborators/"
|
||||||
|
for actor in actors_list[:2]:
|
||||||
|
actor_email = actor.split('<')[1][:-1]
|
||||||
|
print(actor_email)
|
||||||
|
actor_user = get_gh_un(actor_email)
|
||||||
|
response_dict = response.json()
|
||||||
|
print(response_dict)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
#this function grabs the Github username from an associated email
|
||||||
vcs = ['https://github.com/fabiangreffrath/woof']
|
def get_gh_un(email):
|
||||||
main(vcs)
|
api_url = 'https://api.github.com/search/users?q=' + email
|
||||||
|
response = requests.get(api_url)
|
||||||
|
response_dict = response.json()
|
||||||
|
gh_username = response_dict['items'][0]['login']
|
||||||
|
return gh_username
|
||||||
|
'''
|
||||||
|
35
main.py
35
main.py
@ -5,15 +5,10 @@ import perceval_tasks as pt
|
|||||||
import github_api_req as gha
|
import github_api_req as gha
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
|
||||||
|
|
||||||
#TODO: get lists of authors -> get lists of contributors -> compute 'Mean Membership Type'
|
|
||||||
|
|
||||||
#TODO: compute Formality level metric
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window
|
# we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window
|
||||||
early_cutoff = dt.datetime(2015,3, 17)
|
early_cutoff = dt.datetime(2023,6, 17)
|
||||||
print("earliest date examined: " + str(early_cutoff))
|
print("Earliest date examined: " + str(early_cutoff))
|
||||||
#placeholder for now
|
#placeholder for now
|
||||||
manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
|
manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
|
||||||
with open(manifest, 'r') as stream:
|
with open(manifest, 'r') as stream:
|
||||||
@ -21,14 +16,36 @@ def main():
|
|||||||
config = yaml.safe_load(stream)
|
config = yaml.safe_load(stream)
|
||||||
#below lines will probably need to be refactored as tasks expand
|
#below lines will probably need to be refactored as tasks expand
|
||||||
vcs_path = config['Upstream_VCS']
|
vcs_path = config['Upstream_VCS']
|
||||||
|
print("------------------")
|
||||||
|
print(vcs_path)
|
||||||
perceval_obj = pt.main(vcs_path, early_cutoff)
|
perceval_obj = pt.main(vcs_path, early_cutoff)
|
||||||
gha_obj = gha.main(vcs_path, early_cutoff)
|
gha_obj = gha.main(vcs_path, early_cutoff)
|
||||||
#these are the two variables in the denominator of the formality measure
|
#these are the two variables in the denominator of the formality measure
|
||||||
print(perceval_obj['age_of_project'])
|
print("Age of Project: " + str(perceval_obj['age_of_project']))
|
||||||
print(gha_obj['milestone_count'])
|
print('Contributor Count: ' + str(len(perceval_obj['contributors'])))
|
||||||
|
print('Collaborator Count: ' + str(len(perceval_obj['collaborators'])))
|
||||||
|
print('Number of Milestones: ' + str(gha_obj['milestone_count']))
|
||||||
|
new_mmt = compute_new_mmt(len(perceval_obj['contributors']), len(perceval_obj['collaborators']))
|
||||||
|
print('New MMT: ' + str(new_mmt))
|
||||||
|
old_mmt = compute_old_mmt(len(perceval_obj['contributors']), len(perceval_obj['collaborators']))
|
||||||
|
print('Old MMT: ' + str(old_mmt))
|
||||||
|
#new mmt formality score
|
||||||
|
new_formality = compute_formality_score(new_mmt, gha_obj['milestone_count'], perceval_obj['age_of_project'])
|
||||||
|
print(new_formality)
|
||||||
except yaml.YAMLOError as err:
|
except yaml.YAMLOError as err:
|
||||||
print(err)
|
print(err)
|
||||||
|
|
||||||
|
#this is Yoshi 2 MMT per van Meijel
def compute_new_mmt(contrib_count, collab_count):
    """Return the mean membership type with collaborators weighted double.

    Each contributor counts 1 and each collaborator counts 2; the weighted
    sum is divided by the total number of actors.

    Args:
        contrib_count: number of contributors.
        collab_count: number of collaborators.

    Returns:
        The weighted mean as a float, or 0.0 when there are no actors at
        all (the mean is undefined, so avoid a ZeroDivisionError).
    """
    total_actors = contrib_count + collab_count
    if total_actors == 0:
        # empty roster: nothing to average over
        return 0.0
    return (contrib_count + collab_count * 2) / total_actors
|
||||||
|
|
||||||
|
#this is Yoshi 1 mmt per Tamburri
def compute_old_mmt(contrib_count, collab_count):
    """Return the share of contributors among all actors.

    Args:
        contrib_count: number of contributors.
        collab_count: number of collaborators.

    Returns:
        contrib_count / (contrib_count + collab_count) as a float, or 0.0
        when there are no actors at all (avoids a ZeroDivisionError).
    """
    total_actors = contrib_count + collab_count
    if total_actors == 0:
        # empty roster: the ratio is undefined, report 0.0 instead of crashing
        return 0.0
    return contrib_count / total_actors
|
||||||
|
|
||||||
|
#formality score
def compute_formality_score(mmt, milestones, lifetime):
    """Return the formality score: mmt / (milestones / lifetime).

    Args:
        mmt: mean membership type value (see the compute_*_mmt helpers).
        milestones: count of milestones in the examined window.
        lifetime: project lifetime; the caller passes a number of days.

    Returns:
        The score as a float. Two degenerate inputs are handled instead of
        raising ZeroDivisionError:
          * milestones == 0 -> float("inf"), the limit of the formula as
            the milestone rate goes to zero;
          * lifetime == 0   -> 0.0, the limit of mmt * lifetime / milestones.
    """
    if milestones == 0:
        # a repository without milestones is common; the rate is zero
        return float("inf")
    if lifetime == 0:
        # milestones / lifetime would divide by zero
        return 0.0
    return mmt / (milestones / lifetime)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
@ -1,19 +1,23 @@
|
|||||||
import datetime as dt
|
import datetime as dt
|
||||||
from perceval.backends.core.git import Git
|
from perceval.backends.core.git import Git
|
||||||
|
import argparse
|
||||||
|
|
||||||
#globals
|
#globals
|
||||||
repo_dir = '/tmp/perceval.git'
|
repo_dir = '/tmp/'
|
||||||
|
|
||||||
#main function for all subsequent tasks using perceval
|
#main function for all subsequent tasks using perceval
|
||||||
def main(vcs_path, begin_date):
|
def main(vcs_path, begin_date):
|
||||||
perceval_info = {}
|
perceval_info = {}
|
||||||
perceval_info['list_of_commits'] = get_perceval_log(vcs_path, begin_date)
|
perceval_info['list_of_commits'] = get_perceval_log(vcs_path, begin_date)
|
||||||
perceval_info['age_of_project'] = get_repo_age(perceval_info['list_of_commits'] )
|
perceval_info['age_of_project'] = get_repo_age(perceval_info['list_of_commits'])
|
||||||
|
perceval_info['contributors'], perceval_info['collaborators'] = get_all_actors(perceval_info['list_of_commits'])
|
||||||
return perceval_info
|
return perceval_info
|
||||||
|
|
||||||
|
|
||||||
# this is the primary function for getting the list of commits from perceval
|
# this is the primary function for getting the list of commits from perceval
|
||||||
def get_perceval_log(vcs_path, begin_date):
|
def get_perceval_log(vcs_path, begin_date):
|
||||||
|
print(vcs_path)
|
||||||
|
repo_dir = '/tmp/' + str(vcs_path[0].split('/')[-1]) + ".git"
|
||||||
repo = Git(uri=vcs_path[0], gitpath=repo_dir)
|
repo = Git(uri=vcs_path[0], gitpath=repo_dir)
|
||||||
# this is a temporary date_from, will need to be more inclusive in the future
|
# this is a temporary date_from, will need to be more inclusive in the future
|
||||||
fetched_commits = repo.fetch(from_date=begin_date)
|
fetched_commits = repo.fetch(from_date=begin_date)
|
||||||
@ -25,15 +29,25 @@ def get_repo_age(all_commits):
|
|||||||
last_commit = all_commits[-1]
|
last_commit = all_commits[-1]
|
||||||
first_date = dt.datetime.strptime(first_commit['data']["CommitDate"], '%c %z')
|
first_date = dt.datetime.strptime(first_commit['data']["CommitDate"], '%c %z')
|
||||||
last_date = dt.datetime.strptime(last_commit['data']["CommitDate"], '%c %z')
|
last_date = dt.datetime.strptime(last_commit['data']["CommitDate"], '%c %z')
|
||||||
print(first_date)
|
|
||||||
print("---------------------")
|
|
||||||
print(last_date)
|
|
||||||
#project life, as defined in YOSHI, unit is days
|
#project life, as defined in YOSHI, unit is days
|
||||||
project_life = last_date - first_date
|
project_life = last_date - first_date
|
||||||
print(project_life)
|
return project_life.total_seconds() / 86400
|
||||||
return project_life
|
|
||||||
|
|
||||||
|
#attempt at getting the rosters, though need to make sure that we can get the MR
def get_all_actors(all_commits):
    """Split the people seen in a commit list into contributors and collaborators.

    Collaborators are treated as more senior than contributors: anyone who
    appears as a committer ('Commit' field) is a collaborator, and anyone
    who only ever appears as an author ('Author' field) is a contributor.

    Args:
        all_commits: iterable of commit items, each exposing
            commit['data']['Author'] and commit['data']['Commit'].
            NOTE(review): these values are assumed to be hashable
            identity strings -- confirm against the Perceval output.

    Returns:
        A (contributors, collaborators) pair of lists, each de-duplicated
        and kept in first-seen order.
    """
    # dicts are used as insertion-ordered sets so membership checks are O(1)
    # instead of scanning a list for every commit
    authors = {}
    committers = {}
    for commit in all_commits:
        data = commit['data']
        authors.setdefault(data['Author'], None)
        committers.setdefault(data['Commit'], None)
    # an author who has also committed is counted as a collaborator only
    contributors = [author for author in authors if author not in committers]
    return contributors, list(committers)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
manifest = '../kaylea_dissertation/lifecycle/package_metadata/woof_manifest.yaml'
|
|
||||||
main(manifest)
|
|
Loading…
Reference in New Issue
Block a user