replicated project LT and milestones

This commit is contained in:
mjgaughan 2023-10-23 15:40:24 -05:00
parent 5111c3530f
commit 5e6a08471a
3 changed files with 97 additions and 20 deletions

32
github_api_req.py Normal file
View File

@ -0,0 +1,32 @@
import requests
import datetime as dt
def main(vcs, begin_date):
    """Collect GitHub milestone data for one repository.

    vcs is handed unchanged to get_milestone_information(); begin_date is
    the cutoff handed to parse_milestones().

    Returns a dict with two keys:
      'milestones'      -- the payload returned by get_milestone_information()
      'milestone_count' -- the number parse_milestones() counts in that payload
    """
    #fetch once and reuse the same payload for the count
    milestones = get_milestone_information(vcs)
    return {
        #this is the entire list of Github 'milestones' grabbed from the API
        'milestones': milestones,
        #this is the count of milestones that occur after the cutoff date
        'milestone_count': parse_milestones(milestones, begin_date),
    }
#this simple API call has been working for now but may need to be updated as more information is desired
def get_milestone_information(vcs_path):
    """Fetch a repository's milestones from the GitHub REST API.

    vcs_path is a sequence whose first element is the repository URL,
    e.g. ['https://github.com/owner/repo']. The last two path segments are
    used as owner and repository name; a trailing '/' or '.git' is ignored.

    Returns the decoded JSON body of the response.

    Raises requests.HTTPError when the API answers with an error status
    (unknown repository, rate limit, ...), and requests.Timeout when the
    server does not answer within the timeout.
    """
    repo_uri = vcs_path[0].rstrip('/')
    #clone-style URLs end in '.git', which is not part of the repository name
    if repo_uri.endswith('.git'):
        repo_uri = repo_uri[:-len('.git')]
    repo_uri_list = repo_uri.split('/')
    api_url = "https://api.github.com/repos/" + repo_uri_list[-2] + "/" + repo_uri_list[-1] + "/milestones"
    #without a timeout a stalled connection would hang the whole run
    response = requests.get(api_url, timeout=30)
    #fail here with the HTTP status instead of handing an error payload to the caller
    response.raise_for_status()
    #NOTE(review): no 'state' or pagination parameters are sent, so the API's
    #defaults decide which milestones come back -- confirm that is the full set wanted
    return response.json()
def _parse_github_timestamp(timestamp):
    """Turn an ISO 8601 timestamp string into a timezone-aware datetime."""
    #datetime.fromisoformat() only accepts the 'Z' suffix from Python 3.11 on
    if timestamp.endswith('Z'):
        timestamp = timestamp[:-1] + '+00:00'
    parsed = dt.datetime.fromisoformat(timestamp)
    #a timestamp without an offset is taken to be UTC
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=dt.timezone.utc)
    return parsed


def parse_milestones(milestones, earliest_date):
    """Count the milestones created strictly after earliest_date.

    milestones is an iterable of dicts, each with an ISO 8601 'created_at'
    string (e.g. '2020-01-01T00:00:00Z').
    earliest_date is a datetime; a naive value is interpreted as UTC, an
    aware value is compared as the instant it denotes.

    Returns the count as an int (0 for an empty iterable).
    """
    if earliest_date.tzinfo is None:
        cutoff = earliest_date.replace(tzinfo=dt.timezone.utc)
    else:
        cutoff = earliest_date
    #if entry date is more recent than the earliest date we're looking at
    return sum(
        1
        for entry in milestones
        if _parse_github_timestamp(entry['created_at']) > cutoff
    )
if __name__ == "__main__":
    #manual smoke test against a public repository
    vcs = ['https://github.com/fabiangreffrath/woof']
    #main() requires a cutoff date as its second argument; this one is a placeholder for manual runs
    main(vcs, dt.datetime(2015, 3, 17))

34
main.py
View File

@ -1,2 +1,34 @@
import perceval import perceval
import os import os
import yaml
import perceval_tasks as pt
import github_api_req as gha
import datetime as dt
#TODO: get lists of authors -> get lists of contributors -> compute 'Mean Membership Type'
#TODO: compute Formality level metric
def main():
    """Load the package manifest and print the two formality-measure inputs.

    Reads the 'Upstream_VCS' entry from the manifest, runs the perceval and
    GitHub tasks on it with a fixed cutoff date, and prints the project age
    and the milestone count. A manifest that cannot be parsed as YAML is
    reported on stdout and the function returns without running any task.
    """
    # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window
    early_cutoff = dt.datetime(2015, 3, 17)
    print("earliest date examined: " + str(early_cutoff))
    #placeholder for now
    manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
    with open(manifest, 'r') as stream:
        #only the YAML load is guarded, so failures in the tasks below surface as themselves
        try:
            config = yaml.safe_load(stream)
        except yaml.YAMLError as err:
            print(err)
            return
    #below lines will probably need to be refactored as tasks expand
    vcs_path = config['Upstream_VCS']
    perceval_obj = pt.main(vcs_path, early_cutoff)
    gha_obj = gha.main(vcs_path, early_cutoff)
    #these are the two variables in the denominator of the formality measure
    print(perceval_obj['age_of_project'])
    print(gha_obj['milestone_count'])
#run the analysis only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()

View File

@ -1,24 +1,37 @@
import perceval import datetime as dt
import venv from perceval.backends.core.git import Git
import yaml
import subprocess
def main(manifest): #globals
with open(manifest, 'r') as stream: repo_dir = '/tmp/perceval.git'
#try:
config = yaml.safe_load(stream)
get_perceval_log(config['Upstream_VCS'])
print(config['Upstream_VCS'])
#except yaml.YAMLOError as err:
# print(err)
def get_perceval_log(vcs_path): #main function for all subsequent tasks using perceval
print(vcs_path) def main(vcs_path, begin_date):
perceval_output = subprocess.run(['perceval', 'git', vcs_path[0], '--from-date', '2023-08-08'], capture_output=True) perceval_info = {}
print(type(perceval_output.__str__())) perceval_info['list_of_commits'] = get_perceval_log(vcs_path, begin_date)
with open('test_perceval_output.txt', 'w') as f: perceval_info['age_of_project'] = get_repo_age(perceval_info['list_of_commits'] )
f.write(perceval_output.__str__()) return perceval_info
print('COMPLETE')
# this is the primary function for getting the list of commits from perceval
def get_perceval_log(vcs_path, begin_date):
    """Return the commits fetched by perceval's Git backend as a list.

    vcs_path[0] is used as the repository URI and the module-level repo_dir
    is passed as gitpath. begin_date is forwarded to fetch() as from_date.
    The fetch result is materialised into a list before it is returned.
    """
    git_backend = Git(uri=vcs_path[0], gitpath=repo_dir)
    # this is a temporary date_from, will need to be more inclusive in the future
    return list(git_backend.fetch(from_date=begin_date))
#this function is just to evaluate the repository age, as defined by Tamburri and used by van Meijel
def get_repo_age(all_commits):
    """Return the time between the first and the last commit in all_commits.

    all_commits is a sequence of dicts whose ['data']['CommitDate'] is a
    string such as 'Tue Aug 14 14:30:13 2012 -0700'. Only the first and the
    last element are read, so the sequence is expected to be in
    chronological order.

    Returns a datetime.timedelta; an empty sequence yields a zero timedelta.
    The two dates and the resulting span are also printed.
    """
    #nothing was fetched, so there is no lifespan to measure
    if not all_commits:
        return dt.timedelta(0)
    #spell the layout out: '%c' follows the process locale, this string does not change with it
    commit_date_format = '%a %b %d %H:%M:%S %Y %z'
    first_date = dt.datetime.strptime(all_commits[0]['data']["CommitDate"], commit_date_format)
    last_date = dt.datetime.strptime(all_commits[-1]['data']["CommitDate"], commit_date_format)
    print(first_date)
    print("---------------------")
    print(last_date)
    #project life, as defined in YOSHI; kept as a timedelta so callers can read .days
    project_life = last_date - first_date
    print(project_life)
    return project_life
if __name__ == "__main__": if __name__ == "__main__":