120 lines
4.7 KiB
Python
120 lines
4.7 KiB
Python
import perceval
|
|
import os
|
|
import yaml
|
|
import datetime as dt
|
|
import json
|
|
#functions from other files
|
|
import perceval_tasks as pt
|
|
import github_api_req as gha
|
|
import gh_gsql_req as ghs
|
|
|
|
# In total, the data collected for each repository will look like:
#   - repository VCS url
#       - perceval object
#           list of all commits to the project
#           count of contributors and collaborators to the project
#           age of the project
#       - github api object
#           object of milestones from the project
#           count of milestones from the project
#       - github gsql object
#           - list of discussion comments from repo
#               list left blank if none
|
|
|
|
def main(directory=None, early_cutoff=None, max_files=None):
    """Run the data collection over every manifest file in a directory.

    Each regular file directly inside ``directory`` is treated as a package
    manifest and handed to ``get_everything``; sub-directories are skipped.
    Results are written per repository by ``get_everything`` itself, so
    nothing is returned.

    Args:
        directory: Folder holding the manifest files. Defaults to the
            package-metadata folder that used to be hard-coded here.
        early_cutoff: Earliest ``datetime`` to examine; passed through
            unchanged to ``get_everything``. Defaults to 2008-02-08.
        max_files: Optional cap on the number of manifests to process,
            useful for trial runs. ``None`` (the default) processes all.

    Raises:
        OSError: If ``directory`` cannot be listed (e.g. it does not exist).
    """
    if directory is None:
        directory = '../kaylea_dissertation/lifecycle/package_metadata/'
    if early_cutoff is None:
        # NOTE: still to be discussed whether to use the widely used 93 day
        # window or a longer one.
        early_cutoff = dt.datetime(2008, 2, 8)
    print("Earliest date examined: " + str(early_cutoff))

    processed = 0
    # os.listdir() returns entries in arbitrary order; sorting makes every
    # run (and every max_files-limited trial run) visit the same manifests.
    for filename in sorted(os.listdir(directory)):
        if max_files is not None and processed >= max_files:
            break
        manifest_path = os.path.join(directory, filename)
        # Only plain files are manifests; skip nested directories.
        if not os.path.isfile(manifest_path):
            continue
        print(manifest_path)
        get_everything(manifest_path, early_cutoff)
        processed += 1
|
|
|
|
def _result_filename(repo_path):
    """Build the ``<owner>_<repo>_result.json`` file name for a repository URL."""
    # Ignore trailing slashes and empty segments so that a URL such as
    # "https://host/owner/repo/" still yields "owner_repo_result.json", and
    # a path with fewer than two segments does not raise IndexError.
    segments = [part for part in repo_path.rstrip('/').split('/') if part]
    return '_'.join(segments[-2:]) + '_result.json'


def get_everything(manifest_path, early_cutoff,
                   output_dir='/data/users/mgaughan/kkex_data_111023/'):
    """Collect all data for the repository named in one manifest file.

    The manifest is a YAML file whose ``Upstream_VCS`` entry is a list; its
    first element is taken as the repository URL. The full list and
    ``early_cutoff`` are passed to ``perceval_tasks.main``,
    ``github_api_req.main`` and ``gh_gsql_req.main``, and their results are
    written as one JSON object (keys ``perceval_obj``, ``gha_obj``,
    ``ghs_obj``) to ``<output_dir>/<owner>_<repo>_result.json``.

    Problems are reported on stdout and the manifest is skipped, rather than
    raising: invalid YAML, a missing ``Upstream_VCS`` key, an empty or
    non-list ``Upstream_VCS`` value, or an empty perceval result.

    Args:
        manifest_path: Path of the YAML manifest to read.
        early_cutoff: Earliest date to examine; forwarded to the collectors.
        output_dir: Directory that receives the per-repository JSON file.

    Returns:
        None.
    """
    # Parse the manifest and close it immediately; the collection steps
    # below can run for a long time and do not need the file handle.
    with open(manifest_path, 'r') as stream:
        try:
            config = yaml.safe_load(stream)
        except yaml.YAMLError as err:
            print(err)
            print("----------------------")
            return

    #below lines will probably need to be refactored as tasks expand
    try:
        vcs_path = config['Upstream_VCS']
    except (KeyError, TypeError):
        # TypeError: the manifest was empty (safe_load gives None) or its
        # top level is not a mapping.
        print('error with the keys, i guess')
        return
    print("------------------")

    # A bare string would otherwise be indexed character-wise and yield a
    # one-letter "repository", so only non-empty sequences are accepted.
    if not isinstance(vcs_path, (list, tuple)) or not vcs_path:
        print('vcs error')
        return
    repo_path = vcs_path[0]
    if not isinstance(repo_path, str) or not repo_path:
        print('vcs error')
        return

    # Work out the destination before the expensive collection so that a
    # malformed URL cannot discard already-gathered data at the very end.
    result_path = os.path.join(output_dir, _result_filename(repo_path))

    perceval_obj = pt.main(vcs_path, early_cutoff)
    if not perceval_obj:
        print("PERCEVAL ERROR")
        return

    repo_data = {
        "perceval_obj": perceval_obj,
        "gha_obj": gha.main(vcs_path, early_cutoff),
        'ghs_obj': ghs.main(vcs_path, early_cutoff),
    }
    with open(result_path, 'w') as data_path:
        json.dump(repo_data, data_path)
    # Closing separator for this manifest's console output.
    print("----------------------")
|
|
|
|
|
|
def compute_new_mmt(contrib_count, collab_count):
    """Return the Yoshi 2 MMT (per van Meijel).

    Collaborators are weighted twice as heavily as contributors, and the
    weighted sum is divided by the plain head-count.

    Raises:
        ZeroDivisionError: If both counts are zero.
    """
    weighted_members = contrib_count + collab_count * 2
    total_members = contrib_count + collab_count
    return weighted_members / total_members
|
|
|
|
def compute_old_mmt(contrib_count, collab_count):
    """Return the Yoshi 1 MMT (per Tamburri).

    This is the share of contributors among contributors and collaborators
    combined.

    Raises:
        ZeroDivisionError: If both counts are zero.
    """
    total_members = contrib_count + collab_count
    return contrib_count / total_members
|
|
|
|
def compute_formality_score(mmt, milestones, lifetime):
    """Return the formality score: ``mmt`` divided by the milestone rate.

    The milestone rate is ``milestones / lifetime``.

    Raises:
        ZeroDivisionError: If ``lifetime`` or ``milestones`` is zero.
    """
    milestone_rate = milestones / lifetime
    return mmt / milestone_rate
|
|
|
|
# Start the collection run only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
# Retired formality-score computation, kept for reference only. It sits in a
# string literal, so it is not executed.
'''
if largest_object[repo_path]["gha_obj"]['milestone_count'] == 0:
    #del largest_object[repo_path]
    #return
    #this is to ensure that projects which don't use milestones are counted
    largest_object[repo_path]["gha_obj"]['milestone_count'] = 0.1
largest_object[repo_path]['new_mmt'] = compute_new_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators'])
#print('New MMT: ' + str(largest_object[repo_path]['new_mmt']))
largest_object[repo_path]['old_mmt'] = compute_old_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators'])
#print('Old MMT: ' + str(largest_object[repo_path]['old_mmt']))
#new mmt formality score
largest_object[repo_path]['new_formality'] = compute_formality_score(largest_object[repo_path]['new_mmt'], largest_object[repo_path]["gha_obj"]['milestone_count'], largest_object[repo_path]["perceval_obj"]['age_of_project'])
print(largest_object[repo_path]['new_formality'])
'''
|