# 24_deb_pkg_gov/main.py

import perceval
import os
import yaml
import datetime as dt

import perceval_tasks as pt
import github_api_req as gha
import gh_gsql_req as ghs

def main():
    """Collect governance metrics for the first manifest file found.

    Lists the package-metadata directory, passes the first regular file in
    it to ``get_everything`` together with a shared results dictionary and
    the cutoff date, then prints the keys gathered in that dictionary.
    """
    # we should discuss whether we're using the 93 day window that seems to
    # be widely used or if we want a longer window
    early_cutoff = dt.datetime(2023, 10, 11)
    print("Earliest date examined: " + str(early_cutoff))
    largest_object = {}
    #manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
    directory = '../kaylea_dissertation/lifecycle/package_metadata/'
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # Only regular files can be opened as manifests; skipping anything
        # else keeps get_everything from failing on a sub-directory.
        if not os.path.isfile(f):
            continue
        print(f)
        get_everything(f, largest_object, early_cutoff)
        # Only the first manifest is processed for now; remove this break
        # to run over every manifest in the directory.
        break
    print(largest_object.keys())

def get_everything(manifest_path, largest_object, early_cutoff):
    """Gather every metric for the project described by one manifest.

    Args:
        manifest_path: Path of the YAML manifest to read. Its
            ``Upstream_VCS`` entry is passed to the collector modules and
            its first element is used as the key of the result entry.
        largest_object: Results dictionary, mutated in place. A new entry
            is stored under the first ``Upstream_VCS`` element and filled
            with the keys ``perceval_obj``, ``gha_obj``, ``new_mmt``,
            ``old_mmt``, ``new_formality`` and ``ghs_obj``.
        early_cutoff: Earliest date to examine; forwarded unchanged to the
            collector modules.

    A manifest that cannot be parsed as YAML is reported on stdout and
    skipped. Any other error propagates to the caller.
    """
    # Keep the file open only for parsing so the handle is released before
    # the collectors below run.
    with open(manifest_path, 'r') as stream:
        try:
            config = yaml.safe_load(stream)
        except yaml.YAMLError as err:
            print(err)
            print("----------------------")
            return

    #below lines will probably need to be refactored as tasks expand
    vcs_path = config['Upstream_VCS']
    repo_path = vcs_path[0]

    # Register the entry before collecting so it exists under repo_path
    # even if a later step raises.
    entry = {}
    largest_object[repo_path] = entry

    perceval_obj = pt.main(vcs_path, early_cutoff)
    entry["perceval_obj"] = perceval_obj
    gha_obj = gha.main(vcs_path, early_cutoff)
    entry["gha_obj"] = gha_obj

    contributor_count = len(perceval_obj['contributors'])
    collaborator_count = len(perceval_obj['collaborators'])
    entry['new_mmt'] = compute_new_mmt(contributor_count, collaborator_count)
    entry['old_mmt'] = compute_old_mmt(contributor_count, collaborator_count)

    # new mmt formality score; the milestone count and the age of the
    # project are the two variables in its denominator
    entry['new_formality'] = compute_formality_score(
        entry['new_mmt'],
        gha_obj['milestone_count'],
        perceval_obj['age_of_project'],
    )
    print(entry['new_formality'])

    # testing out beneath:
    entry['ghs_obj'] = ghs.main(vcs_path, early_cutoff)
    print("----------------------")


def compute_new_mmt(contrib_count, collab_count):
    """Return the Yoshi 2 MMT (per van Meijel).

    Collaborators are counted twice in the numerator, contributors once;
    the sum is divided by the combined number of both groups.
    """
    weighted_sum = contrib_count + collab_count * 2
    total = contrib_count + collab_count
    return weighted_sum / total

def compute_old_mmt(contrib_count, collab_count):
    """Return the Yoshi 1 MMT (per Tamburri).

    This is the contributor count divided by the combined number of
    contributors and collaborators.
    """
    total = contrib_count + collab_count
    return contrib_count / total

def compute_formality_score(mmt, milestones, lifetime):
    """Return the formality score: ``mmt`` divided by milestones per lifetime."""
    milestone_rate = milestones / lifetime
    return mmt / milestone_rate

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()