diff --git a/gh_gsql_req.py b/gh_gsql_req.py index 34aa3ca..97118b2 100644 --- a/gh_gsql_req.py +++ b/gh_gsql_req.py @@ -60,7 +60,7 @@ def get_discussion_gql(repo_owner, repo_name): data_json = json.dumps(data) headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8', 'Authorization': 'bearer ' + key} r = requests.post(url=url, data=data_json, headers=headers) - print(r.content) + #print(r.content) return r def within_time(comment_content, early_cutoff): diff --git a/main.py b/main.py index 17f08e9..0c30785 100644 --- a/main.py +++ b/main.py @@ -11,34 +11,50 @@ def main(): # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window early_cutoff = dt.datetime(2023,10, 11) print("Earliest date examined: " + str(early_cutoff)) - #placeholder for now - manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml' - with open(manifest, 'r') as stream: + largest_object = {} + #manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml' + directory='../kaylea_dissertation/lifecycle/package_metadata/' + for filename in os.listdir(directory): + f = os.path.join(directory, filename) + # checking if it is a file + if os.path.isfile(f): + print(f) + get_everything(f, largest_object, early_cutoff) + #remove this and it should just run? for the most part at least I think + break + print(largest_object.keys()) + +def get_everything(manifest_path, largest_object, early_cutoff): + with open(manifest_path, 'r') as stream: try: config = yaml.safe_load(stream) #below lines will probably need to be refactored as tasks expand vcs_path = config['Upstream_VCS'] - print("------------------") - print(vcs_path) - perceval_obj = pt.main(vcs_path, early_cutoff) - gha_obj = gha.main(vcs_path, early_cutoff) + #print("------------------") + #print(vcs_path) + repo_path = vcs_path[0] + largest_object[repo_path] = {} + largest_object[repo_path]["perceval_obj"] = pt.main(vcs_path, early_cutoff) + largest_object[repo_path]["gha_obj"] = gha.main(vcs_path, early_cutoff) #these are the two variables in the denominator of the formality measure - print("Age of Project: " + str(perceval_obj['age_of_project'])) - print('Contributor Count: ' + str(len(perceval_obj['contributors']))) - print('Collaborator Count: ' + str(len(perceval_obj['collaborators']))) - print('Number of Milestones: ' + str(gha_obj['milestone_count'])) - new_mmt = compute_new_mmt(len(perceval_obj['contributors']), len(perceval_obj['collaborators'])) - print('New MMT: ' + str(new_mmt)) - old_mmt = compute_old_mmt(len(perceval_obj['contributors']), len(perceval_obj['collaborators'])) - print('Old MMT: ' + str(old_mmt)) + #print("Age of Project: " + str(largest_object[repo_path]["perceval_obj"]['age_of_project'])) + #print('Contributor Count: ' + str(len(largest_object[repo_path]["perceval_obj"]['contributors']))) + #print('Collaborator Count: ' + str(len(largest_object[repo_path]["perceval_obj"]['collaborators']))) + #print('Number of Milestones: ' + str(largest_object[repo_path]["gha_obj"]['milestone_count'])) + largest_object[repo_path]['new_mmt'] = compute_new_mmt(len(largest_object[repo_path]["perceval_obj"]['contributors']), len(largest_object[repo_path]["perceval_obj"]['collaborators'])) + #print('New MMT: ' + str(largest_object[repo_path]['new_mmt'])) + largest_object[repo_path]['old_mmt'] = compute_old_mmt(len(largest_object[repo_path]["perceval_obj"]['contributors']), len(largest_object[repo_path]["perceval_obj"]['collaborators'])) + #print('Old MMT: ' + str(largest_object[repo_path]['old_mmt'])) #new mmt formality score - new_formality = compute_formality_score(new_mmt, gha_obj['milestone_count'], perceval_obj['age_of_project']) - print(new_formality) + largest_object[repo_path]['new_formality'] = compute_formality_score(largest_object[repo_path]['new_mmt'], largest_object[repo_path]["gha_obj"]['milestone_count'], largest_object[repo_path]["perceval_obj"]['age_of_project']) + print(largest_object[repo_path]['new_formality']) # testing out beneath: - ghs_obj = ghs.main(vcs_path, early_cutoff) - print(ghs_obj["time_cleaned_comm"]) + largest_object[repo_path]['ghs_obj'] = ghs.main(vcs_path, early_cutoff) + #print(ghs_obj["time_cleaned_comm"]) except yaml.YAMLOError as err: print(err) + print("----------------------") + #this is Yoshi 2 MMT per van Meijel def compute_new_mmt(contrib_count, collab_count):