diff --git a/main.py b/main.py index a13fe0d..82a9ced 100644 --- a/main.py +++ b/main.py @@ -23,7 +23,7 @@ import gh_gsql_req as ghs def main(): # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window - early_cutoff = dt.datetime(2013,11, 6) + early_cutoff = dt.datetime(2013,11, 8) print("Earliest date examined: " + str(early_cutoff)) #largest_object = {} #manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml' @@ -57,35 +57,38 @@ def get_everything(manifest_path, early_cutoff): #below lines will probably need to be refactored as tasks expand vcs_path = config['Upstream_VCS'] print("------------------") - #print(vcs_path) - repo_path = vcs_path[0] - largest_object[repo_path] = {} - largest_object[repo_path]["perceval_obj"] = pt.main(vcs_path, early_cutoff) - if len(largest_object[repo_path]["perceval_obj"]) == 0: - print("PERCEVAL ERROR") - del largest_object[repo_path] - return - largest_object[repo_path]["gha_obj"] = gha.main(vcs_path, early_cutoff) - ''' - if largest_object[repo_path]["gha_obj"]['milestone_count'] == 0: - #del largest_object[repo_path] - #return - #this is to ensure that projects which don't use milestones are counted - largest_object[repo_path]["gha_obj"]['milestone_count'] = 0.1 - largest_object[repo_path]['new_mmt'] = compute_new_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators']) - #print('New MMT: ' + str(largest_object[repo_path]['new_mmt'])) - largest_object[repo_path]['old_mmt'] = compute_old_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators']) - #print('Old MMT: ' + str(largest_object[repo_path]['old_mmt'])) - #new mmt formality score - largest_object[repo_path]['new_formality'] = compute_formality_score(largest_object[repo_path]['new_mmt'], largest_object[repo_path]["gha_obj"]['milestone_count'], largest_object[repo_path]["perceval_obj"]['age_of_project']) - print(largest_object[repo_path]['new_formality']) - ''' - # testing out beneath: - largest_object[repo_path]['ghs_obj'] = ghs.main(vcs_path, early_cutoff) - #print(ghs_obj["time_cleaned_comm"]) - repo_uri_list = repo_path.split('/') - with open('/data/users/mgaughan/kkex_data_110823/' + repo_uri_list[-2] + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path: - json.dump(largest_object[repo_path], data_path) + #print(vcs_path) return + try: + repo_path = vcs_path[0] + largest_object[repo_path] = {} + largest_object[repo_path]["perceval_obj"] = pt.main(vcs_path, early_cutoff) + if len(largest_object[repo_path]["perceval_obj"]) == 0: + print("PERCEVAL ERROR") + del largest_object[repo_path] + return + largest_object[repo_path]["gha_obj"] = gha.main(vcs_path, early_cutoff) + ''' + if largest_object[repo_path]["gha_obj"]['milestone_count'] == 0: + #del largest_object[repo_path] + #return + #this is to ensure that projects which don't use milestones are counted + largest_object[repo_path]["gha_obj"]['milestone_count'] = 0.1 + largest_object[repo_path]['new_mmt'] = compute_new_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators']) + #print('New MMT: ' + str(largest_object[repo_path]['new_mmt'])) + largest_object[repo_path]['old_mmt'] = compute_old_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators']) + #print('Old MMT: ' + str(largest_object[repo_path]['old_mmt'])) + #new mmt formality score + largest_object[repo_path]['new_formality'] = compute_formality_score(largest_object[repo_path]['new_mmt'], largest_object[repo_path]["gha_obj"]['milestone_count'], largest_object[repo_path]["perceval_obj"]['age_of_project']) + print(largest_object[repo_path]['new_formality']) + ''' + # testing out beneath: + largest_object[repo_path]['ghs_obj'] = ghs.main(vcs_path, early_cutoff) + #print(ghs_obj["time_cleaned_comm"]) + repo_uri_list = repo_path.split('/') + with open('/data/users/mgaughan/kkex_data_110823_1/' + repo_uri_list[-2] + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path: + json.dump(largest_object[repo_path], data_path) + except: + print("miscellanea error in the main body") except yaml.YAMLError as err: print(err) print("----------------------") diff --git a/perceval_tasks.py b/perceval_tasks.py index 519c079..d398a36 100644 --- a/perceval_tasks.py +++ b/perceval_tasks.py @@ -23,8 +23,8 @@ def main(vcs_path, begin_date): # this is the primary function for getting the list of commits from perceval def get_perceval_log(vcs_path, begin_date): print(vcs_path) - repo_dir = '/data/users/mgaughan/tmp/' + str(vcs_path[0].split('/')[-1]) try: + repo_dir = '/data/users/mgaughan/tmp/' + str(vcs_path[0].split('/')[-1]) #gitpath=repo_dir repo = Git(uri=vcs_path[0], gitpath=repo_dir) # this is a temporary date_from, will need to be more inclusive in the future