merge fixes

This commit is contained in:
Matthew Gaughan 2023-11-08 10:55:48 -06:00
commit 268ca229a2
2 changed files with 34 additions and 31 deletions

63
main.py
View File

@ -23,7 +23,7 @@ import gh_gsql_req as ghs
def main(): def main():
# we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window
early_cutoff = dt.datetime(2013,11, 6) early_cutoff = dt.datetime(2013,11, 8)
print("Earliest date examined: " + str(early_cutoff)) print("Earliest date examined: " + str(early_cutoff))
#largest_object = {} #largest_object = {}
#manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml' #manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
@ -57,35 +57,38 @@ def get_everything(manifest_path, early_cutoff):
#below lines will probably need to be refactored as tasks expand #below lines will probably need to be refactored as tasks expand
vcs_path = config['Upstream_VCS'] vcs_path = config['Upstream_VCS']
print("------------------") print("------------------")
#print(vcs_path) #print(vcs_path) return
repo_path = vcs_path[0] try:
largest_object[repo_path] = {} repo_path = vcs_path[0]
largest_object[repo_path]["perceval_obj"] = pt.main(vcs_path, early_cutoff) largest_object[repo_path] = {}
if len(largest_object[repo_path]["perceval_obj"]) == 0: largest_object[repo_path]["perceval_obj"] = pt.main(vcs_path, early_cutoff)
print("PERCEVAL ERROR") if len(largest_object[repo_path]["perceval_obj"]) == 0:
del largest_object[repo_path] print("PERCEVAL ERROR")
return del largest_object[repo_path]
largest_object[repo_path]["gha_obj"] = gha.main(vcs_path, early_cutoff) return
''' largest_object[repo_path]["gha_obj"] = gha.main(vcs_path, early_cutoff)
if largest_object[repo_path]["gha_obj"]['milestone_count'] == 0: '''
#del largest_object[repo_path] if largest_object[repo_path]["gha_obj"]['milestone_count'] == 0:
#return #del largest_object[repo_path]
#this is to ensure that projects which don't use milestones are counted #return
largest_object[repo_path]["gha_obj"]['milestone_count'] = 0.1 #this is to ensure that projects which don't use milestones are counted
largest_object[repo_path]['new_mmt'] = compute_new_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators']) largest_object[repo_path]["gha_obj"]['milestone_count'] = 0.1
#print('New MMT: ' + str(largest_object[repo_path]['new_mmt'])) largest_object[repo_path]['new_mmt'] = compute_new_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators'])
largest_object[repo_path]['old_mmt'] = compute_old_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators']) #print('New MMT: ' + str(largest_object[repo_path]['new_mmt']))
#print('Old MMT: ' + str(largest_object[repo_path]['old_mmt'])) largest_object[repo_path]['old_mmt'] = compute_old_mmt(largest_object[repo_path]["perceval_obj"]['contributors'], largest_object[repo_path]["perceval_obj"]['collaborators'])
#new mmt formality score #print('Old MMT: ' + str(largest_object[repo_path]['old_mmt']))
largest_object[repo_path]['new_formality'] = compute_formality_score(largest_object[repo_path]['new_mmt'], largest_object[repo_path]["gha_obj"]['milestone_count'], largest_object[repo_path]["perceval_obj"]['age_of_project']) #new mmt formality score
print(largest_object[repo_path]['new_formality']) largest_object[repo_path]['new_formality'] = compute_formality_score(largest_object[repo_path]['new_mmt'], largest_object[repo_path]["gha_obj"]['milestone_count'], largest_object[repo_path]["perceval_obj"]['age_of_project'])
''' print(largest_object[repo_path]['new_formality'])
# testing out beneath: '''
largest_object[repo_path]['ghs_obj'] = ghs.main(vcs_path, early_cutoff) # testing out beneath:
#print(ghs_obj["time_cleaned_comm"]) largest_object[repo_path]['ghs_obj'] = ghs.main(vcs_path, early_cutoff)
repo_uri_list = repo_path.split('/') #print(ghs_obj["time_cleaned_comm"])
with open('/data/users/mgaughan/kkex_data_110823/' + repo_uri_list[-2] + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path: repo_uri_list = repo_path.split('/')
json.dump(largest_object[repo_path], data_path) with open('/data/users/mgaughan/kkex_data_110823_1/' + repo_uri_list[-2] + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path:
json.dump(largest_object[repo_path], data_path)
except:
print("miscellanea error in the main body")
except yaml.YAMLError as err: except yaml.YAMLError as err:
print(err) print(err)
print("----------------------") print("----------------------")

View File

@ -23,8 +23,8 @@ def main(vcs_path, begin_date):
# this is the primary function for getting the list of commits from perceval # this is the primary function for getting the list of commits from perceval
def get_perceval_log(vcs_path, begin_date): def get_perceval_log(vcs_path, begin_date):
print(vcs_path) print(vcs_path)
repo_dir = '/data/users/mgaughan/tmp/' + str(vcs_path[0].split('/')[-1])
try: try:
repo_dir = '/data/users/mgaughan/tmp/' + str(vcs_path[0].split('/')[-1])
#gitpath=repo_dir #gitpath=repo_dir
repo = Git(uri=vcs_path[0], gitpath=repo_dir) repo = Git(uri=vcs_path[0], gitpath=repo_dir)
# this is a temporary date_from, will need to be more inclusive in the future # this is a temporary date_from, will need to be more inclusive in the future