# NOTE: import order is kept exactly as in the original file because the
# wildcard import below could otherwise shadow (or be shadowed by) other names.
import datetime as dt
from perceval.backends.core.git import Git
import argparse
from wrapt_timeout_decorator import *
import json

# globals
# repo_dir = '/tmp/'

# Directory that receives one "commits_<repo>.json" dump per repository.
_COMMIT_DATA_DIR = "/data/users/mgaughan/kkex_commit_data_121323/"
# Parent directory of the local clones handed to Perceval as ``gitpath``.
_REPO_TMP_DIR = '/data/users/mgaughan/tmp/'


def _repo_name(vcs_path):
    """Return the last path segment of ``vcs_path``.

    Surrounding whitespace and trailing slashes are ignored so that
    ``"https://host/owner/repo/\\n"`` still yields ``"repo"`` instead of an
    empty string or a name containing a newline.
    """
    return vcs_path.strip().rstrip('/').split('/')[-1]


# main function for all subsequent tasks using perceval
def main(vcs_path, begin_date):
    """Collect commit data for one repository and dump it to a JSON file.

    Args:
        vcs_path: URI of the repository; its last path segment names the
            output file.
        begin_date: passed through to ``get_perceval_log`` as the start date.

    Returns:
        A dict with the keys ``age_of_project``, ``contributors``,
        ``collaborators``, ``contributors_list`` and ``collaborators_list``
        (the commit list itself is written to disk but removed from the
        returned dict), or ``{}`` when no commits were obtained.
    """
    commits = get_perceval_log(vcs_path, begin_date)
    if not commits:
        print('error, no commits found?')
        return {}

    perceval_info = {'list_of_commits': commits}
    perceval_info['age_of_project'] = get_repo_age(commits)
    (
        perceval_info['contributors'],
        perceval_info['collaborators'],
        perceval_info['contributors_list'],
        perceval_info['collaborators_list'],
    ) = get_all_actors(commits)

    # Use the same normalised repository name as get_perceval_log so the
    # dump file and the clone directory always agree.
    out_path = _COMMIT_DATA_DIR + "commits_" + _repo_name(vcs_path) + '.json'
    with open(out_path, 'w') as commits_path:
        json.dump(perceval_info, commits_path)

    # The full commit list is only needed on disk; drop it from the summary.
    del perceval_info['list_of_commits']
    print(perceval_info)
    return perceval_info


# this is the primary function for getting the list of commits from perceval
@timeout(600, use_signals=False)
def get_perceval_log(vcs_path, begin_date):
    """Fetch the commits of ``vcs_path`` through Perceval's ``Git`` backend.

    Args:
        vcs_path: repository URI; surrounding whitespace is stripped.
        begin_date: forwarded to ``Git.fetch`` as ``from_date``.

    Returns:
        A list of the fetched commit items. On any ordinary error an empty
        dict ``{}`` is returned (kept as-is for existing callers; it is
        falsy and has length 0 like an empty list).

    NOTE(review): the ``timeout`` decorator presumably aborts the call after
    600 seconds by raising in the caller -- confirm against the
    wrapt_timeout_decorator documentation; that error is not handled here.
    """
    vcs_path = vcs_path.strip()
    print(vcs_path)
    try:
        repo_dir = _REPO_TMP_DIR + _repo_name(vcs_path)
        repo = Git(uri=vcs_path, gitpath=repo_dir)
        # this is a temporary date_from, will need to be more inclusive in the future
        fetched_commits = repo.fetch(from_date=begin_date)
        return list(fetched_commits)
    except Exception:
        # Deliberately best-effort, but no longer swallows KeyboardInterrupt
        # or SystemExit the way a bare ``except:`` did.
        print("error, cannot fetch repo data?")
        return {}


# this function is just to evaluate the repository age, as defined by Tamburri
# and used by van Meijel
def get_repo_age(all_commits):
    """Return the time between the first and last listed commit, in days.

    Args:
        all_commits: non-empty sequence of commit items, each exposing
            ``item['data']['CommitDate']``. The first and last elements are
            taken as the oldest and newest commit -- the list is not sorted
            here.

    Returns:
        The difference ``last - first`` as a float number of days.
    """
    # Explicit spelling of what '%c %z' means in the C locale, so parsing no
    # longer depends on the process locale.
    # NOTE(review): assumes dates look like "Tue Aug 14 14:32:15 2012 -0300".
    date_format = '%a %b %d %H:%M:%S %Y %z'
    first_date = dt.datetime.strptime(all_commits[0]['data']["CommitDate"], date_format)
    last_date = dt.datetime.strptime(all_commits[-1]['data']["CommitDate"], date_format)
    # project life, as defined in YOSHI, unit is days
    project_life = last_date - first_date
    return project_life.total_seconds() / 86400


# attempt at getting
# the rosters, though need to make sure that we can get the MR
def get_all_actors(all_commits):
    """Split the people appearing in ``all_commits`` into two rosters.

    Args:
        all_commits: iterable of commit items, each exposing
            ``item['data']['Author']`` and ``item['data']['Commit']``.
            Both values must be hashable (assumed to be strings -- confirm
            against the producer of the commit items).

    Returns:
        A 4-tuple ``(n_authors, n_committers, authors, committers)`` where
        ``committers`` lists every distinct ``Commit`` value in first-seen
        order and ``authors`` lists every distinct ``Author`` value, also in
        first-seen order, that never appears as a committer.
    """
    # collaborators are more senior than contributors, doing it by author/commit
    # Dicts are used as insertion-ordered sets: O(1) membership instead of
    # scanning a list for every commit.
    seen_authors = {}
    seen_committers = {}
    for commit in all_commits:
        data = commit['data']
        seen_authors.setdefault(data['Author'])
        seen_committers.setdefault(data['Commit'])

    # now cleaning lists: anyone who committed is not counted as a plain author
    committers = list(seen_committers)
    authors = [person for person in seen_authors if person not in seen_committers]
    return len(authors), len(committers), authors, committers


if __name__ == "__main__":
    main(" https://github.com/pali/0xFFFF".strip(), dt.datetime(2008, 2, 8))