2023-10-23 20:40:24 +00:00
|
|
|
import datetime as dt
|
|
|
|
from perceval.backends.core.git import Git
|
2023-10-24 01:11:51 +00:00
|
|
|
import argparse
|
2023-10-19 16:46:00 +00:00
|
|
|
|
2023-10-23 20:40:24 +00:00
|
|
|
#globals
|
2023-11-06 22:20:35 +00:00
|
|
|
#repo_dir = '/tmp/'
|
2023-10-19 16:46:00 +00:00
|
|
|
|
2023-10-23 20:40:24 +00:00
|
|
|
#main function for all subsequent tasks using perceval
|
|
|
|
def main(vcs_path, begin_date):
    """Collect the Perceval-derived metrics for one repository.

    Fetches the commit list with ``get_perceval_log`` and, when it is
    non-empty, derives the summary values from it.

    Args:
        vcs_path: Forwarded unchanged to ``get_perceval_log``.
        begin_date: Forwarded unchanged to ``get_perceval_log``.

    Returns:
        A dict with the keys ``'age_of_project'``, ``'contributors'`` and
        ``'collaborators'``, or an empty dict when no commits were
        obtained. The raw commit list is not included in the result.
    """
    commits = get_perceval_log(vcs_path, begin_date)

    # Guard clause: nothing to measure without commits.
    if not commits:
        print('error, no commits found?')
        return {}

    age_in_days = get_repo_age(commits)
    contributor_count, collaborator_count = get_all_actors(commits)
    return {
        'age_of_project': age_in_days,
        'contributors': contributor_count,
        'collaborators': collaborator_count,
    }
|
|
|
|
|
2023-10-23 20:40:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
# this is the primary function for getting the list of commits from perceval
|
|
|
|
def get_perceval_log(vcs_path, begin_date, repo_root='/data/users/mgaughan/tmp/'):
    """Fetch a repository's commits through Perceval's ``Git`` backend.

    Args:
        vcs_path: Sequence whose first element is the repository URI
            (only ``vcs_path[0]`` is used).
        begin_date: Passed to ``Git.fetch`` as ``from_date``.
        repo_root: Directory under which the local clone is placed. The
            clone directory is ``repo_root`` joined with the last path
            component of the URI. Defaults to the previously hard-coded
            location.

    Returns:
        A list of the items yielded by ``Git.fetch``. If anything goes wrong
        (bad ``vcs_path``, clone or fetch failure) an error line is printed
        and an empty dict ``{}`` is returned; that value is kept for
        backward compatibility with existing callers.
    """
    import os

    print(vcs_path)
    try:
        # Strip trailing slashes so a URI such as ".../project/" still yields
        # "project" rather than an empty name (which would point the clone at
        # repo_root itself).
        repo_name = str(vcs_path[0].rstrip('/').split('/')[-1])
        repo_dir = os.path.join(repo_root, repo_name)
        repo = Git(uri=vcs_path[0], gitpath=repo_dir)
        # this is a temporary date_from, will need to be more inclusive in the future
        fetched_commits = repo.fetch(from_date=begin_date)
        return list(fetched_commits)
    except Exception:
        # Deliberately broad best-effort handling, but no longer a bare
        # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
        print("error, cannot fetch repo data?")
        return {}
|
2023-10-23 20:40:24 +00:00
|
|
|
|
|
|
|
#this function is just to evaluate the repository age, as defined by Tamburri and used by van Meijel
|
|
|
|
def get_repo_age(all_commits):
    """Return the project life in days (the YOSHI "project life" measure).

    The value is the difference between the ``CommitDate`` of the last and
    the first element of ``all_commits``; the list is used in the order
    given, not re-sorted.

    Args:
        all_commits: Non-empty sequence of commit items, each exposing
            ``item['data']['CommitDate']`` as a string such as
            ``'Tue Aug 14 14:30:13 2012 -0300'``.

    Returns:
        The elapsed time as a float number of days (may be fractional, and
        negative if the last element is older than the first).

    Raises:
        IndexError: If ``all_commits`` is empty.
        KeyError: If an inspected item lacks ``'data'``/``'CommitDate'``.
        ValueError: If a date string does not match the expected layout.
    """
    # Spell the layout out instead of using '%c', whose field order depends
    # on the active locale; this is what '%c %z' means in the C locale.
    date_format = '%a %b %d %H:%M:%S %Y %z'

    first_date = dt.datetime.strptime(all_commits[0]['data']['CommitDate'], date_format)
    last_date = dt.datetime.strptime(all_commits[-1]['data']['CommitDate'], date_format)

    # project life, as defined in YOSHI, unit is days
    project_life = last_date - first_date
    return project_life.total_seconds() / 86400
|
2023-10-19 16:46:00 +00:00
|
|
|
|
2023-11-06 22:20:35 +00:00
|
|
|
|
2023-10-24 01:11:51 +00:00
|
|
|
#attempt at getting the rosters, though need to make sure that we can get the MR
|
|
|
|
def get_all_actors(all_commits):
    """Count the distinct contributors and collaborators in a commit list.

    Collaborators are more senior than contributors: anyone who appears as
    the committer (``'Commit'`` field) of at least one commit counts as a
    collaborator, and contributors are the authors who never appear as a
    committer.

    Args:
        all_commits: Iterable of commit items exposing
            ``item['data']['Author']`` and ``item['data']['Commit']``.
            The identities are assumed to be hashable (e.g. strings).

    Returns:
        A ``(contributors, collaborators)`` tuple of ints: the number of
        author-only identities and the number of distinct committers.
    """
    authors = set()
    committers = set()
    for commit in all_commits:
        data = commit['data']
        authors.add(data['Author'])
        committers.add(data['Commit'])

    # Sets give de-duplication and the "author but never committer" filter
    # in linear time instead of repeated list scans and removals.
    author_only = authors - committers
    return len(author_only), len(committers)
|
2023-10-19 16:46:00 +00:00
|
|
|
|