backing up pr data work

This commit is contained in:
Matthew Gaughan 2024-03-06 20:20:44 -06:00
parent 8c02a5f307
commit fa4e6ae48c

View File

@ -1,15 +1,28 @@
from perceval.backends.core.git import Git from perceval.backends.core.git import Git
import os import os
import datetime as dt import datetime as dt
import time
import shutil import shutil
import pandas as pd
import dateutil
from tqdm import tqdm
key = os.environ.get('KKEXKEY') key = os.environ.get('KKEXKEY')
early_cutoff = dt.datetime(2008,2, 8) early_cutoff = dt.datetime(2008,2, 8)
temp_dir = "/data/users/mgaughan/tmp" temp_dir = "/data/users/mgaughan/tmp"
'''
- what we really want is the 6 weeks before and after these events
- let's start by getting 6 weeks (42 days) before and after the first appearance of a given document
- rate of change, rate of PRs/day
'''
def file_get_pr(upstream_vcs_link): def file_get_pr(upstream_vcs_link):
#this is the window of days on either side of the event that we're looking at
window = 42
#print(upstream_vcs_link.split('/')[4]) #print(upstream_vcs_link.split('/')[4])
project_dict = {}
project_dict['upstream_vcs_link'] = upstream_vcs_link
full_temp_path = temp_dir + upstream_vcs_link.split('/')[4] + ".git" full_temp_path = temp_dir + upstream_vcs_link.split('/')[4] + ".git"
repo = Git(uri=upstream_vcs_link, gitpath=full_temp_path) repo = Git(uri=upstream_vcs_link, gitpath=full_temp_path)
try: try:
@ -20,27 +33,59 @@ def file_get_pr(upstream_vcs_link):
has_readme = False has_readme = False
has_contributing = False has_contributing = False
merge_pre_rm, merge_post_rm, merge_pre_cont, merge_post_cont = 0, 0, 0, 0 merge_pre_rm, merge_post_rm, merge_pre_cont, merge_post_cont = 0, 0, 0, 0
#list of tuples which has date and whether it was a merge
commit_list = []
for commit in commits: for commit in commits:
if "Merge" in commit['data'].keys(): if "Merge" in commit['data'].keys():
commit_list.append([commit['data']['CommitDate'], True])
if has_contributing: if has_contributing:
merge_post_cont += 1 merge_post_cont += 1
else: else:
merge_pre_cont += 1 merge_pre_cont += 1
print('merge') else:
else: commit_list.append([commit['data']['CommitDate'], False])
print('not')
files = commit['data']['files'] files = commit['data']['files']
#print(commit['data']['CommitDate']) #print(commit['data']['CommitDate'])
#print(type(dateutil.parser.parse(commit['data']['CommitDate'])))
for file in files: for file in files:
if "CONTRIBUTING.md" == file['file']: if "CONTRIBUTING.md" == file['file'] and has_contributing == False:
has_contributing = True has_contributing = True
if "README.md" == file['file']: first_date_contributing = dateutil.parser.parse(commit['data']['CommitDate'])
if "README.md" == file['file'] and has_readme == False:
has_readme = True has_readme = True
first_date_readme = dateutil.parser.parse(commit['data']['CommitDate'])
shutil.rmtree(full_temp_path, ignore_errors=True) shutil.rmtree(full_temp_path, ignore_errors=True)
print("merge pre cont: " + str(merge_pre_cont)) project_dict['first_contributing'] = first_date_contributing
print('merge post cont: ' + str(merge_post_cont)) project_dict['first_readme'] = first_date_readme
before_cont = pr_count(first_date_contributing + dt.timedelta(days=-window, hours=0), first_date_contributing, commit_list)
project_dict['b6w_prs_cont'] = before_cont[0]
project_dict['b6w_mrg_cont'] = before_cont[1]
after_cont = pr_count(first_date_contributing, first_date_contributing + dt.timedelta(days=window, hours=0), commit_list)
project_dict['a6w_prs_cont'] = after_cont[0]
project_dict['a6w_mrg_cont'] = after_cont[1]
before_read = pr_count(first_date_readme+ dt.timedelta(days=-window, hours=0), first_date_readme, commit_list)
project_dict['b6w_prs_read'] = before_read[0]
project_dict['b6w_mrg_read'] = before_read[1]
after_read = pr_count(first_date_readme, first_date_readme + dt.timedelta(days=window, hours=0), commit_list)
project_dict['a6w_prs_read'] = after_read[0]
project_dict['a6w_mrg_read'] = after_read[1]
print(project_dict)
return project_dict
def pr_count(start, end, commits, show_progress=True):
    """Count commits, and the merge commits among them, inside a date window.

    A commit is counted when ``start < commit_date <= end``.

    Args:
        start: datetime marking the exclusive lower bound of the window.
        end: datetime marking the inclusive upper bound of the window.
        commits: iterable of ``[date, is_merge]`` entries. ``date`` is either
            a date string that ``dateutil.parser.parse`` understands or an
            already-parsed ``datetime``; ``is_merge`` is truthy for merges.
        show_progress: when true (the default), wrap the iteration in a
            ``tqdm`` progress bar.

    Returns:
        A two-element list ``[count, merge_count]``. A list is always
        returned, including for empty input or when nothing falls in the
        window.
    """
    count = 0
    merge_count = 0
    entries = tqdm(commits) if show_progress else commits
    for commit in entries:
        raw_date = commit[0]
        # Parse each date once; entries that are already datetimes are used
        # as-is so a caller can pre-parse the list and reuse it across windows.
        if isinstance(raw_date, dt.datetime):
            commit_date = raw_date
        else:
            commit_date = dateutil.parser.parse(raw_date)
        # Check both bounds before counting. Every entry is examined, so the
        # first commit past `end` is never counted and the result does not
        # depend on the entries being in chronological order.
        if start < commit_date <= end:
            count += 1
            if commit[1]:
                merge_count += 1
    return [count, merge_count]
if __name__ == "__main__": if __name__ == "__main__":
file_get_pr("https://github.com/tqdm/tqdm") file_get_pr("https://github.com/tqdm/tqdm")
file_get_pr("https://github.com/GameServerManagers/LinuxGSM")