# File: 24_deb_pkg_gov/new_contrib_window.py (repository listing: 116 lines, 4.8 KiB, Python)

import csv
from perceval.backends.core.git import Git
import os
import datetime as dt
from datetime import timezone
import time
import shutil
import dateutil
from tqdm import tqdm
import math
# Value of the KKEXKEY environment variable (None when it is unset).
# Presumably a credential; it is not referenced anywhere in the visible code.
key = os.environ.get('KKEXKEY')
# Base directory under which file_get_contribs builds the path of each
# temporary repository clone ("<temp_dir><repo name>.git").
temp_dir = "/data/users/mgaughan/tmp/"
# Relative alternative scratch directory; not referenced in the visible code.
temp_dir1 = "tmp/"
# The bare string below is a free-standing note, not a docstring of anything.
'''
- rate of change, rate of all/day
'''
def file_get_contribs(upstream_vcs_link, event_date):
    """Count merges and first-time contributors around a project's event date.

    The repository is fetched with Perceval's ``Git`` backend into a scratch
    path under ``temp_dir``, its commits are walked once in the order Perceval
    yields them, and the scratch clone is removed afterwards (also on error).

    Args:
        upstream_vcs_link: URL of the upstream repository. For links that
            contain "github" or "gitlab", only the first five "/"-separated
            components are kept, so trailing branch/issue paths are dropped.
        event_date: Date string parseable by ``dateutil``. It is stored
            unchanged in the result and converted to UTC for comparisons
            (a string without an offset is interpreted as local time by
            ``astimezone``).

    Returns:
        A dict with the keys ``upstream_vcs_link``, ``event_date``,
        ``merge_pre``, ``merge_post``, ``before_contrib_new``,
        ``after_contrib_new``, ``before_collab_new`` and ``after_collab_new``.
        The merge counters cover the whole history; the ``*_new`` counters
        only cover the 56 days before (inclusive of the event) and after the
        event. "contrib" refers to the commit's ``Author`` field and "collab"
        to its ``Commit`` (committer) field. An empty dict is returned for the
        two hard-coded group URLs that are skipped.
    """
    # Imported explicitly: a bare ``import dateutil`` does not load the
    # ``parser`` submodule, so ``dateutil.parser`` would only resolve if some
    # other module happened to import it first.
    from dateutil import parser as date_parser

    # Number of days examined on either side of the event.
    window = 56

    project_dict = {}
    project_dict['upstream_vcs_link'] = upstream_vcs_link
    project_dict['event_date'] = event_date
    event_dt = date_parser.parse(event_date).astimezone(timezone.utc)

    upstream_vcs_link = upstream_vcs_link.strip()
    hosted_on_forge = "github" in upstream_vcs_link or "gitlab" in upstream_vcs_link
    if hosted_on_forge:
        # Assumes sub-branches are not used (people would fork instead), so
        # only the repository root is looked at.
        upstream_vcs_link = "/".join(upstream_vcs_link.split("/")[0:5])
    print(upstream_vcs_link)
    repo_name = upstream_vcs_link.split('/')[4 if hosted_on_forge else -1]
    full_temp_path = temp_dir + repo_name + ".git"

    # These two links are skipped outright and yield an empty result.
    skipped_links = (
        "https://gitlab.com/ubports/core",
        "https://gitlab.freedesktop.org/xorg/lib",
    )
    if upstream_vcs_link in skipped_links:
        shutil.rmtree(full_temp_path, ignore_errors=True)
        return {}

    window_start = event_dt - dt.timedelta(days=window)
    window_end = event_dt + dt.timedelta(days=window)

    merge_pre, merge_post = 0, 0
    new_collab_pre, new_collab_post = 0, 0
    new_contrib_pre, new_contrib_post = 0, 0
    # Identities already encountered; sets give O(1) membership checks.
    seen_committers = set()
    seen_authors = set()

    repo = Git(uri=upstream_vcs_link, gitpath=full_temp_path)
    try:
        for commit in repo.fetch():
            data = commit['data']
            commit_dt = date_parser.parse(data['CommitDate'])

            if "Merge" in data:
                # A merge at or before the event belongs to the "pre" bucket
                # (same boundary rule as the contributor window below).
                if commit_dt <= event_dt:
                    merge_pre += 1
                else:
                    merge_post += 1

            in_pre_window = window_start <= commit_dt <= event_dt
            in_post_window = event_dt < commit_dt <= window_end

            # An identity is "new" the first time it shows up in the commit
            # stream; it is only counted when that first commit falls inside
            # one of the two windows.
            committer = data['Commit']
            if committer not in seen_committers:
                seen_committers.add(committer)
                if in_pre_window:
                    new_collab_pre += 1
                elif in_post_window:
                    new_collab_post += 1

            author = data['Author']
            if author not in seen_authors:
                seen_authors.add(author)
                if in_pre_window:
                    new_contrib_pre += 1
                elif in_post_window:
                    new_contrib_post += 1
    finally:
        # Always drop the scratch clone, even when fetching or parsing fails.
        shutil.rmtree(full_temp_path, ignore_errors=True)

    project_dict['merge_pre'] = merge_pre
    project_dict['merge_post'] = merge_post
    project_dict["before_contrib_new"] = new_contrib_pre
    project_dict['after_contrib_new'] = new_contrib_post
    project_dict['before_collab_new'] = new_collab_pre
    project_dict['after_collab_new'] = new_collab_post
    print(project_dict)
    return project_dict
def for_files():
    """Run ``file_get_contribs`` for every project in the input CSV.

    Reads ``final_data/deb_readme_did.csv`` (each row must provide the
    ``upstream_vcs_link`` and ``event_date`` columns) and writes one result
    row per input row to ``062424_did_first_commit.csv``. Projects for which
    ``file_get_contribs`` returns an empty dict produce a row of empty fields.
    """
    # Both paths are specific to the README dataset; they have to be changed
    # by hand to process the contributing-file dataset instead.
    csv_path = "final_data/deb_readme_did.csv"
    keys = ['upstream_vcs_link', "event_date", 'before_contrib_new', 'after_contrib_new', 'after_collab_new', 'before_collab_new', 'merge_pre', 'merge_post']
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for files passed to readers and writers.
    with open(csv_path, 'r', newline='') as file, \
            open('062424_did_first_commit.csv', "w", newline='') as writing_file:
        csv_reader = csv.DictReader(file)
        dict_writer = csv.DictWriter(writing_file, keys)
        dict_writer.writeheader()
        for row in csv_reader:
            print(row['upstream_vcs_link'])
            dict_row = file_get_contribs(row['upstream_vcs_link'].strip(), row['event_date'])
            dict_writer.writerow(dict_row)
# Script entry point: process the whole input CSV when run directly.
if __name__ == "__main__":
    for_files()
    # NOTE(review): the calls below reference file_get_pr, which is not defined
    # in the visible file; they look like leftover manual test invocations.
    #file_get_pr("https://github.com/tqdm/tqdm", True)
    #file_get_pr("https://github.com/GameServerManagers/LinuxGSM", True)
    #file_get_pr("https://github.com/walling/unorm/issues/new/", True)
    #file_get_pr("https://github.com/krahets/hello-algo/tree/dev1", True)