open questions around time specific data

This commit is contained in:
Matthew Gaughan 2024-03-27 11:40:25 -05:00
parent 86584096e2
commit 231189b64c

View File

@ -1,5 +1,6 @@
import csv import csv
from git import Repo from git import Repo
from perceval.backends.core.git import Git
import os import os
import datetime as dt import datetime as dt
import time import time
@ -21,28 +22,55 @@ def get_file(vcs_link, commit_hash, is_readme):
#this only looks at main #this only looks at main
vcs_link = "/".join(vcs_link.split("/")[0:5]) vcs_link = "/".join(vcs_link.split("/")[0:5])
full_temp_path = temp_dir + vcs_link.split('/')[4] + ".git" full_temp_path = temp_dir + vcs_link.split('/')[4] + ".git"
other_temp_path = temp_dir + vcs_link.split('/')[4] + ".git0"
else: else:
full_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git" full_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git"
other_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git0"
repo = Repo.clone_from(vcs_link, full_temp_path) repo = Repo.clone_from(vcs_link, full_temp_path)
commit = repo.commit(commit_hash) repo0 = Git(uri=vcs_link, gitpath=other_temp_path)
commit0 = repo.commit(commit_hash)
commits = repo0.fetch()
target_filename = ""
for commit in commits:
files = commit['data']['files']
for file in files:
if is_readme:
if "README" in file['file']:
print(file['file'])
if "/" in file['file']:
target_filename = file['file'].split("/")[-1]
else:
target_filename = file['file']
else:
if "CONTRIBUTING" in file['file']:
if "/" in file['file']:
target_filename = file['file'].split("/")[-1]
else:
target_filename = file['file']
#print(commit.tree)
#getting the name of the file from the root directory #getting the name of the file from the root directory
'''
target_filename = "" target_filename = ""
for filename in os.listdir(full_temp_path): for filename in os.listdir(full_temp_path):
if is_readme: if is_readme:
#target_filename = "README.md" #target_filename = "README.md"
if "README" in filename: if "README" in filename or "readme" in filename:
target_filename = filename target_filename = filename
else: else:
#target_filename = "CONTRIBUTING.md" #target_filename = "CONTRIBUTING.md"
if "CONTRIBUTING" in filename: if "CONTRIBUTING" in filename or "contributing" in filename:
target_filename = filename target_filename = filename
'''
if target_filename == "": if target_filename == "":
return "NoFile" return "NoFile"
try: #issue with searching through the tree this way
targetfile = commit.tree / target_filename targetfile = commit0.tree / target_filename
except KeyError: #try:
# targetfile = commit.tree / target_filename
#except KeyError:
# why would a file not be in the commit tree? but would be in the directory? # why would a file not be in the commit tree? but would be in the directory?
return "KeyError" #shutil.rmtree(full_temp_path, ignore_errors=True)
# return "KeyError -- the file is not in the commit tree"
if is_readme: if is_readme:
last_path = "readme" last_path = "readme"
else: else:
@ -52,6 +80,7 @@ def get_file(vcs_link, commit_hash, is_readme):
file.write(f.read().decode('utf-8')) file.write(f.read().decode('utf-8'))
file.close() file.close()
shutil.rmtree(full_temp_path, ignore_errors=True) shutil.rmtree(full_temp_path, ignore_errors=True)
shutil.rmtree(other_temp_path, ignore_errors=True)
return "NoError" return "NoError"
def for_all_files(): def for_all_files():