updating tree matching

This commit is contained in:
Matthew Gaughan 2024-03-28 12:05:50 -05:00
parent cb5f29012d
commit a3d3cb9361

View File

@@ -10,6 +10,7 @@ import dateutil
from tqdm import tqdm from tqdm import tqdm
import math import math
import io import io
import re
working_dir = "/data/users/mgaughan/kkex/time_specific_files/readme" working_dir = "/data/users/mgaughan/kkex/time_specific_files/readme"
temp_dir = "/data/users/mgaughan/tmp3/" temp_dir = "/data/users/mgaughan/tmp3/"
@@ -26,27 +27,29 @@ def get_file(vcs_link, commit_hash, is_readme):
else: else:
full_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git" full_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git"
other_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git0" other_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git0"
repo = Repo.clone_from(vcs_link, full_temp_path) repo0 = Repo.clone_from(vcs_link, full_temp_path)
repo0 = Git(uri=vcs_link, gitpath=other_temp_path) repo = Git(uri=vcs_link, gitpath=other_temp_path)
commit0 = repo.commit(commit_hash) commit0 = repo0.commit(commit_hash)
commits = repo0.fetch() commits = repo.fetch()
target_filename = "" target_filename = ""
for commit in commits: for commit in commits:
files = commit['data']['files'] files = commit['data']['files']
for file in files: for file in files:
if is_readme: if is_readme:
if "README" in file['file']: if "README" in file['file']:
print(file['file']) #print(file['file'])
if "/" in file['file']: if "/" in file['file']:
target_filename = file['file'].split("/")[-1] target_filename = file['file'].split("/")[-1]
else: else:
target_filename = file['file'] target_filename = file['file']
else: else:
if "CONTRIBUTING" in file['file']: if "CONTRIBUTING" in file['file']:
'''
if "/" in file['file']: if "/" in file['file']:
target_filename = file['file'].split("/")[-1] target_filename = file['file'].split("/")[-1]
else: else:
target_filename = file['file'] '''
target_filename = str(file['file'])
#print(commit.tree) #print(commit.tree)
#getting the name of the file from the root directory #getting the name of the file from the root directory
''' '''
@@ -63,11 +66,20 @@ def get_file(vcs_link, commit_hash, is_readme):
''' '''
if target_filename == "": if target_filename == "":
return "NoFile" return "NoFile"
target_filename = "README.md"
#issue with searching through the tree this way #issue with searching through the tree this way
targetfile = commit0.tree / target_filename # need to match the tree w blobs
#try: #print(commit0.tree.blobs)
# targetfile = commit.tree / target_filename #file_id = commit0.tree[r'README.*'].hexsha
#except KeyError: #targetfile = repo0.blob(file_id)
try:
targetfile = commit0.tree / target_filename
except KeyError:
target_filename = "README.rst"
try:
targetfile = commit0.tree / target_filename
except KeyError:
return "KeyError -- the file is not in the commit tree"
# why would a file not be in the commit tree? but would be in the directory? # why would a file not be in the commit tree? but would be in the directory?
#shutil.rmtree(full_temp_path, ignore_errors=True) #shutil.rmtree(full_temp_path, ignore_errors=True)
# return "KeyError -- the file is not in the commit tree" # return "KeyError -- the file is not in the commit tree"