updates to time_specific_file grab

This commit is contained in:
Matthew Gaughan 2024-03-28 16:38:13 -05:00
parent 78490ff65b
commit 72d26cd834

View File

@ -65,13 +65,17 @@ def get_file(vcs_link, commit_hash, is_readme):
target_filename = filename
'''
if target_filename == "":
shutil.rmtree(full_temp_path, ignore_errors=True)
shutil.rmtree(other_temp_path, ignore_errors=True)
return "NoFile"
target_filename = "README.md"
#target_filename = r"/README/"
#issue with searching through the tree this way
# need to match the tree w blobs
#print(commit0.tree.blobs)
#file_id = commit0.tree[r'README.*'].hexsha
#targetfile = repo0.blob(file_id)
#targetfile = commit0.tree / target_filename
'''
try:
targetfile = commit0.tree / target_filename
except KeyError:
@ -79,7 +83,23 @@ def get_file(vcs_link, commit_hash, is_readme):
try:
targetfile = commit0.tree / target_filename
except KeyError:
target_filename = "README"
try:
targetfile = commit0.tree / target_filename
except KeyError:
target_filename = "README.txt"
try:
targetfile = commit0.tree / target_filename
except KeyError:
shutil.rmtree(full_temp_path, ignore_errors=True)
shutil.rmtree(other_temp_path, ignore_errors=True)
return "KeyError -- the file is not in the commit tree"
'''
print(target_filename)
for blob in commit0.tree.blobs:
if "README" in blob.path:
targetfile = blob
print(blob.path)
# why would a file not be in the commit tree? but would be in the directory?
#shutil.rmtree(full_temp_path, ignore_errors=True)
# return "KeyError -- the file is not in the commit tree"
@ -101,7 +121,7 @@ def for_all_files():
csv_path = "kk_031624_pr_did.csv"
index = -1
with open(csv_path, 'r') as file:
with open('031824_spec_errors.csv', "w") as writing_file:
with open('a_031824_spec_errors.csv', "w") as writing_file:
csv_writer = csv.writer(writing_file)
#csv_reader = csv.DictReader(file)
lines = [line for line in file]
@ -119,5 +139,5 @@ def for_all_files():
if __name__ == "__main__":
    # Script entry point: run the batch over the CSV that for_all_files() opens.
    for_all_files()
    # Single-repository invocations of get_file(), kept for manual debugging.
    # Uncomment one to exercise a specific (repository URL, commit hash) pair.
    #get_file('https://github.com/tqdm/tqdm', 'fbe7952cce11e8073378b063bdae7ab277a96eb8', True)
    #print(get_file('https://github.com/the-tcpdump-group/tcpslice', 'ffac277bf41946a1d985afae7fe2535d7a28546f', True))
    #get_file('https://github.com/krahets/hello-algo/tree/dev1', 'f615ad42ef3c58cfc6f080b8fb0cd0eb741706a9', True )