diff --git a/get_spec_file.py b/get_spec_file.py index 8c0f83d..4077f4d 100644 --- a/get_spec_file.py +++ b/get_spec_file.py @@ -10,6 +10,7 @@ import dateutil from tqdm import tqdm import math import io +import re working_dir = "/data/users/mgaughan/kkex/time_specific_files/readme" temp_dir = "/data/users/mgaughan/tmp3/" @@ -26,27 +27,29 @@ def get_file(vcs_link, commit_hash, is_readme): else: full_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git" other_temp_path = temp_dir + vcs_link.split('/')[- 1] + ".git0" - repo = Repo.clone_from(vcs_link, full_temp_path) - repo0 = Git(uri=vcs_link, gitpath=other_temp_path) - commit0 = repo.commit(commit_hash) - commits = repo0.fetch() + repo0 = Repo.clone_from(vcs_link, full_temp_path) + repo = Git(uri=vcs_link, gitpath=other_temp_path) + commit0 = repo0.commit(commit_hash) + commits = repo.fetch() target_filename = "" for commit in commits: files = commit['data']['files'] for file in files: if is_readme: if "README" in file['file']: - print(file['file']) + #print(file['file']) if "/" in file['file']: target_filename = file['file'].split("/")[-1] else: target_filename = file['file'] else: if "CONTRIBUTING" in file['file']: + ''' if "/" in file['file']: target_filename = file['file'].split("/")[-1] else: - target_filename = file['file'] + ''' + target_filename = str(file['file']) #print(commit.tree) #getting the name of the file from the root directory ''' @@ -63,11 +66,20 @@ def get_file(vcs_link, commit_hash, is_readme): ''' if target_filename == "": return "NoFile" + target_filename = "README.md" #issue with searching through the tree this way - targetfile = commit0.tree / target_filename - #try: - # targetfile = commit.tree / target_filename - #except KeyError: + # need to match the tree w blobs + #print(commit0.tree.blobs) + #file_id = commit0.tree[r'README.*'].hexsha + #targetfile = repo0.blob(file_id) + try: + targetfile = commit0.tree / target_filename + except KeyError: + target_filename = "README.rst" + try: + targetfile = commit0.tree / target_filename + except KeyError: + return "KeyError -- the file is not in the commit tree" # why would a file not be in the commit tree? but would be in the directory? #shutil.rmtree(full_temp_path, ignore_errors=True) # return "KeyError -- the file is not in the commit tree"