updates to getting first version file
This commit is contained in:
parent
4dae764bf8
commit
da8e1c0e45
154
12825_revision/spec_file/updated_get_spec_file.py
Normal file
154
12825_revision/spec_file/updated_get_spec_file.py
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
import git
|
||||||
|
from tqdm import tqdm
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import pandas as pd
|
||||||
|
import time
|
||||||
|
|
||||||
|
# Directory that get_file copies the extracted documents into.
dest_dir = "/data/users/mgaughan/kkex/012825_cam_revision_main/first_version_documents/013025_readme/"
# Scratch directory that each repository is cloned into and removed from.
temp_dir = "/data/users/mgaughan/tmp3/"
|
||||||
|
|
||||||
|
def temp_clone(vcs_link, temp_location):
    """
    Clone a repository into a scratch directory.

    ARGS
    vcs_link : url link to upstream repo vcs (surrounding whitespace is stripped)
    temp_location : filepath to where the repo should be cloned to

    RETURNS
    repo : the object returned by git.Repo.clone_from for the new clone
    repo_path : the filepath to the cloned repository (same as temp_location)

    RAISES
    Nothing is caught here: errors from os.makedirs or git.Repo.clone_from
    propagate to the caller.
    """
    vcs_link = vcs_link.strip()
    # exist_ok=True: a directory left over from an earlier run must not abort
    # the call with FileExistsError before git is even invoked.
    os.makedirs(temp_location, exist_ok=True)
    repo_path = temp_location
    repo = git.Repo.clone_from(vcs_link, repo_path)
    print(f"Successfully Cloned {vcs_link}")
    return repo, repo_path
|
||||||
|
|
||||||
|
|
||||||
|
def delete_clone(temp_location):
    """
    Remove a cloned repository from disk.

    ARGS
    temp_location : filepath to the cloned repository

    RETURNS
    0 if the path existed and was removed, 1 if nothing was found at the path
    """
    # Guard clause: nothing to clean up.
    if not os.path.exists(temp_location):
        print("No clone at location")
        return 1

    shutil.rmtree(temp_location)
    print(f"{temp_location} has been deleted.")
    return 0
|
||||||
|
|
||||||
|
def _to_ssh_url(domain, repo_id):
    """Build the git@<domain>:<repo_id>.git URL, without doubling an existing .git suffix."""
    suffix = "" if repo_id.endswith(".git") else ".git"
    return f'git@{domain}:{repo_id}{suffix}'


def _clone_upstream(vcs_link):
    """Clone vcs_link into temp_dir and return (repo_id, path of the clone)."""
    vcs_link = vcs_link.strip()
    # "<scheme>://<domain>/<repo_id...>" -> ['<scheme>:', '', '<domain>', ...]
    parts = vcs_link.split('/')
    repo_id = '/'.join(parts[3:])
    if len(parts) < 4 or not repo_id:
        raise ValueError(f"cannot parse repository id from {vcs_link!r}")
    domain = parts[2]

    if "github" in vcs_link:
        # GitHub links are always cloned over SSH against github.com.
        _repo, repo_path = temp_clone(_to_ssh_url('github.com', repo_id), temp_dir)
        return repo_id, repo_path

    try:
        _repo, repo_path = temp_clone(vcs_link, temp_dir)
    except Exception as e:
        print(f'non-Github cloning error, assuming HTTPS issue: {e}')
        # Clear whatever the failed attempt left behind before retrying over SSH.
        delete_clone(temp_dir)
        _repo, repo_path = temp_clone(_to_ssh_url(domain, repo_id), temp_dir)
    return repo_id, repo_path


def _find_doc(repo_root, doc_name):
    """Return the path of the first file under repo_root whose name starts with doc_name, else None."""
    for root, dirs, files in os.walk(repo_root):
        # Skip git's own metadata and fix the traversal order; os.walk honours
        # in-place edits of dirs when walking top-down.
        dirs[:] = sorted(d for d in dirs if d != ".git")
        for file in sorted(files):
            if file.startswith(doc_name):
                return os.path.join(root, file)
    return None


def get_file(vcs_link, commit_hash, is_readme):
    """
    Get the README or CONTRIBUTING file of a repository as of a given commit.

    The repository is cloned into temp_dir, hard-reset to commit_hash, searched
    for the first file whose name starts with README / CONTRIBUTING, and that
    file is copied into dest_dir. temp_dir is deleted again before returning,
    whether or not the call succeeded.

    ARGS
    vcs_link : upstream vcs link of the repository
    commit_hash : commit the working tree is reset to before searching
    is_readme : truthy to fetch the README, falsy to fetch CONTRIBUTING

    RETURNS
    (True, dest_path) when a document was copied to dest_path,
    (False, "") on any failure (clone error, reset error, no document found,
    copy error); the reason is printed.
    """
    import subprocess  # only needed here; keeps the file's import block untouched

    # Keep git/ssh from blocking on interactive prompts during the clone.
    os.environ['GIT_SSH_COMMAND'] = 'ssh -o StrictHostKeyChecking=no'
    os.environ['GIT_ASKPASS'] = 'false'
    os.environ['GIT_TERMINAL_PROMPT'] = '0'
    try:
        try:
            repo_id, temp_repo_path = _clone_upstream(vcs_link)
        except Exception as e:
            print(f'cloning error at {vcs_link}')
            print(f'inside cloning error: {e}')
            raise ValueError(e) from e

        # Argument list + cwd instead of a shell string and os.chdir: the hash
        # comes from input data, and check=True stops us from silently copying
        # the HEAD version of the document when the reset fails.
        subprocess.run(
            ["git", "reset", "--hard", str(commit_hash)],
            cwd=temp_repo_path,
            check=True,
        )

        doc_name = "README" if is_readme else "CONTRIBUTING"
        doc_path = _find_doc(temp_repo_path, doc_name)
        if doc_path is None:
            print(f"no {doc_name} file found for {vcs_link} at {commit_hash}")
            return False, ""

        und_repo_id = '_'.join(repo_id.split("/"))
        doc_filename = os.path.basename(doc_path)
        dest_path = os.path.join(dest_dir, f"{und_repo_id}_hullabaloo_{doc_filename}")
        shutil.copy(doc_path, dest_path)
    except Exception as e:
        print(f"outside cloning error: {e}")
        return False, ""
    finally:
        # Runs on every exit path above, including the early returns.
        delete_clone(temp_dir)

    return True, dest_path
|
||||||
|
|
||||||
|
def for_all_files(csv_path, is_readme, stop_on_failure=True):
    """
    Fetch the document for every row of a CSV and write a manifest of the results.

    The first CSV row is treated as a header and skipped. For each remaining
    row, get_file is called with the values at positions 14 (upstream vcs link)
    and 0 (commit hash); positions 12 and 13 are recorded as repo_id and
    project_handle. Successful rows are written to
    "013025_<README|CONTRIBUTING>_manifest.csv" in the current directory, even
    when the run is cut short by Ctrl-C or an error.

    ARGS
    csv_path : filepath of the input CSV
    is_readme : truthy to fetch READMEs, falsy to fetch CONTRIBUTING files
    stop_on_failure : when True (default) the run stops at the first row for
        which get_file fails; when False that row is skipped and the run
        continues

    RETURNS
    None

    RAISES
    Errors other than KeyboardInterrupt (e.g. a missing input file or a row
    with too few columns) propagate after the manifest has been written.
    """
    doc = "README" if is_readme else "CONTRIBUTING"
    manifest_columns = [
        'commit_hash',
        'upstream_vcs_link',
        'repo_id',
        'project_handle',
        'new_filepath',
    ]
    # Created before the try so the finally block can always see it, even when
    # opening the input file fails.
    manifest_rows = []
    try:
        with open(csv_path, 'r', newline='') as file:
            rows = list(csv.reader(file))

        for row in tqdm(rows[1:]):  # rows[0] is the header
            time.sleep(4)  # pause between repositories
            record = {
                'commit_hash': row[0],
                'upstream_vcs_link': row[14],
                'repo_id': row[12],
                'project_handle': row[13],
            }

            _check, new_filepath = get_file(
                record['upstream_vcs_link'], record['commit_hash'], is_readme
            )
            if not _check:
                if stop_on_failure:
                    break
                continue

            record['new_filepath'] = new_filepath
            manifest_rows.append(record)
    except KeyboardInterrupt:
        print("KeyBoardInterrrupt")
    finally:
        if manifest_rows:
            manifest_df = pd.DataFrame(manifest_rows, columns=manifest_columns)
            manifest_df.to_csv(f"013025_{doc}_manifest.csv", index=False)
        else:
            # Nothing collected: do not raise from an empty concat and do not
            # overwrite a manifest from an earlier run with an empty one.
            print("no documents retrieved; manifest not written")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Build the README manifest from the download list.
    for_all_files(csv_path="../misc_data_files/README_for_download.csv", is_readme=True)
    # Single-repository invocations kept for manual testing:
    #get_file("https://github.com/breakfastquay/rubberband", "a94f3f33577bf9d71166392febbfdf3cace6f1c8", True)
    #get_file("https://gitlab.freedesktop.org/gstreamer/gstreamer", "1762dfbf982a75d895676b0063379e33b4f9b96a", True)
    #get_file("https://github.com/ranger/ranger.git", "ea355f491fb10d5ce054c7813d9abdfd3fc68991" ,False)
    #get_file("https://gitlab.gnome.org/Archive/glade", "7e5dfa8ccd211945e624b0fab7fe2b19fb1b9907" ,False)
|
Loading…
Reference in New Issue
Block a user