updated scripts
This commit is contained in:
parent
a342c2a67f
commit
c57b760d5c
@ -26,16 +26,23 @@ def get_file(vcs_link, commit_hash, is_readme):
|
|||||||
repo = Repo.clone_from(vcs_link, full_temp_path)
|
repo = Repo.clone_from(vcs_link, full_temp_path)
|
||||||
commit = repo.commit(commit_hash)
|
commit = repo.commit(commit_hash)
|
||||||
#getting the name of the file from the root directory
|
#getting the name of the file from the root directory
|
||||||
|
target_filename = ""
|
||||||
for filename in os.listdir(full_temp_path):
|
for filename in os.listdir(full_temp_path):
|
||||||
if is_readme:
|
if is_readme:
|
||||||
target_filename = "README.md"
|
#target_filename = "README.md"
|
||||||
if "README" in filename:
|
if "README" in filename:
|
||||||
target_filename = filename
|
target_filename = filename
|
||||||
else:
|
else:
|
||||||
target_filename = "CONTRIBUTING.md"
|
#target_filename = "CONTRIBUTING.md"
|
||||||
if "CONTRIBUTING" in filename:
|
if "CONTRIBUTING" in filename:
|
||||||
target_filename = filename
|
target_filename = filename
|
||||||
targetfile = commit.tree / target_filename
|
if target_filename == "":
|
||||||
|
return "NoFile"
|
||||||
|
try:
|
||||||
|
targetfile = commit.tree / target_filename
|
||||||
|
except KeyError:
|
||||||
|
# the directory listing comes from the clone's checked-out working tree, not from commit_hash, so a file found there may be missing from this commit's tree
|
||||||
|
return "KeyError"
|
||||||
if is_readme:
|
if is_readme:
|
||||||
last_path = "readme"
|
last_path = "readme"
|
||||||
else:
|
else:
|
||||||
@ -45,6 +52,7 @@ def get_file(vcs_link, commit_hash, is_readme):
|
|||||||
file.write(f.read().decode('utf-8'))
|
file.write(f.read().decode('utf-8'))
|
||||||
file.close()
|
file.close()
|
||||||
shutil.rmtree(full_temp_path, ignore_errors=True)
|
shutil.rmtree(full_temp_path, ignore_errors=True)
|
||||||
|
return "NoError"
|
||||||
|
|
||||||
def for_all_files():
|
def for_all_files():
|
||||||
#toggle this based on readme or contributing files
|
#toggle this based on readme or contributing files
|
||||||
@ -52,17 +60,20 @@ def for_all_files():
|
|||||||
csv_path = "kk_031624_pr_did.csv"
|
csv_path = "kk_031624_pr_did.csv"
|
||||||
index = -1
|
index = -1
|
||||||
with open(csv_path, 'r') as file:
|
with open(csv_path, 'r') as file:
|
||||||
#csv_reader = csv.DictReader(file)
|
with open('031824_spec_errors.csv', "w") as writing_file:
|
||||||
lines = [line for line in file]
|
#csv_reader = csv.DictReader(file)
|
||||||
for row in tqdm(csv.reader(lines), total=len(lines)):
|
lines = [line for line in file]
|
||||||
index += 1
|
for row in tqdm(csv.reader(lines), total=len(lines)):
|
||||||
if index == 0:
|
index += 1
|
||||||
continue
|
if index == 0:
|
||||||
if row[0] == "":
|
continue
|
||||||
continue
|
if row[0] == "":
|
||||||
#print(row[0])
|
continue
|
||||||
get_file(row[0], row[2], readme_is)
|
#print(row[0])
|
||||||
#get_file('https://github.com/tqdm/tqdm', 'fbe7952cce11e8073378b063bdae7ab277a96eb8', True)
|
return_value = get_file(row[0], row[2], readme_is)
|
||||||
|
if return_value != "NoError":
|
||||||
|
writing_file.write(row[0], row[2], readme_is, return_value)
|
||||||
|
#get_file('https://github.com/tqdm/tqdm', 'fbe7952cce11e8073378b063bdae7ab277a96eb8', True)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
for_all_files()
|
for_all_files()
|
||||||
|
@ -34,7 +34,7 @@ def file_get_pr(upstream_vcs_link, me_read):
|
|||||||
else:
|
else:
|
||||||
full_temp_path = temp_dir + upstream_vcs_link.split('/')[- 1] + ".git"
|
full_temp_path = temp_dir + upstream_vcs_link.split('/')[- 1] + ".git"
|
||||||
print(upstream_vcs_link)
|
print(upstream_vcs_link)
|
||||||
if upstream_vcs_link == "https://gitlab.com/ubports/core":
|
if upstream_vcs_link == "https://gitlab.com/ubports/core" or upstream_vcs_link == "https://gitlab.freedesktop.org/xorg/lib":
|
||||||
shutil.rmtree(full_temp_path, ignore_errors=True)
|
shutil.rmtree(full_temp_path, ignore_errors=True)
|
||||||
return {}
|
return {}
|
||||||
repo = Git(uri=upstream_vcs_link, gitpath=full_temp_path)
|
repo = Git(uri=upstream_vcs_link, gitpath=full_temp_path)
|
||||||
@ -145,11 +145,12 @@ def pr_count(start, end, commits, author_roster, commit_roster):
|
|||||||
return [by_week, by_week_merge, new_authors, new_committers, author_roster, commit_roster]
|
return [by_week, by_week_merge, new_authors, new_committers, author_roster, commit_roster]
|
||||||
|
|
||||||
def for_files():
|
def for_files():
|
||||||
|
#csv_path = "final_data/deb_contribfile_roster.csv"
|
||||||
csv_path = "final_data/deb_readme_roster.csv"
|
csv_path = "final_data/deb_readme_roster.csv"
|
||||||
count = 0
|
count = 0
|
||||||
with open(csv_path, 'r') as file:
|
with open(csv_path, 'r') as file:
|
||||||
csv_reader = csv.DictReader(file)
|
csv_reader = csv.DictReader(file)
|
||||||
with open('kk_test_031624_pr_did.csv', "w") as writing_file:
|
with open('kk_031624_pr_did.csv', "w") as writing_file:
|
||||||
# this would also have to get switched for the cont dataset
|
# this would also have to get switched for the cont dataset
|
||||||
keys = ['upstream_vcs_link', "first_readme", "readme_commit_hash", "before_allcom_read", "before_mrg_read", "after_allcom_read", "after_mrg_read", 'before_auth_new', 'after_commit_new', 'after_auth_new', 'before_commit_new']
|
keys = ['upstream_vcs_link', "first_readme", "readme_commit_hash", "before_allcom_read", "before_mrg_read", "after_allcom_read", "after_mrg_read", 'before_auth_new', 'after_commit_new', 'after_auth_new', 'before_commit_new']
|
||||||
dict_writer = csv.DictWriter(writing_file, keys)
|
dict_writer = csv.DictWriter(writing_file, keys)
|
||||||
@ -158,7 +159,10 @@ def for_files():
|
|||||||
count += 1
|
count += 1
|
||||||
print(row['upstream_vcs_link'])
|
print(row['upstream_vcs_link'])
|
||||||
# this would have to get switched to false for the cont dataset
|
# this would have to get switched to false for the cont dataset
|
||||||
dict_row = file_get_pr(row['upstream_vcs_link'].strip(), True)
|
try:
|
||||||
|
dict_row = file_get_pr(row['upstream_vcs_link'].strip(), True)
|
||||||
|
except:
|
||||||
|
dict_row = {}
|
||||||
dict_writer.writerow(dict_row)
|
dict_writer.writerow(dict_row)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user