updated scripts
This commit is contained in:
parent
a342c2a67f
commit
c57b760d5c
@ -26,16 +26,23 @@ def get_file(vcs_link, commit_hash, is_readme):
|
||||
repo = Repo.clone_from(vcs_link, full_temp_path)
|
||||
commit = repo.commit(commit_hash)
|
||||
#getting the name of the file from the root directory
|
||||
target_filename = ""
|
||||
for filename in os.listdir(full_temp_path):
|
||||
if is_readme:
|
||||
target_filename = "README.md"
|
||||
#target_filename = "README.md"
|
||||
if "README" in filename:
|
||||
target_filename = filename
|
||||
else:
|
||||
target_filename = "CONTRIBUTING.md"
|
||||
#target_filename = "CONTRIBUTING.md"
|
||||
if "CONTRIBUTING" in filename:
|
||||
target_filename = filename
|
||||
if target_filename == "":
|
||||
return "NoFile"
|
||||
try:
|
||||
targetfile = commit.tree / target_filename
|
||||
except KeyError:
|
||||
# why would a file be in the directory but not in the commit tree? (presumably the listing reflects the cloned checkout, not the tree at commit_hash)
|
||||
return "KeyError"
|
||||
if is_readme:
|
||||
last_path = "readme"
|
||||
else:
|
||||
@ -45,6 +52,7 @@ def get_file(vcs_link, commit_hash, is_readme):
|
||||
file.write(f.read().decode('utf-8'))
|
||||
file.close()
|
||||
shutil.rmtree(full_temp_path, ignore_errors=True)
|
||||
return "NoError"
|
||||
|
||||
def for_all_files():
|
||||
#toggle this based on readme or contributing files
|
||||
@ -52,6 +60,7 @@ def for_all_files():
|
||||
csv_path = "kk_031624_pr_did.csv"
|
||||
index = -1
|
||||
with open(csv_path, 'r') as file:
|
||||
with open('031824_spec_errors.csv', "w") as writing_file:
|
||||
#csv_reader = csv.DictReader(file)
|
||||
lines = [line for line in file]
|
||||
for row in tqdm(csv.reader(lines), total=len(lines)):
|
||||
@ -61,7 +70,9 @@ def for_all_files():
|
||||
if row[0] == "":
|
||||
continue
|
||||
#print(row[0])
|
||||
get_file(row[0], row[2], readme_is)
|
||||
return_value = get_file(row[0], row[2], readme_is)
|
||||
if return_value != "NoError":
|
||||
writing_file.write(row[0], row[2], readme_is, return_value)
|
||||
#get_file('https://github.com/tqdm/tqdm', 'fbe7952cce11e8073378b063bdae7ab277a96eb8', True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -34,7 +34,7 @@ def file_get_pr(upstream_vcs_link, me_read):
|
||||
else:
|
||||
full_temp_path = temp_dir + upstream_vcs_link.split('/')[- 1] + ".git"
|
||||
print(upstream_vcs_link)
|
||||
if upstream_vcs_link == "https://gitlab.com/ubports/core":
|
||||
if upstream_vcs_link == "https://gitlab.com/ubports/core" or upstream_vcs_link == "https://gitlab.freedesktop.org/xorg/lib":
|
||||
shutil.rmtree(full_temp_path, ignore_errors=True)
|
||||
return {}
|
||||
repo = Git(uri=upstream_vcs_link, gitpath=full_temp_path)
|
||||
@ -145,11 +145,12 @@ def pr_count(start, end, commits, author_roster, commit_roster):
|
||||
return [by_week, by_week_merge, new_authors, new_committers, author_roster, commit_roster]
|
||||
|
||||
def for_files():
|
||||
#csv_path = "final_data/deb_contribfile_roster.csv"
|
||||
csv_path = "final_data/deb_readme_roster.csv"
|
||||
count = 0
|
||||
with open(csv_path, 'r') as file:
|
||||
csv_reader = csv.DictReader(file)
|
||||
with open('kk_test_031624_pr_did.csv', "w") as writing_file:
|
||||
with open('kk_031624_pr_did.csv', "w") as writing_file:
|
||||
# this would also have to get switched for the cont dataset
|
||||
keys = ['upstream_vcs_link', "first_readme", "readme_commit_hash", "before_allcom_read", "before_mrg_read", "after_allcom_read", "after_mrg_read", 'before_auth_new', 'after_commit_new', 'after_auth_new', 'before_commit_new']
|
||||
dict_writer = csv.DictWriter(writing_file, keys)
|
||||
@ -158,7 +159,10 @@ def for_files():
|
||||
count += 1
|
||||
print(row['upstream_vcs_link'])
|
||||
# this would have to get switched to false for the cont dataset
|
||||
try:
|
||||
dict_row = file_get_pr(row['upstream_vcs_link'].strip(), True)
|
||||
except:
|
||||
dict_row = {}
|
||||
dict_writer.writerow(dict_row)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user