trying to reconstruct dataset

This commit is contained in:
Matthew Gaughan 2024-04-01 10:00:55 -05:00
parent cc590c6ecc
commit 6e4399d575

View File

@@ -57,7 +57,7 @@ def get_file(vcs_link, commit_hash, is_readme):
targetfile = "" targetfile = ""
for blob in commit0.tree.blobs: for blob in commit0.tree.blobs:
#print(type(blob.path)) #print(type(blob.path))
if "CONTRIBUTING" in blob.path: if "README" in blob.path:
targetfile = blob targetfile = blob
#print(blob.path) #print(blob.path)
# why would a file not be in the commit tree? but would be in the directory? # why would a file not be in the commit tree? but would be in the directory?
@@ -85,6 +85,7 @@ def for_all_files():
readme_is = True readme_is = True
csv_path = "kk_031624_pr_did.csv" csv_path = "kk_031624_pr_did.csv"
index = -1 index = -1
saved = []
with open(csv_path, 'r') as file: with open(csv_path, 'r') as file:
with open('d_031824_spec_errors.csv', "w") as writing_file: with open('d_031824_spec_errors.csv', "w") as writing_file:
csv_writer = csv.writer(writing_file) csv_writer = csv.writer(writing_file)
@@ -103,6 +104,9 @@ def for_all_files():
if return_value != "NoError": if return_value != "NoError":
csv_writer.writerow([row[0], row[2], readme_is, return_value]) csv_writer.writerow([row[0], row[2], readme_is, return_value])
else: else:
if row[0] in saved:
continue
saved.append(row[0])
csv_writer2.writerow(row) csv_writer2.writerow(row)
# if it is noError, just write the row down in a different csv # if it is noError, just write the row down in a different csv
# there's an issue of duplicates, but just keep it moving # there's an issue of duplicates, but just keep it moving