From 6e4399d5758fdec734cbba6f2d3f716560d18e73 Mon Sep 17 00:00:00 2001 From: Matthew Gaughan Date: Mon, 1 Apr 2024 10:00:55 -0500 Subject: [PATCH] trying to reconstruct dataset --- get_spec_file.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/get_spec_file.py b/get_spec_file.py index 010ba59..34669cf 100644 --- a/get_spec_file.py +++ b/get_spec_file.py @@ -57,7 +57,7 @@ def get_file(vcs_link, commit_hash, is_readme): targetfile = "" for blob in commit0.tree.blobs: #print(type(blob.path)) - if "CONTRIBUTING" in blob.path: + if "README" in blob.path: targetfile = blob #print(blob.path) # why would a file not be in the commit tree? but would be in the directory? @@ -85,6 +85,7 @@ def for_all_files(): readme_is = True csv_path = "kk_031624_pr_did.csv" index = -1 + saved = [] with open(csv_path, 'r') as file: with open('d_031824_spec_errors.csv', "w") as writing_file: csv_writer = csv.writer(writing_file) @@ -103,6 +104,9 @@ def for_all_files(): if return_value != "NoError": csv_writer.writerow([row[0], row[2], readme_is, return_value]) else: + if row[0] in saved: + continue + saved.append(row[0]) csv_writer2.writerow(row) # if it is noError, just write the row down in a different csv # there's an issue of duplicates, but just keep it moving