diff --git a/cleaning_did_data.py b/cleaning_did_data.py
index d097c1b..eb7a99d 100644
--- a/cleaning_did_data.py
+++ b/cleaning_did_data.py
@@ -2,11 +2,13 @@
 import csv
 import pandas as pd
 import os
 
+# the repo of files is the record of what projects we have the specific files for
+# use the repo of files to then match back to the DiD data
 temp_dir = "/data/users/mgaughan/tmp3/"
 with open("final_readme_did.csv", "w") as writing_file:
     csv_writer = csv.writer(writing_file)
-    for filename in [f for f in os.listdir("/data/users/mgaughan/kkex/time_specific_files/readme/readme")]:
+    for filename in [f for f in os.listdir("/data/users/mgaughan/kkex/time_specific_files/readme/")]:
         file_project = "".join(filename.split("_")[:-1])
         with open("kk_031624_pr_did.csv", "r") as file1:
             reader_obj = csv.reader(file1)
@@ -22,6 +24,7 @@ with open("final_readme_did.csv", "w") as writing_file:
                 project_name = temp_dir + line[0].split('/')[- 1]
                 if file_project == project_name:
                     csv_writer.writerow(line)
+                    break
 
 '''
 for filename in [f for f in os.listdir("/data/users/mgaughan/kkex/time_specific_files/readme/readme/")]:
diff --git a/get_spec_file.py b/get_spec_file.py
index f450538..010ba59 100644
--- a/get_spec_file.py
+++ b/get_spec_file.py
@@ -12,7 +12,7 @@ import math
 import io
 import re
 
-working_dir = "/data/users/mgaughan/kkex/time_specific_files/contributing"
+working_dir = "/data/users/mgaughan/kkex/time_specific_files/readme2"
 temp_dir = "/data/users/mgaughan/tmp3/"
 
 # getting the specific readme or contributing file from a given commit
@@ -48,20 +48,7 @@ def get_file(vcs_link, commit_hash, is_readme):
             target_filename = file['file'].split("/")[-1]
         else:
             target_filename = file['file']
-    #print(commit.tree)
-    #getting the name of the file from the root directory
-    '''
-    target_filename = ""
-    for filename in os.listdir(full_temp_path):
-        if is_readme:
-            #target_filename = "README.md"
-            if "README" in filename or "readme" in filename:
-                target_filename = filename
-        else:
-            #target_filename = "CONTRIBUTING.md"
-            if "CONTRIBUTING" in filename or "contributing" in filename:
-                target_filename = filename
-    '''
+
     if target_filename == "":
         shutil.rmtree(full_temp_path, ignore_errors=True)
         shutil.rmtree(other_temp_path, ignore_errors=True)
@@ -80,9 +67,9 @@ def get_file(vcs_link, commit_hash, is_readme):
         shutil.rmtree(full_temp_path, ignore_errors=True)
         shutil.rmtree(other_temp_path, ignore_errors=True)
         return "KeyError -- the file is not in the commit tree"
 
     if is_readme:
-        last_path = "readme"
+        last_path = "readme2"
     else:
         last_path = "contributing"
     with open("/data/users/mgaughan/kkex/time_specific_files/" + last_path + "/" + full_temp_path[len(temp_dir):-4] + "_" + targetfile.path , "w") as file:
@@ -96,27 +82,31 @@ def get_file(vcs_link, commit_hash, is_readme):
 
 def for_all_files():
     #toggle this based on readme or contributing files
-    readme_is = False
-    csv_path = "final_data/deb_contrib_did_data.csv"
+    readme_is = True
+    csv_path = "kk_031624_pr_did.csv"
     index = -1
     with open(csv_path, 'r') as file:
-        with open('c_031824_spec_errors.csv', "w") as writing_file:
+        with open('d_031824_spec_errors.csv', "w") as writing_file:
             csv_writer = csv.writer(writing_file)
-            #csv_reader = csv.DictReader(file)
-            lines = [line for line in file]
-            for row in tqdm(csv.reader(lines), total=len(lines)):
-                index += 1
-                if index == 0:
-                    continue
-                if row[0] == "":
-                    continue
-                #print(row[0])
-                return_value = get_file(row[0], row[2], readme_is)
-                if return_value != "NoError":
-                    csv_writer.writerow([row[0], row[2], readme_is, return_value])
-                # if it is noError, just write the row down in a different csv
-                # there's an issue of duplicates, but just keep it moving
-                # if no duplicates -- just run it through
+            with open("readme_completed_downloads.csv", "w") as writing_file2:
+                csv_writer2 = csv.writer(writing_file2)
+                #csv_reader = csv.DictReader(file)
+                lines = [line for line in file]
+                for row in tqdm(csv.reader(lines), total=len(lines)):
+                    index += 1
+                    if index == 0:
+                        continue
+                    if row[0] == "":
+                        continue
+                    #print(row[0])
+                    return_value = get_file(row[0], row[2], readme_is)
+                    if return_value != "NoError":
+                        csv_writer.writerow([row[0], row[2], readme_is, return_value])
+                    else:
+                        csv_writer2.writerow(row)
+                    # if it is noError, just write the row down in a different csv
+                    # there's an issue of duplicates, but just keep it moving
+                    # if no duplicates -- just run it through
 
 if __name__ == "__main__":
     for_all_files()
\ No newline at end of file