"""Select the DiD rows whose project has a time-specific CONTRIBUTING file.

The directory of time-specific files is the record of which projects we
actually have files for.  Each file name is reduced to a project name and
matched back against the first column (the upstream VCS link) of the DiD
data; the first matching DiD row per file is written to the output CSV.
"""
import csv
import os

import pandas as pd

# Retained as a module-level name; it is no longer part of the matching key
# (see project_name_from_link).
temp_dir = "/data/users/mgaughan/tmp3/"

CONTRIBUTING_FILES_DIR = "/data/users/mgaughan/kkex/time_specific_files/contributing/"
DID_DATA_PATH = "final_data/deb_contrib_did_data.csv"
OUTPUT_PATH = "final_contributing_did.csv"


def project_name_from_link(vcs_link):
    """Return the project name encoded in an upstream VCS link, or None.

    For GitHub/GitLab links the name is the fifth "/"-separated component
    (``https://host/owner/repo/...`` -> ``repo``): sub-branches and deeper
    paths are deliberately ignored, on the assumption that people fork rather
    than work in sub-branches.  For any other host the last path component is
    used.

    None is returned when no non-empty name can be extracted (for example a
    GitHub link with fewer than five components), so callers can skip the row
    instead of crashing.
    """
    parts = vcs_link.split("/")
    if "github" in vcs_link or "gitlab" in vcs_link:
        if len(parts) < 5:
            return None
        name = parts[4]
    else:
        # The original prefixed this with temp_dir, which made the value a
        # path containing "/" that could never equal a name derived from a
        # directory entry; compare bare names instead.
        name = parts[-1]
    return name or None


def index_did_rows(did_path):
    """Read the DiD CSV once and map each project name to its first row.

    Rows that are blank, have an empty first column, or whose link yields no
    project name are skipped.  Only the first row per project is kept, which
    mirrors stopping the scan at the first match.
    """
    first_row_by_project = {}
    with open(did_path, "r", newline="") as did_file:
        for row in csv.reader(did_file):
            if not row or row[0] == "":
                continue
            name = project_name_from_link(row[0])
            if name is not None:
                first_row_by_project.setdefault(name, row)
    return first_row_by_project


def main(files_dir=CONTRIBUTING_FILES_DIR, did_path=DID_DATA_PATH,
         out_path=OUTPUT_PATH):
    """Write the DiD row of every project that has a file in *files_dir*.

    Args:
        files_dir: directory whose entries name the projects we have files for.
        did_path: CSV of DiD data; column 0 holds the upstream VCS link.
        out_path: CSV to (over)write with the matching rows, one per file,
            in directory-listing order.  No header row is written.
    """
    rows_by_project = index_did_rows(did_path)
    with open(out_path, "w", newline="") as writing_file:
        csv_writer = csv.writer(writing_file)
        for filename in os.listdir(files_dir):
            # Everything before the last "_" is treated as the project name.
            # NOTE(review): the pieces are joined with "", so underscores
            # inside the project part are dropped ("foo_bar_x.md" ->
            # "foobar"); confirm this matches the file naming convention.
            file_project = "".join(filename.split("_")[:-1])
            row = rows_by_project.get(file_project)
            if row is not None:
                csv_writer.writerow(row)


# Disabled one-off cleanup (previously kept in a string literal): removes
# README files that reduce to the same project name as another file.
#
# readme_dir = "/data/users/mgaughan/kkex/time_specific_files/readme/readme/"
# for filename in os.listdir(readme_dir):
#     file_project = "".join(filename.split("_")[:-1])
#     for filename2 in os.listdir(readme_dir):
#         file_project2 = "".join(filename2.split("_")[:-1])
#         if filename != filename2 and file_project == file_project2:
#             os.remove(readme_dir + filename2)


if __name__ == "__main__":
    main()