35 lines
1.7 KiB
Python
35 lines
1.7 KiB
Python
|
import csv
|
||
|
import pandas as pd
|
||
|
import os
|
||
|
|
||
|
|
||
|
# Retained so existing references keep working; it is no longer part of the
# project-name comparison (prefixing it made the comparison always fail).
temp_dir = "/data/users/mgaughan/tmp3/"

# Default locations used when the script is run directly.
README_DIR = "/data/users/mgaughan/kkex/time_specific_files/readme/readme"
SOURCE_CSV = "kk_031624_pr_did.csv"
OUTPUT_CSV = "final_readme_did.csv"


def _project_from_filename(filename):
    """Return the project prefix of a README filename.

    The prefix is everything before the last underscore, with inner
    underscores preserved ("my_proj_readme.md" -> "my_proj"). A filename
    without any underscore yields "".
    """
    return filename.rpartition("_")[0]


def _project_from_url(url):
    """Return the project name encoded in a VCS URL, or None if there is none.

    For URLs containing "github" or "gitlab" the fifth "/"-separated segment
    is used (scheme://host/owner/repo -> "repo"); for any other URL the last
    segment is used. None is returned when that segment is missing or empty.
    """
    segments = url.split("/")
    if "github" in url or "gitlab" in url:
        # making an evaluation that sub branches aren't being used and that
        # people would fork if needed; this only looks at main
        if len(segments) < 5:
            return None
        name = segments[4]
    else:
        name = segments[-1]
    return name or None


def _rows_by_project(csv_path):
    """Read csv_path once and group its rows by project name.

    Blank rows, rows whose first column is empty, and rows whose first column
    yields no project name are skipped. Rows keep their file order within
    each group.
    """
    grouped = {}
    # newline="" lets the csv module do its own newline handling.
    with open(csv_path, "r", newline="") as source_file:
        for row in csv.reader(source_file):
            if not row or row[0] == "":
                continue
            project = _project_from_url(row[0])
            if project is not None:
                grouped.setdefault(project, []).append(row)
    return grouped


def main(readme_dir=README_DIR, source_csv=SOURCE_CSV, output_csv=OUTPUT_CSV):
    """Copy the rows of source_csv whose project has a README file.

    For every entry of readme_dir (in os.listdir order), each row of
    source_csv whose first-column URL names the same project as the file's
    prefix is written to output_csv, in source order. output_csv is
    overwritten.
    """
    with open(output_csv, "w", newline="") as writing_file:
        csv_writer = csv.writer(writing_file)
        filenames = os.listdir(readme_dir)
        # Parse the source once instead of once per README file; skip the
        # read entirely when there is nothing to match against.
        rows_by_project = _rows_by_project(source_csv) if filenames else {}
        for filename in filenames:
            file_project = _project_from_filename(filename)
            csv_writer.writerows(rows_by_project.get(file_project, ()))


if __name__ == "__main__":
    main()


# NOTE: disabled one-off cleanup, kept for reference only. It is destructive:
# for every pair of README files sharing a project prefix it removes the
# second file of the pair, and because every ordered pair is visited it would
# remove each member of a duplicate group rather than keep one.
#
# for filename in [f for f in os.listdir("/data/users/mgaughan/kkex/time_specific_files/readme/readme/")]:
#     file_project = "".join(filename.split("_")[:-1])
#     for filename2 in [f for f in os.listdir("/data/users/mgaughan/kkex/time_specific_files/readme/readme/")]:
#         file_project2 = "".join(filename2.split("_")[:-1])
#         if filename != filename2 and file_project == file_project2:
#             os.remove("/data/users/mgaughan/kkex/time_specific_files/readme/readme/" + filename2)
|
||
|
|
||
|
|