import csv
import json
import os

import pandas as pd

# Input read by csv_count().
RAW_CONTRIB_CSV = "011824_uni_contrib.csv"
# Written by csv_count(), read by checking_cross() and consolidate_csv().
# All three steps must agree on this name: the writer used to spell it
# "constrib" while consolidate_csv() read "contrib", so the steps could not
# be chained.
CLEANED_CONTRIB_CSV = "cleaned_0118_uni_contrib.csv"
# Table that consolidate_csv() joins the contribution counts onto.
EXPANDED_DATA_CSV = "expanded_data_final.csv"
# Output of consolidate_csv().
CONSOLIDATED_CSV = "octo_data_total.csv"
# Default directory that checking_cross() probes for "<name>_inst.md" files.
CONTRIBUTE_INST_DIR = (
    "/data/users/mgaughan/kkex_contrib_files_122023/contribute_inst/"
)
# The four count columns shared by the cleaned and consolidated files.
CONTRIB_COUNT_COLUMNS = [
    "api_contrib_count",
    "issue_contrib_count",
    "file_contrib_count",
    "wiki_contrib_count",
]


def csv_count():
    """Filter out rows whose four contribution counts are all ``'0'``.

    Reads ``RAW_CONTRIB_CSV`` and writes ``CLEANED_CONTRIB_CSV``: a fixed
    header row followed by every input row that has at least one count
    (fields 2-5) different from the string ``'0'``. Prints ``zeroes`` for
    each dropped row, each kept row, and finally the number of kept rows.

    Raises:
        IndexError: if a non-blank input row has fewer than six fields.
    """
    header = ["project_name", "project_owner", *CONTRIB_COUNT_COLUMNS]
    true_rep_counter = 0
    # Open the input first so a missing input file does not truncate an
    # existing output file. newline="" is what the csv module expects.
    with open(RAW_CONTRIB_CSV, "r", newline="") as file, open(
        CLEANED_CONTRIB_CSV, "w", newline=""
    ) as writefile:
        writer = csv.writer(writefile)
        writer.writerow(header)
        for line in csv.reader(file):
            if not line:
                # csv.reader yields [] for blank lines; nothing to classify.
                continue
            if line[2] == line[3] == line[4] == line[5] == "0":
                print("zeroes")
            else:
                writer.writerow(line)
                print(line)
                true_rep_counter += 1
    print(true_rep_counter)


def checking_cross(inst_dir=CONTRIBUTE_INST_DIR):
    """Count how many instruction files exist for the cleaned projects.

    For every row of ``CLEANED_CONTRIB_CSV`` the first field is combined
    with the suffixes ``_inst.md`` and ``.git_inst.md``; each resulting
    path that exists under ``inst_dir`` adds one to the total, which is
    printed at the end. The header row is not skipped, so it is probed
    like any other row.

    Args:
        inst_dir: Directory to probe. Defaults to ``CONTRIBUTE_INST_DIR``.
    """
    checking_sum = 0
    with open(CLEANED_CONTRIB_CSV, "r", newline="") as readfile:
        for line in csv.reader(readfile):
            if not line:
                continue
            for suffix in ("_inst.md", ".git_inst.md"):
                if os.path.exists(os.path.join(inst_dir, line[0] + suffix)):
                    checking_sum += 1
    print(checking_sum)


def consolidate_csv():
    """Join contribution counts onto the expanded data table.

    For each row of ``CLEANED_CONTRIB_CSV`` the string
    ``"<project_owner>/<project_name>"`` is searched for as a substring of
    every ``upstream_vcs_link`` in ``EXPANDED_DATA_CSV``. When a link
    matches, the first expanded-data row carrying that link is taken and
    the four contribution counts are appended to it; if several links
    match, the last one wins. Matched rows are written to
    ``CONSOLIDATED_CSV`` under a header made of the expanded-data columns
    plus the count columns. Prints both frame heads, every candidate row,
    and the number of rows written.
    """
    contributor_count_csv = pd.read_csv(CLEANED_CONTRIB_CSV)
    print(contributor_count_csv.head())
    total_underprod_csv = pd.read_csv(EXPANDED_DATA_CSV)
    print(total_underprod_csv.head())

    columns = list(total_underprod_csv.columns) + CONTRIB_COUNT_COLUMNS
    list_of_links = total_underprod_csv["upstream_vcs_link"].tolist()
    count = 0
    with open(CONSOLIDATED_CSV, "w", newline="") as output_file:
        # csv.writer takes a dialect as its second positional argument, not
        # field names, so the column list is only written as the header row.
        writer = csv.writer(output_file)
        writer.writerow(columns)
        for _, row in contributor_count_csv.iterrows():
            row_value = []
            string_value = row["project_owner"] + "/" + row["project_name"]
            for item in list_of_links:
                # Missing links come back from pandas as float NaN, which
                # does not support the `in` operator.
                if not isinstance(item, str) or string_value not in item:
                    continue
                matching_rows = total_underprod_csv.loc[
                    total_underprod_csv["upstream_vcs_link"] == item
                ]
                row_value = matching_rows.values.tolist()[0]
                row_value.extend(row[name] for name in CONTRIB_COUNT_COLUMNS)
                print(row_value)
            # row_value stays empty unless some link matched.
            if row_value:
                writer.writerow(row_value)
                count += 1
    print(count)


if __name__ == "__main__":
    consolidate_csv()