67 lines
2.6 KiB
Python
67 lines
2.6 KiB
Python
import csv
|
|
import os
|
|
import json
|
|
import pandas as pd
|
|
|
|
|
|
def csv_count():
    """Drop projects whose four contribution counts are all zero.

    Reads ``011824_uni_contrib.csv`` from the current working directory and
    writes ``cleaned_0118_uni_constrib.csv``: a fixed header row followed by
    every input row whose columns 2-5 are not all the string ``'0'``.

    ``zeroes`` is printed for each dropped row, each kept row is echoed to
    stdout, and the number of kept rows is printed at the end.

    Raises:
        FileNotFoundError: if the input file does not exist.
        IndexError: if a non-blank input row has fewer than six columns.
    """
    keys = [
        "project_name",
        "project_owner",
        "api_contrib_count",
        "issue_contrib_count",
        "file_contrib_count",
        "wiki_contrib_count",
    ]
    true_rep_counter = 0
    # The csv module does its own line-ending handling; newline="" keeps the
    # platform's text layer from adding a second "\r" on write or splitting
    # quoted fields that contain newlines on read.
    with open("cleaned_0118_uni_constrib.csv", "w", newline="") as writefile, \
            open("011824_uni_contrib.csv", "r", newline="") as file:
        writer = csv.writer(writefile)
        writer.writerow(keys)
        # NOTE(review): every input row is treated as data; if the input file
        # carries its own header row it is copied through as well -- confirm.
        for line in csv.reader(file):
            if not line:
                # Blank lines parse as [] and would otherwise raise IndexError.
                continue
            if line[2] == line[3] == line[4] == line[5] == '0':
                print("zeroes")
            else:
                writer.writerow(line)
                print(line)
                true_rep_counter += 1
    print(true_rep_counter)
|
|
|
|
def checking_cross():
    """Count instruction files on disk for the rows of the cleaned CSV.

    For every row of ``cleaned_0118_uni_constrib.csv`` the first column is
    combined with the suffixes ``_inst.md`` and ``.git_inst.md`` under the
    hard-coded ``contribute_inst`` directory. Each path that exists adds one
    to the tally, which is printed once all rows have been read.
    """
    inst_dir = "/data/users/mgaughan/kkex_contrib_files_122023/contribute_inst/"
    suffixes = ("_inst.md", ".git_inst.md")
    with open("cleaned_0118_uni_constrib.csv", "r") as readfile:
        # NOTE(review): every row is probed, including a header row if the
        # file has one -- confirm that is intended.
        checking_sum = sum(
            os.path.exists(inst_dir + record[0] + suffix)
            for record in csv.reader(readfile)
            for suffix in suffixes
        )
    print(checking_sum)
|
|
|
|
def consolidate_csv():
    """Append per-project contribution counts to the expanded project data.

    Reads ``cleaned_0118_uni_constrib.csv`` and ``expanded_data_final.csv``
    from the current working directory. For each contribution row the string
    ``<project_owner>/<project_name>`` is searched for as a substring of every
    ``upstream_vcs_link``. When at least one link contains it, the first data
    row carrying the last such link is written to ``octo_data_total.csv``
    with the four contribution counts appended; rows with no containing link
    are not written.

    The head of both inputs, every candidate row, and the number of rows
    written are printed to stdout.
    """
    contributor_count_csv = pd.read_csv("cleaned_0118_uni_constrib.csv")
    print(contributor_count_csv.head())
    total_underprod_csv = pd.read_csv("expanded_data_final.csv")
    print(total_underprod_csv.head())

    count_columns = [
        "api_contrib_count",
        "issue_contrib_count",
        "file_contrib_count",
        "wiki_contrib_count",
    ]
    columns = list(total_underprod_csv.columns)
    columns.extend(count_columns)

    link_column = total_underprod_csv["upstream_vcs_link"]
    # pandas reads an empty link cell as NaN (a float); a substring test on
    # it raises TypeError, so only real strings are kept as candidates.
    list_of_links = [link for link in link_column.tolist() if isinstance(link, str)]

    count = 0
    with open("octo_data_total.csv", 'w', newline='') as output_file:
        # The second positional argument of csv.writer is a dialect, not a
        # list of field names, so the column list is only written as a row.
        writer = csv.writer(output_file)
        writer.writerow(columns)
        for _, row in contributor_count_csv.iterrows():
            string_value = row['project_owner'] + "/" + row['project_name']
            # NOTE(review): substring matching means "owner/repo" also hits
            # "owner/repo-extra"; the last containing link wins, as before.
            matched_link = None
            for item in list_of_links:
                if string_value in item:
                    matched_link = item

            row_value = []
            if matched_link is not None:
                # One lookup per contribution row instead of one per match.
                row_value = total_underprod_csv.loc[link_column == matched_link].values.tolist()[0]
            row_value.extend(row[name] for name in count_columns)
            print(row_value)
            if matched_link is not None:
                writer.writerow(row_value)
                count += 1
    print(count)
|
|
|
|
|
|
|
|
# Script entry point: only the consolidation step runs when the file is
# executed directly; the other helpers must be called by hand.
if __name__ == "__main__":
    consolidate_csv()
|
|
|
|
|
|
|