consolidate comment_data
This commit is contained in:
parent
d51e959b5c
commit
c52e129be1
@ -108,6 +108,29 @@ def consolidate_csv_2():
|
|||||||
print(row_value)
|
print(row_value)
|
||||||
writer.writerow(row_value)
|
writer.writerow(row_value)
|
||||||
|
|
||||||
|
def consolidate_csv_3():
    """Build ``kk_final_commentlist.csv`` by pairing projects with comment files.

    Reads ``kk_final_expanded_data_final.csv``, then scans ``rosters_dir`` for
    files named ``gh_comments_<project_name>.json``.  For every file whose
    ``<project_name>`` appears in the ``project_name`` column, that project's
    row is written to the output CSV with the file's full path appended under
    a new ``comments_filepath`` column.

    Files that do not follow the naming pattern, or that match no project,
    are skipped.  When a project name occurs more than once in the input,
    the first matching row is used.
    """
    rosters_dir = "/data/users/mgaughan/kkex_comment_data_121323/"
    file_prefix = "gh_comments_"
    file_suffix = ".json"

    total_underprod_csv = pd.read_csv("kk_final_expanded_data_final.csv")
    columns = list(total_underprod_csv.columns)
    columns.append("comments_filepath")

    # Index the first row of every project once, instead of rescanning the
    # whole project list (and re-filtering the DataFrame) for every file.
    rows_by_project = {}
    project_names = total_underprod_csv["project_name"].tolist()
    for project_name, row in zip(project_names, total_underprod_csv.values.tolist()):
        rows_by_project.setdefault(project_name, row)

    with open("kk_final_commentlist.csv", 'w', newline='') as output_file:
        # csv.writer's second positional argument is the dialect, not the
        # header; the header is emitted explicitly with writerow below.
        writer = csv.writer(output_file)
        writer.writerow(columns)
        # os.listdir returns entries in arbitrary order; sort so the output
        # file is reproducible between runs.
        for filename in sorted(os.listdir(rosters_dir)):
            # Only slice the name when the prefix/suffix are really present,
            # otherwise unrelated files would yield a garbage package name.
            if not (filename.startswith(file_prefix) and filename.endswith(file_suffix)):
                continue
            pkg_name = filename[len(file_prefix):-len(file_suffix)]
            print(pkg_name)
            row = rows_by_project.get(pkg_name)
            if row is None:
                continue
            row_value = row + [os.path.join(rosters_dir, filename)]
            print(row_value)
            writer.writerow(row_value)
|
||||||
|
|
||||||
def get_main_for_splice():
|
def get_main_for_splice():
|
||||||
inst_doc_df = pd.read_csv("kk_final_doclist_roster.csv")
|
inst_doc_df = pd.read_csv("kk_final_doclist_roster.csv")
|
||||||
inst_doc_df = inst_doc_df.sort_values(by=['underproduction_mean'])
|
inst_doc_df = inst_doc_df.sort_values(by=['underproduction_mean'])
|
||||||
@ -150,4 +173,4 @@ def get_main_for_splice():
|
|||||||
print(median(all_header_counts))
|
print(median(all_header_counts))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
consolidate_readme()
|
consolidate_csv_3()
|
||||||
|
3692
kk_final_commentlist.csv
Normal file
3692
kk_final_commentlist.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user