2023-12-12 19:23:04 +00:00
|
|
|
import os
|
|
|
|
import csv
|
|
|
|
import json
|
|
|
|
|
2023-12-18 00:12:22 +00:00
|
|
|
# Directory of JSON data files that check_files_for_content reads and prunes.
path = '/data/users/mgaughan/kkex_comment_final/'
|
2023-12-12 19:23:04 +00:00
|
|
|
# Parsed shape of a result that contains no issue edges; check_files_for_content
# treats a file whose content equals this as unusable.
empty_file_dict = {'data': {'repository': {'issues': {'edges': []}}}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Prune the data directory of bad data files that cannot be used.
|
|
|
|
def check_files_for_content(filelist):
    """Delete unusable JSON data files from the module-level ``path`` directory.

    A file is considered unusable when its parsed content has a top-level
    ``'errors'`` entry or is equal to ``empty_file_dict``. Usable files are
    left untouched.

    Args:
        filelist: Iterable of file names located directly inside ``path``.

    Returns:
        The number of files that were removed.

    Raises:
        OSError: If a file cannot be opened or removed.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    bad_data_files = 0
    for file in filelist:
        filepath = os.path.join(path, file)
        # Parse inside a context manager so the handle is released for every
        # file (good or bad) and is already closed before any removal.
        with open(filepath, encoding="utf-8") as opened_file:
            file_contents = json.load(opened_file)
        bad_comment_data = (
            'errors' in file_contents or file_contents == empty_file_dict
        )
        if bad_comment_data:
            bad_data_files += 1
            os.remove(filepath)
        # Usable files are only kept here; printing their issues and comments
        # is handle_repo_issues' job and is not triggered by this function.
    return bad_data_files
|
2023-12-12 19:23:04 +00:00
|
|
|
|
|
|
|
def handle_repo_issues(list_of_issues):
    """Print each issue author's URL, then print that issue's comments.

    Args:
        list_of_issues: Iterable of issue edges. Each edge is read as
            ``issue['node']['author']['url']`` and
            ``issue['node']['comments']['edges']``.
    """
    for issue in list_of_issues:
        node = issue['node']
        author = node['author']
        # NOTE(review): the data looks like GitHub GraphQL output, where the
        # author is null for deleted accounts; print None instead of raising
        # TypeError so the issue's comments are still processed.
        print(author['url'] if author is not None else None)
        handle_issue_comments(node['comments']['edges'])
|
|
|
|
|
|
|
|
def handle_issue_comments(list_of_comments):
    """Print the body of every comment, each preceded by a separator line.

    Args:
        list_of_comments: Iterable of comment edges. Each edge is read as
            ``comment['node']['body']``.
    """
    for comment in list_of_comments:
        comment_body = comment['node']['body']
        print("------------------")
        print(comment_body)
|
|
|
|
|
2023-12-18 00:12:22 +00:00
|
|
|
def concat_csv(first_path="121223_expanded_data.csv",
               second_path="121323_expanded_data.csv",
               out_path="expanded_data_final.csv"):
    """Write the contents of two CSV files, one after the other, to a third.

    Called without arguments, it reads ``121223_expanded_data.csv`` and
    ``121323_expanded_data.csv`` and writes ``expanded_data_final.csv``.

    Args:
        first_path: File whose content is written first.
        second_path: File whose content is written second.
        out_path: Destination file; overwritten if it exists.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
    """
    # newline="" turns off newline translation so line endings (including any
    # inside quoted fields) are copied through unchanged.
    with open(first_path, "r", newline="") as f1:
        first_block = f1.read()
    with open(second_path, "r", newline="") as f2:
        second_block = f2.read()
    # NOTE(review): the second file is appended verbatim; if both inputs start
    # with a header row, that header will appear mid-file - confirm.
    with open(out_path, "w", newline="") as f3:
        f3.write(first_block)
        # Only add a separator when the first block does not already end its
        # last row; an unconditional "\n" would create a blank row.
        if first_block and not first_block.endswith(("\n", "\r")):
            f3.write("\n")
        f3.write(second_block)
|
|
|
|
|
|
|
|
|
2023-12-12 19:23:04 +00:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # The directory-pruning pass is currently disabled:
    # check_files_for_content(os.listdir(path))
    concat_csv()
|