# 24_deb_pkg_gov/text_pp.py
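"""
Preprocessing helpers for the kkex comment data: prune issue-comment JSON
files that contain errors or no data (check_files_for_content) and
concatenate the expanded-data CSV exports into a single file (concat_csv).
"""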

import os
import csv
import json
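# Directory of fetched issue-comment data; each file is expected to hold one GraphQL JSON response.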
path = '/data/users/mgaughan/kkex_comment_final/'
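# A GraphQL response that contains no issue data; files matching this are treated as bad data.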
empty_file_dict = {'data': {'repository': {'issues': {'edges': []}}}}
# Prune the directory: delete data files that returned errors or contain no
# usable comment data.
def check_files_for_content(filelist):
    checked_files = 0
    bad_data_files = 0
    for file in filelist:
        filepath = path + file
        with open(filepath) as opened_file:
            file_contents = json.load(opened_file)
        bad_comment_data = 'errors' in file_contents or file_contents == empty_file_dict
        checked_files += 1
        if bad_comment_data:
            bad_data_files += 1
            os.remove(filepath)
            continue
        #list_of_issues = file_contents['data']['repository']['issues']['edges']
        #handle_repo_issues(list_of_issues)
    #print(bad_data_files)
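
# Shape of a usable file, inferred from the field accesses in the handlers
# below; this is an assumed sketch, not a documented schema:
#
# {'data': {'repository': {'issues': {'edges': [
#     {'node': {'author': {'url': ...},
#               'comments': {'edges': [
#                   {'node': {'body': ..., 'author': ...}}
#               ]}}}
# ]}}}}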
def handle_repo_issues(list_of_issues):
    # Walk each issue edge, print the issue author's URL, and hand the
    # issue's comment edges off to handle_issue_comments.
    for issue in list_of_issues:
        print(issue['node']['author']['url'])
        list_of_comments = issue['node']['comments']['edges']
        handle_issue_comments(list_of_comments)

def handle_issue_comments(list_of_comments):
    # Print each comment body; comment_author is extracted but not yet used.
    for comment in list_of_comments:
        comment_body = comment['node']['body']
        comment_author = comment['node']['author']
        print("------------------")
        print(comment_body)

def concat_csv():
    # Concatenate the two expanded-data CSV exports into one file. This is a
    # raw text concatenation, so the header row of the second file is copied
    # through to the combined output as-is.
    with open("121223_expanded_data.csv", "r") as f1:
        first_block = f1.read()
    with open("121323_expanded_data.csv", "r") as f2:
        second_block = f2.read()
    with open("expanded_data_final.csv", "w") as f3:
        f3.write(first_block)
        f3.write("\n")
        f3.write(second_block)

if __name__ == "__main__":
    #check_files_for_content(os.listdir(path))
    concat_csv()