final updates for first data pass
This commit is contained in:
		
							parent
							
								
									c473a982f8
								
							
						
					
					
						commit
						f2b3396b0d
					
				@ -19,7 +19,7 @@ def get_discussion_gql(repo_owner, repo_name):
 | 
			
		||||
    data_string = ("""
 | 
			
		||||
    query {
 | 
			
		||||
        repository(owner: """ + repo_owner + """, name: """ + repo_name + """) {
 | 
			
		||||
            issues(last: 2) {
 | 
			
		||||
            issues(last: 50) {
 | 
			
		||||
                edges {
 | 
			
		||||
                    node {
 | 
			
		||||
                        id 
 | 
			
		||||
@ -37,7 +37,7 @@ def get_discussion_gql(repo_owner, repo_name):
 | 
			
		||||
                                }
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                        comments(first: 10) {
 | 
			
		||||
                        comments(first: 20) {
 | 
			
		||||
                            # edges.node is where the actual `Comment` object is
 | 
			
		||||
                            edges {
 | 
			
		||||
                            node {
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										17
									
								
								main.py
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								main.py
									
									
									
									
									
								
							@ -25,7 +25,7 @@ def main():
 | 
			
		||||
    # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window
 | 
			
		||||
    early_cutoff = dt.datetime(2013,11, 6)
 | 
			
		||||
    print("Earliest date examined: " + str(early_cutoff))
 | 
			
		||||
    largest_object = {}
 | 
			
		||||
    #largest_object = {}
 | 
			
		||||
    #manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
 | 
			
		||||
    directory='../kaylea_dissertation/lifecycle/package_metadata/'
 | 
			
		||||
    count_of_dir = 0
 | 
			
		||||
@ -34,22 +34,23 @@ def main():
 | 
			
		||||
        # checking if it is a file
 | 
			
		||||
        if os.path.isfile(f):
 | 
			
		||||
            print(f)   
 | 
			
		||||
        get_everything(f, largest_object, early_cutoff)
 | 
			
		||||
        get_everything(f, early_cutoff)
 | 
			
		||||
        #remove this and it should just run? for the most part at least I think
 | 
			
		||||
        count_of_dir += 1
 | 
			
		||||
        #this is what needs to be commented out
 | 
			
		||||
        if count_of_dir > 4:
 | 
			
		||||
            break
 | 
			
		||||
    print(largest_object.keys())
 | 
			
		||||
    print(len(largest_object.keys()))
 | 
			
		||||
    #print(largest_object.keys())
 | 
			
		||||
    #print(len(largest_object.keys()))
 | 
			
		||||
    '''
 | 
			
		||||
    for repo in largest_object:
 | 
			
		||||
        print(largest_object[repo]['new_formality'])
 | 
			
		||||
    '''
 | 
			
		||||
    with open('/data/users/mgaughan/kkex_data_110723/result.json', 'w') as results_path:
 | 
			
		||||
        json.dump(largest_object, results_path)
 | 
			
		||||
    #with open('/data/users/mgaughan/kkex_data_110723/result.json', 'w') as results_path:
 | 
			
		||||
    #    json.dump(largest_object, results_path)
 | 
			
		||||
 | 
			
		||||
def get_everything(manifest_path, largest_object, early_cutoff):
 | 
			
		||||
def get_everything(manifest_path, early_cutoff):
 | 
			
		||||
    largest_object = {}
 | 
			
		||||
    with open(manifest_path, 'r') as stream:
 | 
			
		||||
        try:
 | 
			
		||||
            config = yaml.safe_load(stream)
 | 
			
		||||
@ -85,7 +86,7 @@ def get_everything(manifest_path, largest_object, early_cutoff):
 | 
			
		||||
            repo_uri_list = repo_path.split('/')
 | 
			
		||||
            with open('/data/users/mgaughan/kkex_data_110723/' + repo_uri_list[-2]  + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path:
 | 
			
		||||
                json.dump(largest_object[repo_path], data_path)
 | 
			
		||||
        except yaml.YAMLOError as err:
 | 
			
		||||
        except yaml.YAMLError as err:
 | 
			
		||||
            print(err)
 | 
			
		||||
    print("----------------------")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user