diff --git a/gh_gsql_req.py b/gh_gsql_req.py index 78abf73..28c9cbd 100644 --- a/gh_gsql_req.py +++ b/gh_gsql_req.py @@ -19,7 +19,7 @@ def get_discussion_gql(repo_owner, repo_name): data_string = (""" query { repository(owner: """ + repo_owner + """, name: """ + repo_name + """) { - issues(last: 2) { + issues(last: 50) { edges { node { id @@ -37,7 +37,7 @@ def get_discussion_gql(repo_owner, repo_name): } } } - comments(first: 10) { + comments(first: 20) { # edges.node is where the actual `Comment` object is edges { node { diff --git a/main.py b/main.py index b3d8868..62e77d4 100644 --- a/main.py +++ b/main.py @@ -25,7 +25,7 @@ def main(): # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window early_cutoff = dt.datetime(2013,11, 6) print("Earliest date examined: " + str(early_cutoff)) - largest_object = {} + #largest_object = {} #manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml' directory='../kaylea_dissertation/lifecycle/package_metadata/' count_of_dir = 0 @@ -34,22 +34,23 @@ def main(): # checking if it is a file if os.path.isfile(f): print(f) - get_everything(f, largest_object, early_cutoff) + get_everything(f, early_cutoff) #remove this and it should just run? for the most part at least I think count_of_dir += 1 #this is what needs to be commented out if count_of_dir > 4: break - print(largest_object.keys()) - print(len(largest_object.keys())) + #print(largest_object.keys()) + #print(len(largest_object.keys())) ''' for repo in largest_object: print(largest_object[repo]['new_formality']) ''' - with open('/data/users/mgaughan/kkex_data_110723/result.json', 'w') as results_path: - json.dump(largest_object, results_path) + #with open('/data/users/mgaughan/kkex_data_110723/result.json', 'w') as results_path: + # json.dump(largest_object, results_path) -def get_everything(manifest_path, largest_object, early_cutoff): +def get_everything(manifest_path, early_cutoff): + largest_object = {} with open(manifest_path, 'r') as stream: try: config = yaml.safe_load(stream) @@ -85,7 +86,7 @@ def get_everything(manifest_path, largest_object, early_cutoff): repo_uri_list = repo_path.split('/') with open('/data/users/mgaughan/kkex_data_110723/' + repo_uri_list[-2] + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path: json.dump(largest_object[repo_path], data_path) - except yaml.YAMLOError as err: + except yaml.YAMLError as err: print(err) print("----------------------")