final updates for first data pass

This commit is contained in:
mjgaughan 2023-11-07 17:25:22 -06:00
parent c473a982f8
commit f2b3396b0d
2 changed files with 11 additions and 10 deletions

View File

@ -19,7 +19,7 @@ def get_discussion_gql(repo_owner, repo_name):
data_string = (""" data_string = ("""
query { query {
repository(owner: """ + repo_owner + """, name: """ + repo_name + """) { repository(owner: """ + repo_owner + """, name: """ + repo_name + """) {
issues(last: 2) { issues(last: 50) {
edges { edges {
node { node {
id id
@ -37,7 +37,7 @@ def get_discussion_gql(repo_owner, repo_name):
} }
} }
} }
comments(first: 10) { comments(first: 20) {
# edges.node is where the actual `Comment` object is # edges.node is where the actual `Comment` object is
edges { edges {
node { node {

17
main.py
View File

@ -25,7 +25,7 @@ def main():
# we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window # we should discuss whether we're using the 93 day window that seems to be widely used or if we want a longer window
early_cutoff = dt.datetime(2013,11, 6) early_cutoff = dt.datetime(2013,11, 6)
print("Earliest date examined: " + str(early_cutoff)) print("Earliest date examined: " + str(early_cutoff))
largest_object = {} #largest_object = {}
#manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml' #manifest = '../kaylea_dissertation/lifecycle/package_metadata/jupyter-notebook_manifest.yaml'
directory='../kaylea_dissertation/lifecycle/package_metadata/' directory='../kaylea_dissertation/lifecycle/package_metadata/'
count_of_dir = 0 count_of_dir = 0
@ -34,22 +34,23 @@ def main():
# checking if it is a file # checking if it is a file
if os.path.isfile(f): if os.path.isfile(f):
print(f) print(f)
get_everything(f, largest_object, early_cutoff) get_everything(f, early_cutoff)
#remove this and it should just run? for the most part at least I think #remove this and it should just run? for the most part at least I think
count_of_dir += 1 count_of_dir += 1
#this is what needs to be commented out #this is what needs to be commented out
if count_of_dir > 4: if count_of_dir > 4:
break break
print(largest_object.keys()) #print(largest_object.keys())
print(len(largest_object.keys())) #print(len(largest_object.keys()))
''' '''
for repo in largest_object: for repo in largest_object:
print(largest_object[repo]['new_formality']) print(largest_object[repo]['new_formality'])
''' '''
with open('/data/users/mgaughan/kkex_data_110723/result.json', 'w') as results_path: #with open('/data/users/mgaughan/kkex_data_110723/result.json', 'w') as results_path:
json.dump(largest_object, results_path) # json.dump(largest_object, results_path)
def get_everything(manifest_path, largest_object, early_cutoff): def get_everything(manifest_path, early_cutoff):
largest_object = {}
with open(manifest_path, 'r') as stream: with open(manifest_path, 'r') as stream:
try: try:
config = yaml.safe_load(stream) config = yaml.safe_load(stream)
@ -85,7 +86,7 @@ def get_everything(manifest_path, largest_object, early_cutoff):
repo_uri_list = repo_path.split('/') repo_uri_list = repo_path.split('/')
with open('/data/users/mgaughan/kkex_data_110723/' + repo_uri_list[-2] + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path: with open('/data/users/mgaughan/kkex_data_110723/' + repo_uri_list[-2] + '_' + repo_uri_list[-1] + '_result.json', 'w') as data_path:
json.dump(largest_object[repo_path], data_path) json.dump(largest_object[repo_path], data_path)
except yaml.YAMLOError as err: except yaml.YAMLError as err:
print(err) print(err)
print("----------------------") print("----------------------")