misc fire remediations for scraper
This commit is contained in:
parent
154a8b9d92
commit
7aa3af05ea
@ -11,7 +11,7 @@ import debian_queries as dqs
|
||||
|
||||
|
||||
key = os.environ.get('KKEXKEY')
|
||||
test_csv_path = "121123_expanded_data.csv"
|
||||
test_csv_path = "121223_expanded_data.csv"
|
||||
|
||||
def main():
|
||||
early_cutoff = dt.datetime(2008,2, 8)
|
||||
@ -43,16 +43,20 @@ def main():
|
||||
if project_dict["upstream_vcs_link"] == "":
|
||||
no_upstream += 1
|
||||
continue
|
||||
perceval_data = pt.main(project_dict["upstream_vcs_link"], early_cutoff)
|
||||
try:
|
||||
perceval_data = pt.main(project_dict["upstream_vcs_link"], early_cutoff)
|
||||
except:
|
||||
print("perceval timeout")
|
||||
continue
|
||||
if perceval_data == {}:
|
||||
continue
|
||||
project_rosters = {}
|
||||
project_dict["age_of_project"], project_dict["contributors"], project_dict["collaborators"], project_rosters['contributors_list'], project_rosters['collaborators_list'] = perceval_data['age_of_project'], perceval_data['contributors'], perceval_data['collaborators'], perceval_data['contributors_list'], perceval_data['collaborators_list']
|
||||
with open("/data/users/mgaughan/kkex_roster_data_121123/" + "rosters_" + project_dict["project_name"] + '.json', 'w') as roster_path:
|
||||
with open("/data/users/mgaughan/kkex_roster_data_121223/" + "rosters_" + project_dict["project_name"] + '.json', 'w') as roster_path:
|
||||
json.dump(project_rosters, roster_path)
|
||||
if "github" in project_dict["upstream_vcs_link"]:
|
||||
project_dict['milestone_count'] = gha.main(project_dict["upstream_vcs_link"], early_cutoff)
|
||||
with open('/data/users/mgaughan/kkex_comment_data_121123/' + 'gh_comments_' + project_dict["project_name"] + '.json', 'w') as data_path:
|
||||
with open('/data/users/mgaughan/kkex_comment_data_121223/' + 'gh_comments_' + project_dict["project_name"] + '.json', 'w') as data_path:
|
||||
json.dump(ghs.main(project_dict["upstream_vcs_link"], early_cutoff), data_path)
|
||||
else:
|
||||
project_dict['milestone_count'] = 0
|
||||
@ -64,7 +68,7 @@ def main():
|
||||
meta_dict['total_success'] = successful_count
|
||||
meta_dict['no_upstream_info'] = no_upstream
|
||||
#print("success rate: " + str(successful_count/index) + "; total success count: " + str(successful_count))
|
||||
with open('121123_metadata_expanded.json', 'w') as data_path:
|
||||
with open('121223_metadata_expanded.json', 'w') as data_path:
|
||||
json.dump(meta_dict, data_path)
|
||||
|
||||
|
||||
|
@ -2,10 +2,14 @@ import requests
|
||||
import datetime as dt
|
||||
import json
|
||||
import os
|
||||
from wrapt_timeout_decorator import *
|
||||
import time
|
||||
|
||||
|
||||
key = os.environ.get('KKEXKEY')
|
||||
|
||||
def main(vcs, early_cutoff):
|
||||
time.sleep(1)
|
||||
gsql_dict = {}
|
||||
vcs_list = vcs.split('/')
|
||||
repo_name = '"' + vcs_list[-1] + '"'
|
||||
@ -28,6 +32,7 @@ def get_discussion_gql(repo_owner, repo_name):
|
||||
number
|
||||
state
|
||||
author {
|
||||
login
|
||||
url
|
||||
}
|
||||
labels(first:5) {
|
||||
@ -41,8 +46,10 @@ def get_discussion_gql(repo_owner, repo_name):
|
||||
# edges.node is where the actual `Comment` object is
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
author {
|
||||
avatarUrl
|
||||
login
|
||||
url
|
||||
}
|
||||
body
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
import datetime as dt
|
||||
from perceval.backends.core.git import Git
|
||||
import argparse
|
||||
from wrapt_timeout_decorator import *
|
||||
import json
|
||||
|
||||
#globals
|
||||
#repo_dir = '/tmp/'
|
||||
@ -12,6 +14,8 @@ def main(vcs_path, begin_date):
|
||||
if len(perceval_info['list_of_commits']) > 0:
|
||||
perceval_info['age_of_project'] = get_repo_age(perceval_info['list_of_commits'])
|
||||
perceval_info['contributors'], perceval_info['collaborators'], perceval_info['contributors_list'], perceval_info['collaborators_list'] = get_all_actors(perceval_info['list_of_commits'])
|
||||
with open("/data/users/mgaughan/kkex_commit_data_121223/" + "commits_" + vcs_path.split('/')[-1] + '.json', 'w') as commits_path:
|
||||
json.dump(perceval_info, commits_path)
|
||||
del perceval_info['list_of_commits']
|
||||
print(perceval_info)
|
||||
return perceval_info
|
||||
@ -22,6 +26,7 @@ def main(vcs_path, begin_date):
|
||||
|
||||
|
||||
# this is the primary function for getting the list of commits from perceval
|
||||
@timeout(600, use_signals=False)
|
||||
def get_perceval_log(vcs_path, begin_date):
|
||||
vcs_path = vcs_path.strip()
|
||||
print(vcs_path)
|
||||
|
Loading…
Reference in New Issue
Block a user