expanded data collection script edits
This commit is contained in:
parent
d5863e5735
commit
29a6ef7074
@ -1,10 +1,11 @@
|
||||
import os
|
||||
import requests
|
||||
import wget
|
||||
from perceval.backends.core.git import Git
|
||||
#from perceval.backends.core.git import Git
|
||||
from git import Repo
|
||||
import shutil
|
||||
|
||||
import pexpect
|
||||
import subprocess
|
||||
|
||||
def debian_vcs_query(debian_vcs_link):
|
||||
upstream_repo_link = ""
|
||||
@ -12,11 +13,21 @@ def debian_vcs_query(debian_vcs_link):
|
||||
output_directory = "/data/users/mgaughan/tmp1/" + project_name
|
||||
print(output_directory)
|
||||
#url = debian_vcs_link + '/-/blob/master/debian/upstream/metadata'
|
||||
# The try block below handles git's credential prompts: it answers the salsa.debian.org username and password prompts with empty strings
|
||||
try:
|
||||
Repo.clone_from(debian_vcs_link, output_directory)
|
||||
#Repo.clone_from(debian_vcs_link, output_directory, env={"Username for 'https://salsa.debian.org": "", "Password for 'https://salsa.debian.org":""})
|
||||
command = "git clone " + debian_vcs_link + " " + output_directory
|
||||
p = pexpect.spawn(command)
|
||||
p.expect("Username for 'https://salsa.debian.org':")
|
||||
p.sendline ("")
|
||||
p.expect ("Password for 'https://salsa.debian.org':")
|
||||
p.sendline ("")
|
||||
p.expect(pexpect.EOF, timeout=None)
|
||||
#p.expect([pexpect.TIMEOUT, 'pattern', pexpect.EOF])
|
||||
p.close()
|
||||
except:
|
||||
print("error cloning")
|
||||
return upstream_repo_link
|
||||
print("handling misc pexpect issues")
|
||||
#return upstream_repo_link
|
||||
try:
|
||||
upstream_metadata = open(output_directory + "/debian/upstream/metadata", "r").read().split("\n")
|
||||
upstream_repo_array = upstream_metadata[3].split(":")[-2:]
|
||||
@ -28,6 +39,7 @@ def debian_vcs_query(debian_vcs_link):
|
||||
#filename = wget.download(url, out=output_directory)
|
||||
#print(type(filename))
|
||||
shutil.rmtree(output_directory, ignore_errors=True)
|
||||
print('success')
|
||||
return upstream_repo_link
|
||||
|
||||
|
||||
@ -65,4 +77,5 @@ def debian_query(package_name):
|
||||
|
||||
if __name__ == "__main__":
|
||||
#main()
|
||||
debian_vcs_query("https://salsa.debian.org/debian/0xffff")
|
||||
#debian_query("broccoli-ruby")
|
||||
debian_vcs_query(debian_query("brotli"))
|
@ -11,7 +11,7 @@ import debian_queries as dqs
|
||||
|
||||
|
||||
key = os.environ.get('KKEXKEY')
|
||||
test_csv_path = "120523_expanded_data.csv"
|
||||
test_csv_path = "121123_expanded_data.csv"
|
||||
|
||||
def main():
|
||||
early_cutoff = dt.datetime(2008,2, 8)
|
||||
@ -49,7 +49,7 @@ def main():
|
||||
project_dict["age_of_project"], project_dict["contributors"], project_dict["collaborators"] = perceval_data['age_of_project'], perceval_data['contributors'], perceval_data['collaborators']
|
||||
if "github" in project_dict["upstream_vcs_link"]:
|
||||
project_dict['milestone_count'] = gha.main(project_dict["upstream_vcs_link"], early_cutoff)
|
||||
with open('/data/users/mgaughan/kkex_comment_data_120523/' + 'gh_comments_' + project_dict["project_name"] + '.json', 'w') as data_path:
|
||||
with open('/data/users/mgaughan/kkex_comment_data_121123/' + 'gh_comments_' + project_dict["project_name"] + '.json', 'w') as data_path:
|
||||
json.dump(ghs.main(project_dict["upstream_vcs_link"], early_cutoff), data_path)
|
||||
else:
|
||||
project_dict['milestone_count'] = 0
|
||||
@ -61,7 +61,7 @@ def main():
|
||||
meta_dict['total_success'] = successful_count
|
||||
meta_dict['no_upstream_info'] = no_upstream
|
||||
#print("success rate: " + str(successful_count/index) + "; total success count: " + str(successful_count))
|
||||
with open('120523_metadata_expanded.json', 'w') as data_path:
|
||||
with open('121123_metadata_expanded.json', 'w') as data_path:
|
||||
json.dump(meta_dict, data_path)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user