expanded data collection script edits
This commit is contained in:
parent
d5863e5735
commit
29a6ef7074
@ -1,10 +1,11 @@
|
|||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import wget
|
import wget
|
||||||
from perceval.backends.core.git import Git
|
#from perceval.backends.core.git import Git
|
||||||
from git import Repo
|
from git import Repo
|
||||||
import shutil
|
import shutil
|
||||||
|
import pexpect
|
||||||
|
import subprocess
|
||||||
|
|
||||||
def debian_vcs_query(debian_vcs_link):
|
def debian_vcs_query(debian_vcs_link):
|
||||||
upstream_repo_link = ""
|
upstream_repo_link = ""
|
||||||
@ -12,11 +13,21 @@ def debian_vcs_query(debian_vcs_link):
|
|||||||
output_directory = "/data/users/mgaughan/tmp1/" + project_name
|
output_directory = "/data/users/mgaughan/tmp1/" + project_name
|
||||||
print(output_directory)
|
print(output_directory)
|
||||||
#url = debian_vcs_link + '/-/blob/master/debian/upstream/metadata'
|
#url = debian_vcs_link + '/-/blob/master/debian/upstream/metadata'
|
||||||
|
# the below try is the password handling
|
||||||
try:
|
try:
|
||||||
Repo.clone_from(debian_vcs_link, output_directory)
|
#Repo.clone_from(debian_vcs_link, output_directory, env={"Username for 'https://salsa.debian.org": "", "Password for 'https://salsa.debian.org":""})
|
||||||
|
command = "git clone " + debian_vcs_link + " " + output_directory
|
||||||
|
p = pexpect.spawn(command)
|
||||||
|
p.expect("Username for 'https://salsa.debian.org':")
|
||||||
|
p.sendline ("")
|
||||||
|
p.expect ("Password for 'https://salsa.debian.org':")
|
||||||
|
p.sendline ("")
|
||||||
|
p.expect(pexpect.EOF, timeout=None)
|
||||||
|
#p.expect([pexpect.TIMEOUT, 'pattern', pexpect.EOF])
|
||||||
|
p.close()
|
||||||
except:
|
except:
|
||||||
print("error cloning")
|
print("handling misc pexpect issues")
|
||||||
return upstream_repo_link
|
#return upstream_repo_link
|
||||||
try:
|
try:
|
||||||
upstream_metadata = open(output_directory + "/debian/upstream/metadata", "r").read().split("\n")
|
upstream_metadata = open(output_directory + "/debian/upstream/metadata", "r").read().split("\n")
|
||||||
upstream_repo_array = upstream_metadata[3].split(":")[-2:]
|
upstream_repo_array = upstream_metadata[3].split(":")[-2:]
|
||||||
@ -28,6 +39,7 @@ def debian_vcs_query(debian_vcs_link):
|
|||||||
#filename = wget.download(url, out=output_directory)
|
#filename = wget.download(url, out=output_directory)
|
||||||
#print(type(filename))
|
#print(type(filename))
|
||||||
shutil.rmtree(output_directory, ignore_errors=True)
|
shutil.rmtree(output_directory, ignore_errors=True)
|
||||||
|
print('success')
|
||||||
return upstream_repo_link
|
return upstream_repo_link
|
||||||
|
|
||||||
|
|
||||||
@ -65,4 +77,5 @@ def debian_query(package_name):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
#main()
|
#main()
|
||||||
debian_vcs_query("https://salsa.debian.org/debian/0xffff")
|
#debian_query("broccoli-ruby")
|
||||||
|
debian_vcs_query(debian_query("brotli"))
|
@ -11,7 +11,7 @@ import debian_queries as dqs
|
|||||||
|
|
||||||
|
|
||||||
key = os.environ.get('KKEXKEY')
|
key = os.environ.get('KKEXKEY')
|
||||||
test_csv_path = "120523_expanded_data.csv"
|
test_csv_path = "121123_expanded_data.csv"
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
early_cutoff = dt.datetime(2008,2, 8)
|
early_cutoff = dt.datetime(2008,2, 8)
|
||||||
@ -49,7 +49,7 @@ def main():
|
|||||||
project_dict["age_of_project"], project_dict["contributors"], project_dict["collaborators"] = perceval_data['age_of_project'], perceval_data['contributors'], perceval_data['collaborators']
|
project_dict["age_of_project"], project_dict["contributors"], project_dict["collaborators"] = perceval_data['age_of_project'], perceval_data['contributors'], perceval_data['collaborators']
|
||||||
if "github" in project_dict["upstream_vcs_link"]:
|
if "github" in project_dict["upstream_vcs_link"]:
|
||||||
project_dict['milestone_count'] = gha.main(project_dict["upstream_vcs_link"], early_cutoff)
|
project_dict['milestone_count'] = gha.main(project_dict["upstream_vcs_link"], early_cutoff)
|
||||||
with open('/data/users/mgaughan/kkex_comment_data_120523/' + 'gh_comments_' + project_dict["project_name"] + '.json', 'w') as data_path:
|
with open('/data/users/mgaughan/kkex_comment_data_121123/' + 'gh_comments_' + project_dict["project_name"] + '.json', 'w') as data_path:
|
||||||
json.dump(ghs.main(project_dict["upstream_vcs_link"], early_cutoff), data_path)
|
json.dump(ghs.main(project_dict["upstream_vcs_link"], early_cutoff), data_path)
|
||||||
else:
|
else:
|
||||||
project_dict['milestone_count'] = 0
|
project_dict['milestone_count'] = 0
|
||||||
@ -61,7 +61,7 @@ def main():
|
|||||||
meta_dict['total_success'] = successful_count
|
meta_dict['total_success'] = successful_count
|
||||||
meta_dict['no_upstream_info'] = no_upstream
|
meta_dict['no_upstream_info'] = no_upstream
|
||||||
#print("success rate: " + str(successful_count/index) + "; total success count: " + str(successful_count))
|
#print("success rate: " + str(successful_count/index) + "; total success count: " + str(successful_count))
|
||||||
with open('120523_metadata_expanded.json', 'w') as data_path:
|
with open('121123_metadata_expanded.json', 'w') as data_path:
|
||||||
json.dump(meta_dict, data_path)
|
json.dump(meta_dict, data_path)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user