Update the Gerrit collection: add at least an intermediate step for gathering Gerrit results
This commit is contained in:
parent
6a760decfe
commit
542a4f5323
151688
072525_gerrit_collection/072825_gerrit_filled_df.csv
Normal file
151688
072525_gerrit_collection/072825_gerrit_filled_df.csv
Normal file
File diff suppressed because one or more lines are too long
@ -6,6 +6,7 @@ import requests
|
||||
import re
|
||||
import datetime
|
||||
import time
|
||||
from tqdm import tqdm
|
||||
#from urllib.parse import quote_plus
|
||||
|
||||
#from requests.auth import HTTPDigestAuth
|
||||
@ -21,14 +22,20 @@ def query_change_detail(
|
||||
):
|
||||
time.sleep(sleep)
|
||||
short_change_id = written_url.split("/")[-1]
|
||||
|
||||
username = os.environ.get("GERRIT_USERNAME")
|
||||
http_password = os.environ.get("GERRIT_HTTP_PASSWORD")
|
||||
|
||||
api_url = f"https://gerrit.wikimedia.org/r/changes/{short_change_id}/detail"
|
||||
|
||||
response = requests.get(api_url, headers={'Content-Type': 'application/json'})
|
||||
response = requests.get(api_url, auth=(username, http_password), headers={'Content-Type': 'application/json'})
|
||||
|
||||
result = json.loads(response.text[5:])
|
||||
|
||||
select_change_dict = {}
|
||||
#making note of what the url was in the message
|
||||
select_change_dict['written_url_in_message'] = written_url
|
||||
result['written_url_in_message'] = written_url
|
||||
#getting ID
|
||||
select_change_dict['id'] = result['change_id']
|
||||
select_change_dict['project'] = result['project']
|
||||
@ -49,7 +56,89 @@ def query_change_detail(
|
||||
select_change_dict['reviewers'] = result['reviewers']['REVIEWER']
|
||||
print(result)
|
||||
print(select_change_dict)
|
||||
return [select_change_dict, result]
|
||||
|
||||
|
||||
def query_gerrit_changes(df, sleep=20, timeout=60):
    """Fetch Gerrit change details for every URL listed in ``df["gerrit_urls"]``.

    Two columns are added to ``df`` in place:

    * ``gerrit_full_results`` -- for each row, a list with one dict per URL,
      ``{"written_url_in_message": url, "full_result": parsed_json_or_None}``.
      ``full_result`` is ``None`` when the request failed, returned a
      non-200 status, or the body could not be parsed as JSON.
    * ``selected_gerrit_results`` -- for each row, a list with one dict of
      selected fields per successfully fetched change (an empty dict when a
      required field is missing from the response).

    Credentials are read from the ``GERRIT_USERNAME`` and
    ``GERRIT_HTTP_PASSWORD`` environment variables. If either is unset the
    requests are sent without authentication.

    Args:
        df: DataFrame with a ``gerrit_urls`` column holding a list of URL
            strings per row; falsy cells produce an empty result list.
        sleep: Seconds to pause before each request.
        timeout: Value passed to ``requests.get`` as ``timeout`` so a stalled
            connection cannot block the whole run.

    Returns:
        The same ``df`` object with the two columns added.
    """
    tqdm.pandas()

    # Gerrit prepends this line to JSON responses to prevent XSSI.
    xssi_prefix = ")]}'"

    username = os.environ.get("GERRIT_USERNAME")
    http_password = os.environ.get("GERRIT_HTTP_PASSWORD")
    # Passing (None, None) would make requests send the literal credentials
    # "None:None", so only authenticate when both values are present.
    auth = (username, http_password) if username and http_password else None

    # Get the information for the Gerrit change from the URL written in the message.
    def get_change_details(written_url):
        failed = {"written_url_in_message": written_url, "full_result": None}

        time.sleep(sleep)
        # Drop trailing slashes so ".../85783/" still yields "85783".
        short_change_id = written_url.rstrip("/").split("/")[-1]
        api_url = f"https://gerrit.wikimedia.org/r/changes/{short_change_id}/detail"

        try:
            response = requests.get(
                api_url,
                auth=auth,
                headers={'Content-Type': 'application/json'},
                timeout=timeout,
            )
        except requests.RequestException as e:
            # One unreachable URL must not discard everything collected so far.
            print("Request error:", e)
            return failed

        if response.status_code != 200:
            print("Bad response:", response.status_code, response.text)
            return failed

        body = response.text
        if body.startswith(xssi_prefix):
            body = body[len(xssi_prefix):]

        try:
            result = json.loads(body)
        except ValueError as e:  # json.JSONDecodeError subclasses ValueError
            print("JSON decode error:", e)
            print("Text was:", repr(response.text))
            return failed

        return {"written_url_in_message": written_url, "full_result": result}

    def parse_selected_metadata(written_url, full_result):
        # Deliberately broad: a single malformed payload is reported and
        # skipped rather than aborting the whole batch.
        try:
            select_change_dict = {
                'written_url_in_message': written_url,
                # change ID, project and subject line
                'id': full_result['change_id'],
                'project': full_result['project'],
                'description': full_result['subject'],
                # owner record and email
                'owner_dict': full_result['owner'],
                'owner_email': full_result['owner']['email'],
                # current revision count
                'revision_count': full_result['current_revision_number'],
                # size of the change
                'code_insertions': full_result['insertions'],
                'code_deletions': full_result['deletions'],
                'status': full_result['status'],
            }

            reviewers = full_result.get('reviewers', {}).get('REVIEWER', [])
            if reviewers:
                select_change_dict['reviewer_count'] = len(reviewers)
                select_change_dict['reviewers'] = reviewers
            else:
                select_change_dict['reviewer_count'] = "NA"
                select_change_dict['reviewers'] = "NA"

            return select_change_dict
        except Exception:
            print(f'Error in this URL: {written_url}')
            print(f'KeyError in this dictionary: {full_result}')
            return {}

    df['gerrit_full_results'] = df["gerrit_urls"].progress_apply(
        lambda urls: [get_change_details(url) for url in urls] if urls else []
    )

    df['selected_gerrit_results'] = df['gerrit_full_results'].progress_apply(
        lambda results: [
            parse_selected_metadata(item['written_url_in_message'], item['full_result'])
            for item in results
            if item.get('full_result') and item.get('written_url_in_message')
        ]
    )

    return df
|
||||
|
||||
def add_gerrit_urls_column(df, text_column="comment_text", new_column="gerrit_urls"):
    """Extract Wikimedia Gerrit URLs from a text column into a new list column.

    Each value of ``df[text_column]`` is converted to ``str`` and scanned for
    substrings starting with ``https://gerrit.wikimedia.org`` and running up
    to the next whitespace. Trailing sentence punctuation and closing
    brackets/quotes are stripped from each match, so a URL written as
    ``.../r/85783.`` or ``(.../r/85783)`` does not carry that character into
    the change id later derived from the last path segment.

    Args:
        df: DataFrame to modify in place.
        text_column: Name of the column containing the message text.
        new_column: Name of the column that receives the list of URLs.

    Returns:
        The same ``df`` object with ``new_column`` added; rows without a
        match hold an empty list.
    """
    pattern = re.compile(r"(https://gerrit\.wikimedia\.org[^\s]*)")
    # Characters that commonly follow a URL in prose but are not part of it.
    trailing_chars = ".,;:!?)]}>\"'"

    def extract_urls(msg):
        return [url.rstrip(trailing_chars) for url in pattern.findall(msg)]

    df[new_column] = df[text_column].astype(str).apply(extract_urls)
    return df
|
||||
|
||||
if __name__ == "__main__":
    # Single-change lookup with a one-second pause, run before the full batch.
    query_change_detail("https://gerrit.wikimedia.org/r/85783", 1)

    input_csv = "/home/SOC.NORTHWESTERN.EDU/nws8519/git/mw-convo-collections/072525_gerrit_collection/071425_master_discussion_data.csv"
    output_csv = "/home/SOC.NORTHWESTERN.EDU/nws8519/git/mw-convo-collections/072525_gerrit_collection/080425_gerrit_filled_df.csv"

    # Load the discussion data, find the Gerrit URLs in each comment, then
    # query Gerrit for every URL and write the enriched table back out.
    discussion_df = pd.read_csv(input_csv)
    discussion_df = add_gerrit_urls_column(discussion_df)
    discussion_df = query_gerrit_changes(discussion_df)
    discussion_df.to_csv(output_csv, index=False)
|
||||
|
||||
#query_change_tail("https://gerrit.wikimedia.org/r/85783")
|
||||
|
Loading…
Reference in New Issue
Block a user