155 lines
6.6 KiB
Python
155 lines
6.6 KiB
Python
import os, sys
|
|
import json
|
|
import numpy as np
|
|
import pandas as pd
|
|
import requests
|
|
import re
|
|
import datetime
|
|
import time
|
|
from tqdm import tqdm
|
|
#from urllib.parse import quote_plus
|
|
|
|
#from requests.auth import HTTPDigestAuth
|
|
#from pygerrit2 import GerritRestAPI, HTTPBasicAuth
|
|
|
|
# format of the Gerrit links: https://gerrit.wikimedia.org/r/85783
|
|
# but the REST API expects the change under /changes/, like this: https://gerrit.wikimedia.org/r/changes/85783
|
|
# curl https://gerrit.wikimedia.org/r/changes/Ic69c2ad275389a31c9fbaf47f3665dcdbb7ac2af/detail
|
|
|
|
def query_change_detail(
    written_url,
    sleep=10,
    timeout=60,
):
    """Fetch one Gerrit change and return a summary alongside the raw payload.

    Args:
        written_url: Gerrit link as it appeared in a message, e.g.
            ``https://gerrit.wikimedia.org/r/85783`` or
            ``https://gerrit.wikimedia.org/r/#/c/85783/2``.
        sleep: Seconds to wait before issuing the request.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A two-element list ``[select_change_dict, result]`` where ``result``
        is the decoded JSON of the ``/detail`` endpoint (with
        ``written_url_in_message`` added) and ``select_change_dict`` holds
        the selected fields copied out of it.

    Raises:
        ValueError: If the server answers with a non-200 status or the body
            is not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        KeyError: If an expected field (e.g. ``owner.email``) is missing
            from the payload.
        requests.RequestException: On connection failures or timeouts.
    """
    time.sleep(sleep)

    # Take the change number that directly follows /r/ (optionally /r/#/c/),
    # so trailing slashes or patchset suffixes are not mistaken for the id.
    # Anything that does not look like that falls back to the last path
    # segment, which keeps change-id hashes and /r/changes/N links working.
    match = re.search(
        r"https://gerrit\.wikimedia\.org/r/(?:#/c/)?([0-9]+)(?:/|$)",
        written_url,
    )
    short_change_id = match.group(1) if match else written_url.split("/")[-1]

    username = os.environ.get("GERRIT_USERNAME")
    http_password = os.environ.get("GERRIT_HTTP_PASSWORD")
    # Only send credentials when both are configured; otherwise the literal
    # strings "None" would be sent as the user name and password.
    auth = (username, http_password) if username and http_password else None

    api_url = f"https://gerrit.wikimedia.org/r/changes/{short_change_id}/detail"

    response = requests.get(
        api_url,
        auth=auth,
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise ValueError(
            f"Bad response {response.status_code} for {api_url}: {response.text}"
        )

    # Gerrit prefixes JSON bodies with the line ")]}'" ; strip it only when
    # it is really there instead of blindly dropping five characters.
    body = response.text
    if body.startswith(")]}'"):
        body = body[4:]
    result = json.loads(body)

    select_change_dict = {}
    # making note of what the url was in the message
    select_change_dict['written_url_in_message'] = written_url
    result['written_url_in_message'] = written_url
    # getting ID
    select_change_dict['id'] = result['change_id']
    select_change_dict['project'] = result['project']
    select_change_dict['description'] = result['subject']
    # getting owner name and email
    select_change_dict['owner_dict'] = result['owner']
    select_change_dict['owner_email'] = result['owner']['email']
    # current revision count
    select_change_dict['revision_count'] = result['current_revision_number']
    # insertions
    select_change_dict['code_insertions'] = result['insertions']
    # deletions
    select_change_dict['code_deletions'] = result['deletions']
    # status
    select_change_dict['status'] = result['status']
    # reviewers: a change without reviewers has no 'REVIEWER' entry, which
    # is reported as an empty list / count 0 rather than a KeyError
    reviewers = result.get('reviewers', {}).get('REVIEWER', [])
    select_change_dict['reviewer_count'] = len(reviewers)
    select_change_dict['reviewers'] = reviewers

    print(result)
    print(select_change_dict)
    return [select_change_dict, result]
|
|
|
|
|
|
def query_gerrit_changes(df, sleep=25, timeout=60):
    """Query Gerrit for every change URL in ``df`` and attach the results.

    Reads the ``gerrit_change_urls`` column (one list of URLs per row) and
    adds two columns to ``df`` in place:

    * ``gerrit_full_results`` - per URL, a dict with ``written_url_in_message``
      and ``full_result`` (decoded ``/detail`` JSON, or ``None`` on failure).
    * ``selected_gerrit_results`` - per successfully fetched URL, a dict of
      selected fields; ``{}`` if an expected field was missing.

    Args:
        df: DataFrame holding a ``gerrit_change_urls`` column.
        sleep: Seconds to wait before each HTTP request.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        The same ``df`` object with the two columns added.
    """
    tqdm.pandas()

    # Invariants for the whole crawl: compile the pattern and read the
    # credentials once instead of once per URL.
    change_number_re = re.compile(
        r"https://gerrit\.wikimedia\.org/r/(?:#/c/)?([0-9]+)(?:/|$)"
    )
    username = os.environ.get("GERRIT_USERNAME")
    http_password = os.environ.get("GERRIT_HTTP_PASSWORD")
    # Only send credentials when both are configured; otherwise the literal
    # strings "None" would be sent as the user name and password.
    auth = (username, http_password) if username and http_password else None

    # get the information from the Gerrit change from the URL that's written in the message
    def get_change_details(written_url):
        failure = {"written_url_in_message": written_url, "full_result": None}

        match = change_number_re.search(written_url)
        if not match:
            print(f"Error: No change ID found in URL: {written_url}")
            return failure
        short_change_id = match.group(1)
        api_url = f"https://gerrit.wikimedia.org/r/changes/{short_change_id}/detail"

        # Throttle only when a request is actually about to be sent.
        time.sleep(sleep)
        try:
            response = requests.get(
                api_url,
                auth=auth,
                headers={'Content-Type': 'application/json'},
                timeout=timeout,
            )
        except requests.RequestException as e:
            # A single dropped connection or timeout must not abort a crawl
            # that may already have been running for hours.
            print("Request failed:", e)
            return failure

        if response.status_code != 200:
            print("Bad response:", response.status_code, response.text)
            return failure

        # Gerrit prefixes JSON bodies with the line ")]}'" ; strip it only
        # when present instead of blindly dropping five characters.
        body = response.text
        if body.startswith(")]}'"):
            body = body[4:]
        try:
            result = json.loads(body)
        except ValueError as e:
            print("JSON decode error:", e)
            print("Text was:", repr(response.text))
            return failure
        return {"written_url_in_message": written_url, "full_result": result}

    def parse_selected_metadata(written_url, full_result):
        try:
            select_change_dict = {}
            select_change_dict['written_url_in_message'] = written_url
            # record which of the two link styles was used in the message
            if "/#/c/" in written_url:
                select_change_dict['change_type'] = "/#/c/"
            else:
                select_change_dict['change_type'] = "just /r/"
            # getting ID
            select_change_dict['id'] = full_result['change_id']
            select_change_dict['project'] = full_result['project']
            select_change_dict['description'] = full_result['subject']
            # getting owner name and email
            select_change_dict['owner_dict'] = full_result['owner']
            select_change_dict['owner_email'] = full_result['owner']['email']
            # current revision count
            select_change_dict['revision_count'] = full_result['current_revision_number']
            # insertions
            select_change_dict['code_insertions'] = full_result['insertions']
            # deletions
            select_change_dict['code_deletions'] = full_result['deletions']
            # status
            select_change_dict['status'] = full_result['status']
            # reviewers ("NA" marks a change with no reviewers listed)
            reviewers = full_result.get('reviewers', {}).get('REVIEWER', [])
            if reviewers:
                select_change_dict['reviewer_count'] = len(reviewers)
                select_change_dict['reviewers'] = reviewers
            else:
                select_change_dict['reviewer_count'] = "NA"
                select_change_dict['reviewers'] = "NA"
            return select_change_dict
        except (KeyError, TypeError, AttributeError) as e:
            # Report the real failure instead of always calling it a KeyError.
            print(f'Error in this URL: {written_url}')
            print(f'{type(e).__name__} ({e}) in this dictionary: {full_result}')
            return {}

    df['gerrit_full_results'] = df["gerrit_change_urls"].progress_apply(
        lambda urls: [get_change_details(url) for url in urls] if urls else []
    )

    # Only entries that were fetched successfully are summarised.
    df['selected_gerrit_results'] = df['gerrit_full_results'].progress_apply(
        lambda results: [
            parse_selected_metadata(item['written_url_in_message'], item['full_result'])
            for item in results
            if item.get('full_result') and item.get('written_url_in_message')
        ]
    )

    return df
|
|
|
|
def add_gerrit_urls_column(df, text_column="comment_text", new_column="gerrit_change_urls"):
    """Collect the Wikimedia Gerrit change links found in each row's text.

    Every value of ``df[text_column]`` is converted to ``str`` and scanned
    for links of the form ``https://gerrit.wikimedia.org/r/<number>`` or
    ``https://gerrit.wikimedia.org/r/#/c/<number>``, including any
    ``/...`` suffix up to the next whitespace. The list of matches (possibly
    empty) is stored in ``df[new_column]``.

    Args:
        df: DataFrame to scan; it is modified in place.
        text_column: Name of the column holding the message text.
        new_column: Name of the column that receives the lists of URLs.

    Returns:
        The same ``df`` object, with ``new_column`` added.
    """
    gerrit_link_re = re.compile(
        r"https://gerrit\.wikimedia\.org/r/(?:#/c/)?[0-9]+(?:/[^\s]*)?"
    )

    def find_links(message):
        # All non-overlapping matches, in order of appearance.
        return gerrit_link_re.findall(message)

    messages = df[text_column].astype(str)
    df[new_column] = messages.apply(find_links)
    return df
|
|
|
|
if __name__ == "__main__":
    # Load the discussion data, extract the Gerrit links from each comment,
    # then query Gerrit for every link found.
    source_csv = "/home/SOC.NORTHWESTERN.EDU/nws8519/git/mw-convo-collections/072525_gerrit_collection/071425_master_discussion_data.csv"
    df = query_gerrit_changes(
        add_gerrit_urls_column(
            # add .head(300) here to try the pipeline on a small sample
            pd.read_csv(source_csv)
        )
    )

    # Saving and inspection are currently disabled:
    #df.to_csv("/home/SOC.NORTHWESTERN.EDU/nws8519/git/mw-convo-collections/072525_gerrit_collection/080725_gerrit_filled_df.csv", index=False)
    #print(df)
    #query_change_tail("https://gerrit.wikimedia.org/r/85783")
|