1
0

building out the gerrit data collection

This commit is contained in:
Matthew Gaughan 2025-07-25 22:28:09 -05:00
parent 06c12c2168
commit 6a760decfe
3 changed files with 151744 additions and 1 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,55 @@
import os, sys
import json
import numpy as np
import pandas as pd
import requests
import re
import datetime
import time
#from urllib.parse import quote_plus
#from requests.auth import HTTPDigestAuth
#from pygerrit2 import GerritRestAPI, HTTPBasicAuth
# Gerrit change links are written like this: https://gerrit.wikimedia.org/r/85783
# but the REST API expects the change under /changes/: https://gerrit.wikimedia.org/r/changes/85783
# Example detail request: curl https://gerrit.wikimedia.org/r/changes/Ic69c2ad275389a31c9fbaf47f3665dcdbb7ac2af/detail
def query_change_detail(
    written_url,
    sleep = 10,
    timeout = 30
):
    """Fetch the detail record of one Gerrit change and return selected fields.

    Args:
        written_url: Link to the change as it is written elsewhere, e.g.
            "https://gerrit.wikimedia.org/r/85783". Only the last path
            segment is used as the change identifier.
        sleep: Seconds to pause before the request is sent.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys 'id', 'project', 'description', 'owner_dict',
        'owner_email', 'revision_count', 'code_insertions',
        'code_deletions', 'status', 'reviewer_count' and 'reviewers'.
        'owner_email' is None when the response does not expose an email;
        'reviewers' is an empty list when the change has no REVIEWER entry.

    Raises:
        requests.HTTPError: The server answered with an error status.
        json.JSONDecodeError: The response body is not valid JSON.
        KeyError: A required field is missing from the response.
    """
    # Pause before every request (presumably to rate-limit repeated calls).
    time.sleep(sleep)
    # Drop a trailing slash first so ".../r/85783/" does not produce an
    # empty identifier.
    short_change_id = written_url.rstrip("/").split("/")[-1]
    api_url = f"https://gerrit.wikimedia.org/r/changes/{short_change_id}/detail"
    response = requests.get(
        api_url,
        headers={'Accept': 'application/json'},
        timeout=timeout,
    )
    # Fail with the HTTP error itself rather than a confusing JSON decode
    # error on a "Not found" body.
    response.raise_for_status()
    # Gerrit prepends the line ")]}'" to JSON bodies; strip it only when it is
    # actually there. json.loads ignores the newline that follows it.
    payload = response.text
    if payload.startswith(")]}'"):
        payload = payload[4:]
    result = json.loads(payload)
    owner = result['owner']
    # The reviewers map has no 'REVIEWER' key when nobody was added.
    reviewers = result.get('reviewers', {}).get('REVIEWER', [])
    select_change_dict = {
        'id': result['change_id'],
        'project': result['project'],
        'description': result['subject'],
        'owner_dict': owner,
        # The email may be absent from the account record.
        'owner_email': owner.get('email'),
        'revision_count': result['current_revision_number'],
        'code_insertions': result['insertions'],
        'code_deletions': result['deletions'],
        'status': result['status'],
        'reviewer_count': len(reviewers),
        'reviewers': reviewers,
    }
    print(result)
    print(select_change_dict)
    return select_change_dict
if __name__ == "__main__":
    # Manual smoke run against one known change, pausing one second first.
    query_change_detail(
        written_url="https://gerrit.wikimedia.org/r/85783",
        sleep=1,
    )

View File

@ -55,4 +55,4 @@ def query_changes(
# Script entry point: the statements below run only when this file is executed directly.
if __name__ == "__main__":
# Query terms handed to query_changes, which is defined outside this excerpt;
# presumably a date cutoff and a project/keyword filter — confirm against query_changes.
query_strings = ['before:2016-12-31', 'visualeditor']
results = query_changes(query_strings)
# NOTE(review): the two identical print lines look like the removed/added pair of a
# diff hunk (e.g. an end-of-file newline change) — confirm the real file prints once.
print(results)
print(results)