#from phabricator import Phabricator
import os, sys
import json
import numpy as np
import pandas as pd
import requests
import re
import datetime
import time

# SECURITY NOTE(review): this Conduit API token is committed to source
# control. It should be rotated and loaded from an environment variable
# (e.g. os.environ["PHAB_API_TOKEN"]) instead of living in the code.
DEFAULT_API_TOKEN = "api-b7lr4rr2yo5kjyxwmkxqbsbelhyf"


# pagination logic adapted from MGerlach's notebook
def _paginated_conduit_query(api_url_base, params, sleep, verbose=False):
    '''
    Page through a Phabricator Conduit search endpoint.

    Repeatedly GETs api_url_base with params, following the result
    cursor's 'after' token until no more pages remain. Sleeps before
    every request as crude rate limiting.

    PARAMS:
    - api_url_base (str), Conduit endpoint URL
    - params (dict), query parameters; mutated in place ('after' key is
      rewritten on every page)
    - sleep (int), seconds to sleep before each request
    - verbose (bool, default False), print each raw response and parsed
      result (matches the original query_task_tag behavior)

    RETURNS:
    - list of result dicts on success
    - {} if a response body was not valid JSON (preserves the original
      failure signal of the transaction/user queries)
    '''
    after = None
    data = []
    while True:
        time.sleep(sleep)
        params['after'] = after
        response = requests.get(api_url_base, params=params)
        if verbose:
            print(response)
        try:
            result = json.loads(response.text)['result']
        except json.decoder.JSONDecodeError:
            # Non-JSON response (e.g. an HTML error page): give up and
            # signal failure with an empty dict, as the original
            # transaction/user queries did.
            return {}
        if verbose:
            print(result)
        # Conduit returns result == None when the query itself failed;
        # stop and keep whatever pages were already collected. (The
        # original transaction query crashed with a TypeError here.)
        if result is None:
            break
        data += result['data']
        # cursor['after'] is None on the last page; otherwise it is the
        # pagination token for the next request.
        after = result['cursor']['after']
        if after is None:
            break
    return data


# the query task taken from MGerlach's notebook
def query_task_tag(
    tag_term,
    limit=100,
    ts1=None,
    ts2=None,
    api_url_base='https://phabricator.wikimedia.org/api/maniphest.search',
    api_token=DEFAULT_API_TOKEN,
    sleep=13,
):
    '''
    query all tasks matching a fulltext search term

    OPTIONAL:
    - limit (int, default=100), number of results per query, cannot be
      larger than 100
    - ts1, ts2 (int, default=None); timewindow for creation of tasks
      (unix timestamps, min and max)
    - sleep (int, default=13), sleep before each query
    - api_url_base (str,) is the url for the api
    - api_token (str, default=martins token),

    RETURNS:
    - list of dictionary (one per task), or {} on a malformed response
    '''
    # Extra initial sleep kept from the original (doubles the delay
    # before the first request -- presumably intentional rate limiting).
    time.sleep(sleep)
    params = {
        'api.token': api_token,
        'constraints[query]': [tag_term],  ## term that task is searched for with
        # 'constraints[projects]' seemed to be artificially limiting the
        # data that was returned (unrealistically low counts), so the
        # fulltext 'query' constraint is used instead:
        #'constraints[projects]':[tag_term],
        'constraints[createdStart]': ts1,  ## timestamp task creation (min)
        'constraints[createdEnd]': ts2,    ## timestamp task creation (max)
        'limit': limit,
        "attachments[subscribers]": "true",
    }
    return _paginated_conduit_query(api_url_base, params, sleep, verbose=True)


#also from MGerlach
def query_transactions_phid_task(
    task_phid,
    limit=100,
    api_url_base='https://phabricator.wikimedia.org/api/transaction.search',
    api_token=DEFAULT_API_TOKEN,
    sleep=13,
):
    '''
    query all transactions for a task (task_phid).

    OPTIONAL:
    - limit (int, default=100), number of results per query, cannot be
      larger than 100
    - sleep (int, default=13), sleep before each query
    - api_url_base (str,) is the url for the api
    - api_token (str, default=martins token),

    RETURNS:
    - list of dictionary (one per transaction), or {} on a malformed
      response
    '''
    time.sleep(sleep)
    params = {
        'api.token': api_token,
        'objectIdentifier': task_phid,  ## task-phid
        'limit': limit,
    }
    return _paginated_conduit_query(api_url_base, params, sleep)


def query_users(
    api_url_base='https://phabricator.wikimedia.org/api/user.search',
    api_token=DEFAULT_API_TOKEN,
    sleep=13,
    limit=100,
):
    '''
    query all Phabricator users whose name matches 'WMF'.

    OPTIONAL:
    - limit (int, default=100), number of results per query, cannot be
      larger than 100
    - sleep (int, default=13), sleep before each query
    - api_url_base (str,) is the url for the api
    - api_token (str, default=martins token),

    RETURNS:
    - list of dictionary (one per user), or {} on a malformed response
    '''
    time.sleep(sleep)
    params = {
        'api.token': api_token,
        'constraints[nameLike]': 'WMF',
        'limit': limit,
    }
    return _paginated_conduit_query(api_url_base, params, sleep)


if __name__ == "__main__":
    # NOTE(review): the original began with phab=Phabricator(...) even
    # though the `from phabricator import Phabricator` line is commented
    # out at the top of the file -- a guaranteed NameError at startup.
    # The `phab` object was never used afterwards, so the call is removed.
    tags = ["http"]
    tag = "http"

    # set phabricator api token
    token = DEFAULT_API_TOKEN
    api_base = 'https://phabricator.wikimedia.org/api/'

    # Task-creation window: 2013-10-21 through 2013-12-05 (naive local
    # time, matching the original timestamps).
    #p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2011, 9, 1, 0, 0, 0)))
    p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2013, 10, 21, 0, 0, 0)))
    p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2013, 12, 5, 0, 0, 0)))

    p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2)
    for entry in p_data:
        task_id = entry['phid']
        print(task_id)
        transactions = query_transactions_phid_task(task_id)
        # Attach a {transaction id -> comments} map to each task record.
        comments = {}
        for item in transactions:
            comments[item['id']] = item['comments']
        entry['task_comments'] = comments

    DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/"
    with open(f"{DATA_PREFIX}{tag}_10-21-2013_12-5-2013_phab_data.json", "w") as outfile1:
        json.dump(p_data, outfile1)
    '''
    user = query_users()
    with open(f"022825_wmf_master_phab_roster.json", "w") as outfile1:
        json.dump(user, outfile1)
    '''