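'''
Helpers for pulling data from the Wikimedia Phabricator API
(maniphest.search, transaction.search, user.search), with simple cursor-based
pagination and sleeps between requests. The task and transaction helpers are
adapted from MGerlach's notebook; run as a script, this queries tasks matching
a tag over a fixed creation-time window and dumps them, along with their
transaction comments, to JSON.
'''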
# from phabricator import Phabricator
import os
import sys
import json
import re
import datetime
import time

import numpy as np
import pandas as pd
import requests

# the task query, taken from MGerlach's notebook
def query_task_tag(
        tag_term,
        limit=100,
        ts1=None, ts2=None,
        api_url_base='https://phabricator.wikimedia.org/api/maniphest.search',
        api_token="api-b7lr4rr2yo5kjyxwmkxqbsbelhyf",
        sleep=13,
):
    '''
    Query all tasks tagged with a specific tag.

    OPTIONAL:
    - limit (int, default=100): number of results per query; cannot be larger than 100
    - ts1, ts2 (int, default=None): time window for task creation (timestamps)
    - sleep (int, default=13): seconds to sleep before and between queries
    - api_url_base (str): the URL for the API endpoint
    - api_token (str, default=Martin's token)

    RETURNS:
    - list of dictionaries
    '''
    time.sleep(sleep)
    to_query = 1
    after = None

    data = []

    # for bot frameworks
    # listed on the help page as of 2-12-2024
    # utilizing git as their VCS

    while to_query == 1:
        time.sleep(sleep)
        params = {
            'api.token': api_token,
            'constraints[query]': [tag_term],  ## term that the task is searched for with
            # constraints[projects] seemed to artificially limit the data that was
            # returned, giving unrealistically low count values
            # 'constraints[projects]': [tag_term],  ## term that the task is tagged with
            'constraints[createdStart]': ts1,  ## timestamp of task creation (min)
            'constraints[createdEnd]': ts2,  ## timestamp of task creation (max)
            'limit': limit,
            'after': after,
            "attachments[subscribers]": "true",
        }

        response = requests.get(api_url_base, params=params)
        print(response)  # debug output
        result = json.loads(response.text)['result']
        print(result)  # debug output
        ## the data
        if result is not None:
            data_tmp = result['data']
            data += data_tmp
            ## check if there are more results to query
            cursor = result['cursor']
            ## if after is None, there are no more pages to query
            if cursor['after'] is None:
                to_query = 0
            ## otherwise, query the next page by passing the after argument
            else:
                after = cursor['after']
        else:
            to_query = 0
    return data
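# A rough sketch of the maniphest.search response shape the loop above pages
# through (field contents assumed; the code relies only on 'data' and
# 'cursor'['after']):
# {
#     "result": {
#         "data": [{"id": ..., "phid": "PHID-TASK-...", "fields": {...},
#                   "attachments": {"subscribers": {...}}}, ...],
#         "cursor": {"limit": 100, "after": "...", "before": ...}
#     }
# }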
# also from MGerlach
def query_transactions_phid_task(
        task_phid,
        limit=100,
        api_url_base='https://phabricator.wikimedia.org/api/transaction.search',
        api_token='api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
        sleep=13,
):
    '''
    Query all transactions for a task (task_phid).

    OPTIONAL:
    - limit (int, default=100): number of results per query; cannot be larger than 100
    - sleep (int, default=13): seconds to sleep before and between queries
    - api_url_base (str): the URL for the API endpoint
    - api_token (str, default=Martin's token)

    RETURNS:
    - list of dictionaries
    '''
    time.sleep(sleep)
    to_query = 1
    after = None

    data = []

    while to_query == 1:
        time.sleep(sleep)
        params = {
            'api.token': api_token,
            'objectIdentifier': task_phid,  ## task PHID
            'limit': limit,
            'after': after,
        }
        response = requests.get(api_url_base, params=params)
        try:
            result = json.loads(response.text)['result']
            data_tmp = result['data']
            data += data_tmp
        except json.decoder.JSONDecodeError:
            # malformed response: reset to an empty list (matching the
            # documented return type) and stop paging
            data = []
            break

        ## check if there are more results to query
        cursor = result['cursor']
        ## if after is None, there are no more pages to query
        if cursor['after'] is None:
            to_query = 0
        ## otherwise, query the next page by passing the after argument
        else:
            after = cursor['after']
    return data
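# Each transaction entry is a dict; __main__ below uses only its 'id' and
# 'comments' fields. A sketch of one entry (the other fields are assumed):
# {"id": ..., "phid": "PHID-XACT-TASK-...", "type": ..., "comments": [...], ...}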
def query_users(
        api_url_base='https://phabricator.wikimedia.org/api/user.search',
        api_token='api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
        sleep=13,
        limit=100,
):
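    '''
    Query all users whose names contain "WMF"
    (hardcoded as constraints[nameLike] below).

    OPTIONAL:
    - limit (int, default=100): number of results per query; cannot be larger than 100
    - sleep (int, default=13): seconds to sleep before and between queries
    - api_url_base (str): the URL for the API endpoint
    - api_token (str, default=Martin's token)

    RETURNS:
    - list of dictionaries
    '''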
    time.sleep(sleep)
    to_query = 1
    after = None

    data = []

    while to_query == 1:
        time.sleep(sleep)
        params = {
            'api.token': api_token,
            'constraints[nameLike]': 'WMF',
            'limit': limit,
            'after': after,
        }
        response = requests.get(api_url_base, params=params)
        try:
            result = json.loads(response.text)['result']
            data_tmp = result['data']
            data += data_tmp
        except json.decoder.JSONDecodeError:
            # malformed response: reset to an empty list (matching the
            # documented return type) and stop paging
            data = []
            break

        ## check if there are more results to query
        cursor = result['cursor']
        ## if after is None, there are no more pages to query
        if cursor['after'] is None:
            to_query = 0
        ## otherwise, query the next page by passing the after argument
        else:
            after = cursor['after']
    return data
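# Example usage, a sketch mirroring the commented-out block at the bottom of
# __main__ (the output filename here is illustrative):
# wmf_users = query_users()
# with open("wmf_phab_roster.json", "w") as f:
#     json.dump(wmf_users, f)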
if __name__ == "__main__":
    # phab = Phabricator("https://phabricator.wikimedia.org/")
    tags = [
        "http"
    ]
    tag = "http"
    # set the Phabricator API token
    token = "api-b7lr4rr2yo5kjyxwmkxqbsbelhyf"
    api_base = 'https://phabricator.wikimedia.org/api/'

    # p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2011, 9, 1, 0, 0, 0)))
    p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2013, 10, 21, 0, 0, 0)))
    p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2013, 12, 5, 0, 0, 0)))

    p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2)
    for entry in p_data:
        task_id = entry['phid']
        print(task_id)
        transactions = query_transactions_phid_task(task_id)
        comments = {}
        for item in transactions:
            comments[item['id']] = item['comments']
        entry['task_comments'] = comments

    DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/"
    with open(f"{DATA_PREFIX}{tag}_10-21-2013_12-5-2013_phab_data.json", "w") as outfile1:
        json.dump(p_data, outfile1)
    '''
    user = query_users()
    with open(f"022825_wmf_master_phab_roster.json", "w") as outfile1:
        json.dump(user, outfile1)
    '''