# mw-convo-collections/data_collection/phab_get.py
# from phabricator import Phabricator
import os
import json
import datetime
import time

import requests
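
# The functions below default to a hardcoded API token. A minimal alternative
# sketch, assuming the token were exported under the hypothetical environment
# variable name PHAB_API_TOKEN:
#
#   api_token = os.environ.get("PHAB_API_TOKEN", "")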
# the query function is taken from MGerlach's notebook
def query_task_tag(
    tag_term,
    limit=100,
    ts1=None, ts2=None,
    api_url_base='https://phabricator.wikimedia.org/api/maniphest.search',
    api_token="api-b7lr4rr2yo5kjyxwmkxqbsbelhyf",
    sleep=13,
):
    '''
    Query all tasks matching a search term (a full-text query; the
    project-tag constraint is disabled below).
    OPTIONAL:
    - limit (int, default=100): number of results per query; cannot be larger than 100
    - ts1, ts2 (int, default=None): time window for task creation (Unix timestamps)
    - sleep (int, default=13): seconds to sleep before each query
    - api_url_base (str): URL of the API endpoint
    - api_token (str): Phabricator API token (defaults to Martin's token)
    RETURNS:
    - list of dictionaries
    '''
    time.sleep(sleep)
    to_query = True
    after = None
    data = []
    while to_query:
        time.sleep(sleep)
        params = {
            'api.token': api_token,
            'constraints[query]': [tag_term],  ## term that tasks are searched for with
            ## constraints[projects] seemed to artificially limit the data that
            ## was returned, giving unrealistically low counts, so it is disabled
            # 'constraints[projects]': [tag_term],  ## term that task is tagged with
            'constraints[createdStart]': ts1,  ## timestamp of task creation (min)
            'constraints[createdEnd]': ts2,  ## timestamp of task creation (max)
            'limit': limit,
            'after': after,
            "attachments[subscribers]": "true",
        }
        response = requests.get(api_url_base, params=params)
        print(response)
        result = json.loads(response.text)['result']
        print(result)
        ## the data
        if result is not None:
            data_tmp = result['data']
            data += data_tmp
            ## check if there are more results to query
            cursor = result['cursor']
            ## if after is None, no more queries
            if cursor['after'] is None:
                to_query = False
            ## if after is not None, query the next page by passing the after-argument
            else:
                after = cursor['after']
        else:
            to_query = False
    return data
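
# Example usage (a minimal sketch; the search term and time window mirror the
# __main__ block below, and a valid API token must be configured above):
#
#   ts1 = int(datetime.datetime(2013, 10, 21).timestamp())
#   ts2 = int(datetime.datetime(2013, 12, 5).timestamp())
#   tasks = query_task_tag("http", ts1=ts1, ts2=ts2)
#   print(f"{len(tasks)} tasks returned")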
# also from MGerlach
def query_transactions_phid_task(
    task_phid,
    limit=100,
    api_url_base='https://phabricator.wikimedia.org/api/transaction.search',
    api_token='api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
    sleep=13,
):
    '''
    Query all transactions for a task (task_phid).
    OPTIONAL:
    - limit (int, default=100): number of results per query; cannot be larger than 100
    - sleep (int, default=13): seconds to sleep before each query
    - api_url_base (str): URL of the API endpoint
    - api_token (str): Phabricator API token (defaults to Martin's token)
    RETURNS:
    - list of dictionaries (or an empty dict if a response could not be parsed)
    '''
    time.sleep(sleep)
    to_query = True
    after = None
    data = []
    while to_query:
        time.sleep(sleep)
        params = {
            'api.token': api_token,
            'objectIdentifier': task_phid,  ## task-phid
            'limit': limit,
            'after': after,
        }
        response = requests.get(api_url_base, params=params)
        try:
            ## the data
            result = json.loads(response.text)['result']
            data_tmp = result['data']
            data += data_tmp
        except json.decoder.JSONDecodeError:
            ## signal an unparseable response with an empty dict and stop paging
            data = {}
            break
        ## check if there are more results to query
        cursor = result['cursor']
        ## if after is None, no more queries
        if cursor['after'] is None:
            to_query = False
        ## if after is not None, query the next page by passing the after-argument
        else:
            after = cursor['after']
    return data
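
# Example usage (a minimal sketch; "PHID-TASK-..." is a placeholder standing
# in for a real task PHID such as those returned by query_task_tag above):
#
#   transactions = query_transactions_phid_task("PHID-TASK-...")
#   comments = {t['id']: t['comments'] for t in transactions}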
def query_users(
    api_url_base='https://phabricator.wikimedia.org/api/user.search',
    api_token='api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
    sleep=13,
    limit=100,
):
    '''
    Query all users whose names match 'WMF'.
    OPTIONAL:
    - sleep (int, default=13): seconds to sleep before each query
    - limit (int, default=100): number of results per query; cannot be larger than 100
    - api_url_base (str): URL of the API endpoint
    - api_token (str): Phabricator API token (defaults to Martin's token)
    RETURNS:
    - list of dictionaries (or an empty dict if a response could not be parsed)
    '''
    time.sleep(sleep)
    to_query = True
    after = None
    data = []
    while to_query:
        time.sleep(sleep)
        params = {
            'api.token': api_token,
            'constraints[nameLike]': 'WMF',
            'limit': limit,
            'after': after,
        }
        response = requests.get(api_url_base, params=params)
        try:
            ## the data
            result = json.loads(response.text)['result']
            data_tmp = result['data']
            data += data_tmp
        except json.decoder.JSONDecodeError:
            ## signal an unparseable response with an empty dict and stop paging
            data = {}
            break
        ## check if there are more results to query
        cursor = result['cursor']
        ## if after is None, no more queries
        if cursor['after'] is None:
            to_query = False
        ## if after is not None, query the next page by passing the after-argument
        else:
            after = cursor['after']
    return data
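
# Example usage (a minimal sketch mirroring the commented-out roster dump in
# the __main__ block below; the output filename is illustrative):
#
#   users = query_users()
#   with open("wmf_phab_roster.json", "w") as f:
#       json.dump(users, f)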
if __name__ == "__main__":
    # phab = Phabricator("https://phabricator.wikimedia.org/")
    tags = [
        "http"
    ]
    tag = "http"
    # set phabricator api token
    token = "api-b7lr4rr2yo5kjyxwmkxqbsbelhyf"
    api_base = 'https://phabricator.wikimedia.org/api/'
    # p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2011, 9, 1, 0, 0, 0)))
    p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2013, 10, 21, 0, 0, 0)))
    p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2013, 12, 5, 0, 0, 0)))
    p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2)
    for entry in p_data:
        task_phid = entry['phid']
        print(task_phid)
        transactions = query_transactions_phid_task(task_phid)
        comments = {}
        for item in transactions:
            comments[item['id']] = item['comments']
        entry['task_comments'] = comments
    DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/"
    with open(f"{DATA_PREFIX}{tag}_10-21-2013_12-5-2013_phab_data.json", "w") as outfile1:
        json.dump(p_data, outfile1)
    '''
    user = query_users()
    with open("022825_wmf_master_phab_roster.json", "w") as outfile1:
        json.dump(user, outfile1)
    '''