1
0

phabricator script

This commit is contained in:
Matthew Gaughan 2024-12-18 16:55:11 -06:00
parent db6b140748
commit 80d12c0a1f
6 changed files with 372 additions and 1 deletions

0
src/expand_dumps.ipynb Normal file
View File

3
src/get_dumps.py Normal file
View File

@ -0,0 +1,3 @@
import wget
import os
import sys

View File

@ -32,5 +32,5 @@ for repo in repos.keys():
print(repos[repo]["from_date"])
print(repos[repo]["to_date"])
repo_info = repos[repo]
repo_lifecycle(repo_info['url'], repo_location + repo + "/tmp", repo_info["from_date"], repo_info["to_date"], to_save=True)
repo_lifecycle(repo_info['url'], repo_location + repo + "/tmp", repo_info["from_date"], repo_info["to_date"], to_save=True, csv_loc_prefix=file_location)

View File

@ -0,0 +1,3 @@
import os
import sys
import wget

303
src/lib/phab_get.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -8,8 +8,70 @@ import re
import datetime
# phab=Phabricator("https://phabricator.wikimedia.org/")
# Base URL of the Wikimedia Phabricator API.
api_base = 'https://phabricator.wikimedia.org/api/'

# Phabricator API token.
# NOTE(review): this credential is committed in source; consider loading it
# from the environment or an untracked config file instead.
token = "api-wurg254ciq5uvfxlr4rszn5ynpy4"

# Phabricator tags of interest.
tags = ["VisualEditor", "Parsoid"]
# Task-query helper adapted from MGerlach's notebook.
def query_task_tag(
    tag_term,
    limit = 100,
    ts1 = None, ts2 = None,
    api_url_base = 'https://phabricator.wikimedia.org/api/maniphest.search',
    # NOTE(review): a real credential should not be a committed default;
    # prefer passing the token explicitly (e.g. read from the environment).
    api_token = "api-wurg254ciq5uvfxlr4rszn5ynpy4",
    sleep = 1,
    session = None,
):
    '''
    Query all tasks tagged with a specific tag, following pagination.

    Pages are requested one after another; the ``after`` cursor returned by
    each response is passed to the next request until the API reports no
    further page.

    ARGS:
    - tag_term, the tag that tasks must carry (sent as ``constraints[tags]``)
    OPTIONAL:
    - limit (int, default=100), number of results per query; per the
      original author's note this cannot be larger than 100
    - ts1, ts2 (int, default=None), time window for task creation
      (timestamps, sent as ``createdStart`` / ``createdEnd``); ``None``
      leaves that side of the window unconstrained
    - api_url_base (str), URL of the API endpoint to query
    - api_token (str), API token sent as ``api.token``
    - sleep (int or float, default=1), seconds to pause between
      consecutive page requests; 0 disables the pause
    - session (default=None), object exposing a ``get(url, params=...)``
      method (e.g. a ``requests.Session``) used to issue the requests;
      when ``None`` the module-level ``requests`` is used
    RETURNS:
    - list of dictionaries, the concatenated ``data`` entries of all pages.
    RAISES:
    - whatever ``response.raise_for_status()`` raises on an HTTP error
    - RuntimeError if the API answers without a ``result`` (API-level error)
    '''
    import time  # local import: only needed for the pause between pages

    http = requests if session is None else session
    data = []
    after = None
    while True:
        params = {
            'api.token': api_token,
            'constraints[tags]': tag_term,       ## term that task is tagged with
            'constraints[createdStart]': ts1,    ## timestamp task creation (min)
            'constraints[createdEnd]': ts2,      ## timestamp task creation (max)
            'limit': limit,
            'after': after,
            "attachments[subscribers]": "true",
        }
        ## unset options are omitted from the query string explicitly
        ## (requests does the same for None-valued params)
        params = {key: value for key, value in params.items() if value is not None}

        response = http.get(api_url_base, params=params)
        response.raise_for_status()
        payload = response.json()

        result = payload.get('result')
        if result is None:
            ## API-level failure: surface the reported error instead of
            ## failing later with an opaque TypeError
            raise RuntimeError(
                "Phabricator API error {}: {}".format(
                    payload.get('error_code'), payload.get('error_info')
                )
            )

        data.extend(result['data'])

        ## a missing "after" cursor means this was the last page
        after = result['cursor']['after']
        if after is None:
            return data

        ## be polite to the server before fetching the next page
        if sleep:
            time.sleep(sleep)
# for the search criteria of
# the umbrella tag VisualEditor