1
0

overdue backup

This commit is contained in:
Matthew Gaughan 2025-05-12 10:49:37 -05:00
parent da43eb1006
commit 9c7ab02e3d
10 changed files with 1370477 additions and 4631 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -10,8 +10,13 @@ repo_location = "/data/users/mgaughan/mw-repo-lifecycles/repo_artifacts/"
cst = datetime.timezone(datetime.timedelta(hours=-6)) cst = datetime.timezone(datetime.timedelta(hours=-6))
repos = { repos = {
"extensions_visualeditor" : { "siddarthvp_mwn" : {
"url": "https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor", "url": "https://github.com/siddharthvp/mwn",
"from_date": datetime.datetime(2010, 1, 1, 00, 00, 00, tzinfo=cst),
"to_date": datetime.datetime(2024, 12, 31, 00, 00, 00, tzinfo=cst)
},
"brynne8_mwtest" : {
"url": "https://github.com/brynne8/mwtest",
"from_date": datetime.datetime(2010, 1, 1, 00, 00, 00, tzinfo=cst), "from_date": datetime.datetime(2010, 1, 1, 00, 00, 00, tzinfo=cst),
"to_date": datetime.datetime(2024, 12, 31, 00, 00, 00, tzinfo=cst) "to_date": datetime.datetime(2024, 12, 31, 00, 00, 00, tzinfo=cst)
}, },

View File

@ -8,15 +8,14 @@ import re
import datetime import datetime
import time import time
# the query task taken from MGerlach's notebook # the query task taken from MGerlach's notebook
def query_task_tag( def query_task_tag(
tag_term, tag_term,
limit = 100, limit = 100,
ts1 = None, ts2 = None, ts1 = None, ts2 = None,
api_url_base = 'https://phabricator.wikimedia.org/api/maniphest.search', api_url_base = 'https://phabricator.wikimedia.org/api/maniphest.search',
api_token = "api-wurg254ciq5uvfxlr4rszn5ynpy4", api_token = "api-b7lr4rr2yo5kjyxwmkxqbsbelhyf",
sleep = 10 sleep = 13
): ):
''' '''
query all tasks tagged with specific tag query all tasks tagged with specific tag
@ -55,31 +54,32 @@ def query_task_tag(
} }
response = requests.get( api_url_base, params=params) response = requests.get( api_url_base, params=params)
#print(response) print(response)
result = json.loads(response.text)['result'] result = json.loads(response.text)['result']
#print(result) print(result)
## the data ## the data
data_tmp = result['data'] if result != None:
data += data_tmp data_tmp = result['data']
## check if there are more results to query data += data_tmp
cursor = result['cursor'] ## check if there are more results to query
## if after == None, no more queries cursor = result['cursor']
if cursor['after'] == None: ## if after == None, no more queries
if cursor['after'] == None:
to_query = 0
## if after != None, query next page by passing after-argument
else:
after = cursor['after']
else:
to_query = 0 to_query = 0
## if after != None, query next page by passing after-argument
else:
after = cursor['after']
return data return data
#also from MGerlach #also from MGerlach
def query_transactions_phid_task( def query_transactions_phid_task(
task_phid, task_phid,
limit = 100, limit = 100,
api_url_base = 'https://phabricator.wikimedia.org/api/transaction.search', api_url_base = 'https://phabricator.wikimedia.org/api/transaction.search',
api_token = 'api-grocgdq2767cx6v5ywckkjmetx2f', api_token = 'api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
sleep = 10, sleep = 13,
): ):
''' '''
query all transactions for a task (task_phid). query all transactions for a task (task_phid).
@ -113,6 +113,7 @@ def query_transactions_phid_task(
data += data_tmp data += data_tmp
except json.decoder.JSONDecodeError as e: except json.decoder.JSONDecodeError as e:
data = {} data = {}
break
## the data ## the data
## check if there are more results to query ## check if there are more results to query
@ -127,8 +128,8 @@ def query_transactions_phid_task(
def query_users( def query_users(
api_url_base = 'https://phabricator.wikimedia.org/api/user.search', api_url_base = 'https://phabricator.wikimedia.org/api/user.search',
api_token = 'api-grocgdq2767cx6v5ywckkjmetx2f', api_token = 'api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
sleep = 10, sleep = 13,
limit = 100, limit = 100,
): ):
time.sleep(sleep) time.sleep(sleep)
@ -152,7 +153,7 @@ def query_users(
data += data_tmp data += data_tmp
except json.decoder.JSONDecodeError as e: except json.decoder.JSONDecodeError as e:
data = {} data = {}
break
## the data ## the data
## check if there are more results to query ## check if there are more results to query
cursor = result['cursor'] cursor = result['cursor']
@ -174,11 +175,12 @@ if __name__ == "__main__":
] ]
tag = "http" tag = "http"
#set phabricator api token #set phabricator api token
token = "api-wurg254ciq5uvfxlr4rszn5ynpy4" token = "api-b7lr4rr2yo5kjyxwmkxqbsbelhyf"
api_base = 'https://phabricator.wikimedia.org/api/' api_base = 'https://phabricator.wikimedia.org/api/'
p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2014, 12, 1, 0, 0, 0))) #p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2011, 9, 1, 0, 0, 0)))
p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2015, 12, 31, 0, 0, 0))) p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2012, 6, 14, 0, 0, 0)))
p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2013, 6, 16, 0, 0, 0)))
p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2) p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2)
for entry in p_data: for entry in p_data:
@ -190,7 +192,7 @@ if __name__ == "__main__":
comments[item['id']] = item['comments'] comments[item['id']] = item['comments']
entry['task_comments'] = comments entry['task_comments'] = comments
DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/" DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/"
with open(f"{DATA_PREFIX}{tag}_2015_phab_data.json", "w") as outfile1: with open(f"{DATA_PREFIX}{tag}_06-14-2012_06-16-2013_phab_data.json", "w") as outfile1:
json.dump(p_data, outfile1) json.dump(p_data, outfile1)
''' '''
user = query_users() user = query_users()