overdue backup

Matthew Gaughan 2025-05-12 10:49:37 -05:00
parent da43eb1006
commit 9c7ab02e3d
10 changed files with 1370477 additions and 4631 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -10,8 +10,13 @@ repo_location = "/data/users/mgaughan/mw-repo-lifecycles/repo_artifacts/"
 cst = datetime.timezone(datetime.timedelta(hours=-6))
 repos = {
-    "extensions_visualeditor" : {
-        "url": "https://gerrit.wikimedia.org/r/mediawiki/extensions/VisualEditor",
+    "siddarthvp_mwn" : {
+        "url": "https://github.com/siddharthvp/mwn",
         "from_date": datetime.datetime(2010, 1, 1, 00, 00, 00, tzinfo=cst),
         "to_date": datetime.datetime(2024, 12, 31, 00, 00, 00, tzinfo=cst)
     },
+    "brynne8_mwtest" : {
+        "url": "https://github.com/brynne8/mwtest",
+        "from_date": datetime.datetime(2010, 1, 1, 00, 00, 00, tzinfo=cst),
+        "to_date": datetime.datetime(2024, 12, 31, 00, 00, 00, tzinfo=cst)
+    },

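The two new repos entries keep the shape the config already uses: a clone URL plus a CST-bounded observation window. A minimal sketch of how an entry like this might be consumed downstream; the iteration itself is illustrative, not part of this commit:

import datetime

cst = datetime.timezone(datetime.timedelta(hours=-6))
repos = {
    "siddarthvp_mwn": {
        "url": "https://github.com/siddharthvp/mwn",
        "from_date": datetime.datetime(2010, 1, 1, tzinfo=cst),
        "to_date": datetime.datetime(2024, 12, 31, tzinfo=cst),
    },
}

for name, spec in repos.items():
    # each entry carries everything needed to fetch the repo and
    # bound the commit history of interest
    print(f"{name}: {spec['url']} ({spec['from_date'].date()} -> {spec['to_date'].date()})")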
@@ -8,15 +8,14 @@ import re
 import datetime
 import time
 # the query task taken from MGerlach's notebook
 def query_task_tag(
     tag_term,
     limit = 100,
     ts1 = None, ts2 = None,
     api_url_base = 'https://phabricator.wikimedia.org/api/maniphest.search',
-    api_token = "api-wurg254ciq5uvfxlr4rszn5ynpy4",
-    sleep = 10
+    api_token = "api-b7lr4rr2yo5kjyxwmkxqbsbelhyf",
+    sleep = 13
     ):
     '''
     query all tasks tagged with specific tag
@@ -55,31 +54,32 @@ def query_task_tag(
         }
         response = requests.get( api_url_base, params=params)
-        #print(response)
+        print(response)
         result = json.loads(response.text)['result']
-        #print(result)
+        print(result)
         ## the data
-        data_tmp = result['data']
-        data += data_tmp
-        ## check if there are more results to query
-        cursor = result['cursor']
-        ## if after == None, no more queries
-        if cursor['after'] == None:
+        if result != None:
+            data_tmp = result['data']
+            data += data_tmp
+            ## check if there are more results to query
+            cursor = result['cursor']
+            ## if after == None, no more queries
+            if cursor['after'] == None:
+                to_query = 0
+            ## if after != None, query next page by passing after-argument
+            else:
+                after = cursor['after']
+        else:
             to_query = 0
-        ## if after != None, query next page by passing after-argument
-        else:
-            after = cursor['after']
     return data
 #also from MGerlach
 def query_transactions_phid_task(
     task_phid,
     limit = 100,
     api_url_base = 'https://phabricator.wikimedia.org/api/transaction.search',
-    api_token = 'api-grocgdq2767cx6v5ywckkjmetx2f',
-    sleep = 10,
+    api_token = 'api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
+    sleep = 13,
     ):
     '''
     query all transactions for a task (task_phid).
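Taken together, the query_task_tag changes amount to a None-guard around the paging state. A condensed sketch of the after-cursor pagination the function implements against Phabricator's Conduit API; parameter handling follows the script, and params is assumed to already carry the api token and search constraints:

import json
import time
import requests

def fetch_all_pages(api_url_base, params, sleep=13):
    # sketch of query_task_tag's paging loop, with the guard this commit adds
    data = []
    after = None
    to_query = True
    while to_query:
        time.sleep(sleep)                      # back off between Conduit calls
        if after is not None:
            params["after"] = after            # resume from the last cursor
        response = requests.get(api_url_base, params=params)
        result = json.loads(response.text).get("result")
        if result is not None:
            data += result["data"]
            after = result["cursor"]["after"]  # None signals the final page
            to_query = after is not None
        else:
            to_query = False                   # empty/malformed result: stop, keep partial data
    return data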
@@ -113,6 +113,7 @@ def query_transactions_phid_task(
             data += data_tmp
         except json.decoder.JSONDecodeError as e:
             data = {}
+            break
     ## the data
     ## check if there are more results to query
@@ -127,8 +128,8 @@ def query_transactions_phid_task(
 def query_users(
     api_url_base = 'https://phabricator.wikimedia.org/api/user.search',
-    api_token = 'api-grocgdq2767cx6v5ywckkjmetx2f',
-    sleep = 10,
+    api_token = 'api-b7lr4rr2yo5kjyxwmkxqbsbelhyf',
+    sleep = 13,
     limit = 100,
     ):
     time.sleep(sleep)
@ -152,7 +153,7 @@ def query_users(
data += data_tmp
except json.decoder.JSONDecodeError as e:
data = {}
break
## the data
## check if there are more results to query
cursor = result['cursor']
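query_transactions_phid_task and query_users get the same one-line fix: a break after the JSONDecodeError handler, so one malformed response ends pagination instead of leaving the while loop spinning on stale state. The pattern in isolation (function name illustrative):

import json

def parse_page(text):
    # returns (result, keep_going); a decode failure stops the caller's
    # paging loop, mirroring the `data = {}` / `break` added in this commit
    try:
        return json.loads(text)["result"], True
    except json.decoder.JSONDecodeError:
        return {}, False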
@@ -174,11 +175,12 @@ if __name__ == "__main__":
     ]
     tag = "http"
     #set phabricator api token
-    token = "api-wurg254ciq5uvfxlr4rszn5ynpy4"
+    token = "api-b7lr4rr2yo5kjyxwmkxqbsbelhyf"
     api_base = 'https://phabricator.wikimedia.org/api/'
-    p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2014, 12, 1, 0, 0, 0)))
-    p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2015, 12, 31, 0, 0, 0)))
+    #p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2011, 9, 1, 0, 0, 0)))
+    p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2012, 6, 14, 0, 0, 0)))
+    p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2013, 6, 16, 0, 0, 0)))
     p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2)
     for entry in p_data:
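One subtlety in the retargeted window (2014-12-01/2015-12-31 becomes 2012-06-14/2013-06-16): the datetimes are naive, so datetime.timestamp() resolves them in whatever timezone the host machine uses. A hedged alternative that pins a timezone and makes the epoch bounds reproducible across machines:

import datetime

utc = datetime.timezone.utc
p_ts1 = int(datetime.datetime(2012, 6, 14, tzinfo=utc).timestamp())  # 1339632000
p_ts2 = int(datetime.datetime(2013, 6, 16, tzinfo=utc).timestamp())  # 1371340800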
@@ -190,7 +192,7 @@ if __name__ == "__main__":
             comments[item['id']] = item['comments']
         entry['task_comments'] = comments
     DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/"
-    with open(f"{DATA_PREFIX}{tag}_2015_phab_data.json", "w") as outfile1:
+    with open(f"{DATA_PREFIX}{tag}_06-14-2012_06-16-2013_phab_data.json", "w") as outfile1:
         json.dump(p_data, outfile1)
     '''
     user = query_users()