From 83e668bfe58b601284bfcabe11f7fdc2ac5b950d Mon Sep 17 00:00:00 2001 From: Matthew Gaughan Date: Mon, 6 Jan 2025 12:22:42 -0600 Subject: [PATCH] updating scripts --- src/helper_scripts/dump_collector.py | 2 ++ src/lib/phab_get.py | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/helper_scripts/dump_collector.py b/src/helper_scripts/dump_collector.py index abae71a..4534be5 100644 --- a/src/helper_scripts/dump_collector.py +++ b/src/helper_scripts/dump_collector.py @@ -229,6 +229,8 @@ def batch_parallel_for_yearly(): def batch_parallel_for_single(): urls = [] for entry in ALL_PROJECTS: + if ALL_PROJECTS.index(entry) < ALL_PROJECTS.index("skwikiquote"): + continue if entry not in YEARLY_PROJECTS and entry not in MONTHLY_PROJECTS: urls.append(f"{DUMP_LOC_PREFIX}{entry}/{DUMP}.{entry}.all-time.tsv.bz2") for url in urls: diff --git a/src/lib/phab_get.py b/src/lib/phab_get.py index 4495e98..4bd5c81 100644 --- a/src/lib/phab_get.py +++ b/src/lib/phab_get.py @@ -127,12 +127,12 @@ if __name__ == "__main__": token = "api-wurg254ciq5uvfxlr4rszn5ynpy4" api_base = 'https://phabricator.wikimedia.org/api/' - ve_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2012, 1, 1, 0, 0, 0))) - ve_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2014, 12, 31, 0, 0, 0))) + p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2024, 6, 10, 0, 0, 0))) + p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2024, 10, 10, 0, 0, 0))) - ve_data = query_task_tag("VisualEditor", ts1=ve_ts1, ts2=ve_ts2) + p_data = query_task_tag("Parsoid", ts1=p_ts1, ts2=p_ts2) - for entry in ve_data: + for entry in p_data: task_id = entry['phid'] print(task_id) transactions = query_transactions_phid_task(task_id) @@ -141,5 +141,5 @@ if __name__ == "__main__": comments[item['id']] = item['comments'] entry['task_comments'] = comments DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/" - with open(DATA_PREFIX + "visualeditor/" + "2012_1_1_to_2014_12_31.json", "w") as outfile1: - json.dump(ve_data, outfile1) + with open(DATA_PREFIX + "parsoid/" + "2024_6_10_to_2024_10_10.json", "w") as outfile1: + json.dump(p_data, outfile1)