# Provenance: extracted from git patch 4eb0b7060813e56cb2e31a6bc5d213781b5682f5
# ("data collection script", Matthew Gaughan, Mon, 16 Dec 2024 17:22:44 -0600),
# which adds src/helper_scripts/repo_collection.py as a new 36-line file.
"""Run ``pyrepohistory.repo_lifecycle`` over a fixed table of Wikimedia repos.

Each entry in ``repos`` names a Gerrit repository URL and a date window.
When executed as a script, every entry is passed to ``repo_lifecycle``
together with a per-repository working directory under ``repo_location``.
"""

import os
import csv
import datetime

# Output directory for commit data. Currently not referenced anywhere in this
# script; kept because other tooling may read it from here.
file_location = "/data/users/mgaughan/mw-repo-lifecycles/commit_data/"
# Parent directory under which each repository gets its own "<name>/tmp" folder.
repo_location = "/data/users/mgaughan/mw-repo-lifecycles/repo_artifacts/"

# Fixed UTC-06:00 offset. This is NOT daylight-saving aware, so every window
# below is interpreted at UTC-6 regardless of the time of year.
cst = datetime.timezone(datetime.timedelta(hours=-6))

# Repository name -> clone URL and the [from_date, to_date] window handed to
# repo_lifecycle. All datetimes are midnight at the fixed ``cst`` offset.
repos = {
    "core": {
        "url": "https://gerrit.wikimedia.org/r/mediawiki/core",
        "from_date": datetime.datetime(2022, 9, 10, 0, 0, 0, tzinfo=cst),
        "to_date": datetime.datetime(2022, 10, 10, 0, 0, 0, tzinfo=cst),
    },
    "parsoid": {
        "url": "https://gerrit.wikimedia.org/r/mediawiki/services/parsoid",
        "from_date": datetime.datetime(2024, 6, 10, 0, 0, 0, tzinfo=cst),
        "to_date": datetime.datetime(2024, 10, 10, 0, 0, 0, tzinfo=cst),
    },
    "visualeditor": {
        "url": "https://gerrit.wikimedia.org/r/VisualEditor/VisualEditor",
        "from_date": datetime.datetime(2013, 5, 1, 0, 0, 0, tzinfo=cst),
        "to_date": datetime.datetime(2013, 10, 10, 0, 0, 0, tzinfo=cst),
    },
}


def clone_dir(repo_name):
    """Return the working directory passed to ``repo_lifecycle`` for a repo.

    Args:
        repo_name: Key of the repository in ``repos`` (e.g. ``"core"``).

    Returns:
        ``<repo_location>/<repo_name>/tmp`` as a string. ``os.path.join`` is
        used so the result is correct whether or not ``repo_location`` ends
        with a path separator.
    """
    return os.path.join(repo_location, repo_name, "tmp")


def main():
    """Print each configured repository and run ``repo_lifecycle`` on it.

    Repositories are processed in the insertion order of ``repos``. The return
    value of ``repo_lifecycle`` is discarded; the call is made with
    ``to_save=True`` (presumably persisting its results -- confirm against the
    pyrepohistory documentation).
    """
    # Imported here rather than at module level so that the repository table
    # can be imported and inspected without pyrepohistory being installed.
    from pyrepohistory import repo_lifecycle

    for repo_name, repo_info in repos.items():
        # Progress output: the full entry first, then its individual fields.
        print(repo_info)
        print(repo_info["url"])
        print(repo_info["from_date"])
        print(repo_info["to_date"])
        repo_lifecycle(
            repo_info["url"],
            clone_dir(repo_name),
            repo_info["from_date"],
            repo_info["to_date"],
            to_save=True,
        )


# Only start the collection when run as a script, not when merely imported.
if __name__ == "__main__":
    main()