1
0

data collection script

This commit is contained in:
Matthew Gaughan 2024-12-16 17:22:44 -06:00
parent a30708557f
commit 4eb0b70608

View File

@ -0,0 +1,36 @@
import os
import csv
import datetime
from pyrepohistory import repo_lifecycle
# Directory intended for commit data output.
# NOTE(review): not referenced anywhere else in the visible script — confirm
# whether it is still needed.
file_location = "/data/users/mgaughan/mw-repo-lifecycles/commit_data/"

# Base directory; the collection loop appends "<repo name>/tmp" to this path.
repo_location = "/data/users/mgaughan/mw-repo-lifecycles/repo_artifacts/"

# Fixed UTC-06:00 offset. Being a plain `datetime.timezone`, it applies no
# daylight-saving adjustment.
# NOTE(review): confirm a constant -6h offset is what the study windows intend.
cst = datetime.timezone(datetime.timedelta(hours=-6))


def _midnight(year, month, day):
    """Return 00:00:00 on the given calendar date, expressed in `cst`."""
    return datetime.datetime(year, month, day, 0, 0, 0, tzinfo=cst)


# One entry per repository to collect, keyed by a short name. Each entry holds
# the Gerrit URL plus the start ("from_date") and end ("to_date") of the
# collection window.
repos = {
    "core": {
        "url": "https://gerrit.wikimedia.org/r/mediawiki/core",
        "from_date": _midnight(2022, 9, 10),
        "to_date": _midnight(2022, 10, 10),
    },
    "parsoid": {
        "url": "https://gerrit.wikimedia.org/r/mediawiki/services/parsoid",
        "from_date": _midnight(2024, 6, 10),
        "to_date": _midnight(2024, 10, 10),
    },
    "visualeditor": {
        "url": "https://gerrit.wikimedia.org/r/VisualEditor/VisualEditor",
        "from_date": _midnight(2013, 5, 1),
        "to_date": _midnight(2013, 10, 10),
    },
}
# Run the lifecycle collection once for every configured repository.
for repo, repo_info in repos.items():
    # Echo the whole entry, then each field on its own line, to stdout.
    print(repo_info)
    for field in ("url", "from_date", "to_date"):
        print(repo_info[field])

    # Per-repository path: <repo_location><repo name>/tmp.
    # NOTE(review): presumably the working directory repo_lifecycle uses for
    # the repository — confirm against pyrepohistory.
    repo_tmp_path = f"{repo_location}{repo}/tmp"

    repo_lifecycle(
        repo_info["url"],
        repo_tmp_path,
        repo_info["from_date"],
        repo_info["to_date"],
        to_save=True,
    )