1
0
mw-convo-collections/data_collection/repo_collection.py

41 lines
1.4 KiB
Python

import os
import csv
import datetime
from pyrepohistory import repo_lifecycle
# Passed to repo_lifecycle as its csv_loc_prefix argument by the loop below.
file_location = "/data/users/mgaughan/mw-repo-lifecycles/commit_data/"
# Base path under which the loop below builds "<repo key>/tmp" for each repo.
repo_location = "/data/users/mgaughan/mw-repo-lifecycles/repo_artifacts/"

# Fixed UTC-06:00 offset attached to both ends of the collection window.
cst = datetime.timezone(datetime.timedelta(hours=-6))

# Every configured repository currently uses the same window bounds.
_window_start = datetime.datetime(2010, 1, 1, tzinfo=cst)
_window_end = datetime.datetime(2024, 12, 31, tzinfo=cst)

# Repositories to process, keyed by the name used for the per-repo directory.
repos = {
    # NOTE(review): the key is spelled "siddarthvp" while the URL says
    # "siddharthvp" -- confirm whether existing output depends on this key
    # before renaming it.
    "siddarthvp_mwn": {
        "url": "https://github.com/siddharthvp/mwn",
        "from_date": _window_start,
        "to_date": _window_end,
    },
    "brynne8_mwtest": {
        "url": "https://github.com/brynne8/mwtest",
        "from_date": _window_start,
        "to_date": _window_end,
    },
}
# Disabled repository entry, parked in a bare string literal so it is never
# added to `repos` and never processed by the loop below.
# NOTE(review): the key is "parsoid" but the URL path is pywikibot/core --
# confirm which repository was intended before re-enabling. The trailing "}"
# inside the string also has to be reconciled with the dict's own closing
# brace if this entry is moved back into `repos`.
'''
"parsoid" : {
"url": "https://gerrit.wikimedia.org/r/pywikibot/core",
"from_date": datetime.datetime(2010, 1, 1, 00, 00, 00, tzinfo=cst),
"to_date": datetime.datetime(2024, 12, 31, 00, 00, 00, tzinfo=cst)
}
}
'''
# Run repo_lifecycle once for every entry configured in `repos`.
for repo, repo_info in repos.items():
    # Echo the entry being processed so progress is visible on stdout.
    print(repo_info)
    print(repo_info["url"])
    print(repo_info["from_date"])
    print(repo_info["to_date"])
    # Each repository gets its own working directory: <repo_location>/<key>/tmp.
    # NOTE(review): the meaning of to_save=False alongside csv_loc_prefix is
    # defined by pyrepohistory -- confirm against that package.
    repo_lifecycle(
        repo_info["url"],
        os.path.join(repo_location, repo, "tmp"),
        repo_info["from_date"],
        repo_info["to_date"],
        to_save=False,
        csv_loc_prefix=file_location,
    )