"""Contributor-roster helpers recovered from patch 9bd4a32 ("updating_denom_forLR").

The patch (Matthew Gaughan, 2024-03-26) adds ``for_single_project`` to
``non_coding_measures_scrape.py`` and creates ``redo_denom.py``, which holds
``calc_file_denom`` and ``for_all_projects``.  The patch text had lost its line
breaks, so the Python it carries is reproduced here as a regular module.

``for_single_project`` re-scrapes one repository with octohatrack, e.g.
``for_single_project("agateau/yokadi", "yokadi")``; ``for_all_projects``
recomputes the contributor denominator for every project in the input CSV.
"""

import ast
import csv
import json
import os
import subprocess
import sys

import pandas as pd  # noqa: F401  (imported by the original redo_denom.py)


# Directory holding one ``contrib_roster_<project>.json`` file per project.
ROSTER_DIR = '/data/users/mgaughan/kkex/contrib_uni_rosters_013124/'

# Substring that identifies the roster dict in octohatrack's stdout.
ROSTER_MARKER = "{'api_contributors'"

# Roster lists that are merged into the denominator, in merge order.
ROSTER_KEYS = (
    'api_contributors',
    'issue_pr_contributors',
    'file_contributors',
    'wiki_contributors',
)

# Column order of the CSV written by ``for_all_projects``.
OUTPUT_FIELDS = [
    "project_name",
    "underproduction_mean",
    "underproduction_low",
    "underproduction_high",
    "debian_vcs_link",
    "upstream_vcs_link",
    "age_of_project",
    "contributors",
    "collaborators",
    "milestone_count",
    "api_contrib_count",
    "issue_contrib_count",
    "file_contrib_count",
    "wiki_contrib_count",
    "contrib_denom",
]


def _roster_path(project_name, roster_dir=ROSTER_DIR):
    """Return the path of the roster JSON file for *project_name*."""
    return os.path.join(roster_dir, 'contrib_roster_' + project_name + '.json')


def _parse_roster(octohatrack_output):
    """Return the last roster dict found in octohatrack's stdout, or None."""
    roster = None
    for entry in octohatrack_output.split("\n"):
        if ROSTER_MARKER not in entry:
            continue
        try:
            candidate = ast.literal_eval(entry.strip())
        except (ValueError, SyntaxError) as e:
            # A line that merely mentions the marker must not abort the run.
            print(f"skipping unparsable roster line: {e}", file=sys.stderr)
            continue
        if isinstance(candidate, dict):
            roster = candidate
    return roster


def _contributor_key(individual):
    """Return a hashable, order-independent identity for a roster entry."""
    # Roster entries come from JSON and may be dicts, which are unhashable;
    # their canonical JSON text lets a set replace the O(n) list scan.
    return json.dumps(individual, sort_keys=True)


def _count_unique_contributors(data):
    """Count roster entries, skipping ones already seen in an earlier list.

    Every entry of ``api_contributors`` is counted; entries of the remaining
    lists are counted only the first time they appear.  A list that is absent
    from *data* is treated as empty.
    """
    api_roster = data.get(ROSTER_KEYS[0]) or []
    seen = {_contributor_key(individual) for individual in api_roster}
    total = len(api_roster)
    for roster_key in ROSTER_KEYS[1:]:
        for individual in data.get(roster_key) or []:
            key = _contributor_key(individual)
            if key not in seen:
                seen.add(key)
                total += 1
    return total


def for_single_project(repo_name, project_name, roster_dir=ROSTER_DIR, timeout=900):
    """Scrape one repository with octohatrack and save its contributor roster.

    Args:
        repo_name: repository passed to octohatrack, e.g. ``"agateau/yokadi"``.
        project_name: name used in the ``contrib_roster_<name>.json`` file.
        roster_dir: directory the roster file is written to.
        timeout: seconds to wait for octohatrack before giving up.

    Returns:
        None.  Failures (timeout, bad arguments, octohatrack not runnable,
        no roster in the output) are reported and leave no file behind.
    """
    try:
        completed = subprocess.run(
            ['python3', '-m', 'octohatrack', repo_name, '--wait-for-reset'],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except (subprocess.TimeoutExpired, TypeError, OSError) as e:
        print(e)
        return

    roster = _parse_roster(completed.stdout)
    if roster is None:
        # Previously this case was silent, leaving a missing file to be
        # discovered only when the denominator was computed.
        print(
            f"no roster found in octohatrack output for {repo_name} "
            f"(exit code {completed.returncode})",
            file=sys.stderr,
        )
        return

    with open(_roster_path(project_name, roster_dir), 'w', encoding='utf-8') as data_path:
        json.dump(roster, data_path)


def calc_file_denom(project_name, roster_dir=ROSTER_DIR):
    """Return the number of distinct contributors recorded for a project.

    Args:
        project_name: name used in the ``contrib_roster_<name>.json`` file.
        roster_dir: directory the roster file is read from.

    Returns:
        The size of the merged roster (see ``_count_unique_contributors``).

    Raises:
        OSError: the roster file cannot be opened.
        json.JSONDecodeError: the roster file is not valid JSON.
    """
    with open(_roster_path(project_name, roster_dir), encoding='utf-8') as file:
        data = json.load(file)
    return _count_unique_contributors(data)


def for_all_projects(
    input_path='final_data/deb_octo_data.csv',
    output_path='new_denom_032624.csv',
    roster_dir=ROSTER_DIR,
):
    """Copy the project CSV to a new file with a ``contrib_denom`` column.

    Args:
        input_path: CSV with at least a ``project_name`` column.
        output_path: CSV to create; columns follow ``OUTPUT_FIELDS``.
        roster_dir: directory holding the per-project roster files.

    Raises:
        ValueError: an input row has a column that is not in ``OUTPUT_FIELDS``.
    """
    with open(input_path, newline='') as csvfile, \
            open(output_path, 'w', newline='') as writefile:
        reader = csv.DictReader(csvfile)
        writer = csv.DictWriter(writefile, fieldnames=OUTPUT_FIELDS)
        # Without the header row the output columns are unlabelled.
        writer.writeheader()
        for row in reader:
            try:
                row['contrib_denom'] = calc_file_denom(row['project_name'], roster_dir)
            except (OSError, json.JSONDecodeError) as e:
                # One unusable roster should not discard every other project;
                # leave the cell empty and report which project needs a rescrape.
                print(
                    f"no denominator for {row['project_name']}: {e}",
                    file=sys.stderr,
                )
                row['contrib_denom'] = ''
            writer.writerow(row)


if __name__ == "__main__":
    for_all_projects()