backup for expanded contributor data

commit 1ae6c6ce7e
parent 2473daf7f2
011824_uni_contrib.csv (new file, 2129 lines)
File diff suppressed because it is too large.
cleaning_contrib_files.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+import csv
+import os
+import json
+
+
+def csv_count():
+    with open("011824_uni_contrib.csv", "r") as file:
+        reader = csv.reader(file)
+        true_rep_counter = 0
+        for i, line in enumerate(reader):
+            if line[2] == line[3] == line[4] == line[5] == '0':
+                print("zeroes")
+            else:
+                print(line)
+                true_rep_counter += 1
+        print(true_rep_counter)
+
+if __name__ == "__main__":
+    csv_count()
+
+
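cleaning_contrib_files.py above prints and tallies rows whose contributor columns are all zero. A minimal sketch of the same check as a reusable function, assuming (as the script does) that columns 2-5 of the CSV hold per-repository contributor counts; the function name and default path argument are illustrative, not part of the commit:

import csv

def count_nonzero_rows(csv_path="011824_uni_contrib.csv"):
    # Tally rows where at least one of columns 2-5 is nonzero,
    # mirroring csv_count() without printing every row.
    nonzero = 0
    with open(csv_path, "r") as file:
        for row in csv.reader(file):
            if not (row[2] == row[3] == row[4] == row[5] == '0'):
                nonzero += 1
    return nonzero

The next two hunks are against the word-count script that lemmatizes the collected contributing-instruction files.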
@@ -7,6 +7,7 @@ nltk.download('wordnet')
 
 def main():
     instructions_dir = "/data/users/mgaughan/kkex_contrib_files_122023/contribute_inst/"
+    all_word_counts = []
     for filename in os.listdir(instructions_dir):
         instructions_metadata = {}
         print(filename)
@@ -21,7 +22,11 @@ def main():
         # pulling whether or not keywords like "Checklist" or "Process" occur?
         # pulling whether "HOWTO" occurs?
         unique_word_count = len(lemmatized_words)
+        if "checklist" in lemmatized_words or "process" in lemmatized_words:
+            print('contains keyword')
         print(word_count)
+        all_word_counts.append(unique_word_count)
+    print(sum(all_word_counts)/len(all_word_counts))
 
 
 
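The additions above keep a running list of per-file unique-word counts and print the mean after the loop. A rough sketch of that pattern in isolation, guarding against an empty directory; plain whitespace splitting stands in for the script's NLTK lemmatization, and the function name is illustrative:

import os

def average_unique_words(instructions_dir):
    all_word_counts = []
    for filename in os.listdir(instructions_dir):
        with open(os.path.join(instructions_dir, filename), "r", errors="ignore") as f:
            words = set(f.read().lower().split())
        if "checklist" in words or "process" in words:
            print("contains keyword")
        all_word_counts.append(len(words))
    # Avoid ZeroDivisionError when the directory is empty.
    return sum(all_word_counts) / len(all_word_counts) if all_word_counts else 0.0

The remaining hunks are against the octohatrack collection script.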
@@ -9,7 +9,7 @@ import ast
 
 #sys.path.append('../octohatrack')
 #import octohatrack.__main__ as oh
-csv_path = "011523_uni_contrib.csv"
+csv_path = "011824_uni_contrib.csv"
 
 def main():
     wd = os.getcwd()
@@ -24,19 +24,18 @@ def main():
         if "github" not in line[5]:
             continue
         repo_name = line[5].strip()[19:]
+        print(repo_name)
         project_dict["project_name"] = repo_name.split("/")[1]
         project_dict["project_owner"]= repo_name.split("/")[0]
         os.chdir("../octohatrack")
-        os.environ["GITHUB_TOKEN"] =
         try:
-            octohatrack_results = subprocess.run(['python3', '-m', 'octohatrack', repo_name, '--wait-for-reset'], capture_output = True, text=True).stdout
+            octohatrack_results = subprocess.run(['python3', '-m', 'octohatrack', repo_name, '--wait-for-reset'], capture_output = True, text=True, timeout=60).stdout
         except:
             print("issue with the repository, string indices must be integers")
             continue
         os.chdir(wd)
         split_results = octohatrack_results.split("\n")
         for entry in split_results:
-            print("-------------")
             if "{'api_contributors'" in entry:
                 formatted = ast.literal_eval(entry)
                 project_dict["api_contrib_count"] = len(formatted['api_contributors'])
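This hunk adds timeout=60 to the octohatrack subprocess call and drops the hard-coded GITHUB_TOKEN assignment. The script's bare except is unchanged; a narrower variant of the call might look like the sketch below, where the wrapper function and its timeout parameter are illustrative, not part of the commit:

import subprocess

def run_octohatrack(repo_name, timeout=60):
    # Invoke octohatrack as a module and bound its runtime.
    try:
        result = subprocess.run(
            ['python3', '-m', 'octohatrack', repo_name, '--wait-for-reset'],
            capture_output=True, text=True, timeout=timeout)
        return result.stdout
    except subprocess.TimeoutExpired:
        print("octohatrack timed out after", timeout, "seconds for", repo_name)
        return None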
@@ -47,15 +46,10 @@ def main():
                 #project_dict["file_contrib_list"] = formatted['file_contributors']
                 project_dict["wiki_contrib_count"] = len(formatted['file_contributors'])
                 #project_dict["wiki_contrib_list"] = formatted['file_contributors']
+                print(project_dict)
                 dict_writer.writerow(project_dict)
-                with open('/data/users/mgaughan/kkex_contrib_uni_011523/' + 'contrib_roster_' + project_dict["project_name"] + '.json', 'w') as data_path:
+                with open('/data/users/mgaughan/b_kkex_contrib_uni_011824/' + 'contrib_roster_' + project_dict["project_name"] + '.json', 'w') as data_path:
                     json.dump(formatted, data_path)
-#os.chdir("../octohatrack")
-#subprocess.run(['python3', '-m', 'octohatrack', 'bluesky-social/atproto'])
-#os.chdir(wd)
-
-#def parse_results(string_results):
-#    for letter in string_results:
 
 if __name__ == "__main__":
     main()
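The final hunk prints the assembled project_dict and redirects the per-project JSON rosters from the 011523 directory to the new b_kkex_contrib_uni_011824 backup directory. A small sketch of that output pattern, with the output directory and dict_writer passed in rather than hard-coded (both assumptions for illustration):

import json
import os

def write_project_record(project_dict, roster, out_dir, dict_writer):
    # One summary row per project in the shared CSV...
    dict_writer.writerow(project_dict)
    # ...plus the full contributor roster as its own JSON file.
    out_path = os.path.join(out_dir, "contrib_roster_" + project_dict["project_name"] + ".json")
    with open(out_path, "w") as data_path:
        json.dump(roster, data_path)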