checking files across major ds
This commit is contained in:
parent
ef25337e55
commit
e2da0d95a9
@ -2,6 +2,44 @@ import json
|
||||
import os
|
||||
import csv
|
||||
import pandas as pd
|
||||
from git import Repo
|
||||
from tqdm import tqdm
|
||||
import shutil
|
||||
|
||||
temp_dir = "/data/users/mgaughan/tmp3/"
|
||||
|
||||
def _clone_source(vcs_link):
    """Return ``(clone_url, directory_name)`` for a whitespace-stripped link."""
    parts = vcs_link.rstrip("/").split("/")
    if ("github" in vcs_link or "gitlab" in vcs_link) and len(parts) >= 5:
        # Making an evaluation that sub-branches aren't being used and that
        # people would fork if needed, so only the default branch is looked
        # at: keep just the first five "/"-separated segments of the link
        # (scheme://host/owner/repo) and drop anything after them.
        return "/".join(parts[:5]), parts[4]
    # Any other link is cloned as given; the last non-empty path segment
    # names the temporary checkout.
    return vcs_link, parts[-1]


def how_many_docs(dataset_csv):
    """Count projects whose repository root has README / CONTRIBUTING files.

    Reads the ``upstream_vcs_link`` column of ``dataset_csv``, clones each
    linked repository into a directory under the module-level ``temp_dir``,
    lists the top-level entries of the checkout, and deletes the checkout
    again. An entry counts when its upper-cased name contains ``README`` or
    ``CONTRIBUTING`` respectively.

    Rows whose link is missing or blank are skipped. Repositories that fail
    to clone or list are reported through ``tqdm.write`` and skipped, so a
    single dead link does not abort the whole run.

    Args:
        dataset_csv: Path to a CSV file with an ``upstream_vcs_link`` column.

    Returns:
        A ``(readme_count, contributing_count)`` tuple of ints.
    """
    # Function-scope import: only ``Repo`` is imported from ``git`` at the
    # top of the file.
    from git.exc import GitCommandError

    df = pd.read_csv(dataset_csv)
    project_repos = df['upstream_vcs_link'].to_list()
    print(len(project_repos))
    readme_count = 0
    contributing_count = 0
    for raw_link in tqdm(project_repos):
        # pandas represents an empty cell as float NaN, which is not a str.
        if not isinstance(raw_link, str) or not raw_link.strip():
            continue
        # Strip before deriving the directory name so stray whitespace does
        # not end up in the checkout path.
        clone_url, dir_name = _clone_source(raw_link.strip())
        full_temp_path = temp_dir + dir_name + ".git"
        # A checkout left behind by an interrupted run would make the clone
        # fail because the destination already exists.
        shutil.rmtree(full_temp_path, ignore_errors=True)
        try:
            repo = Repo.clone_from(clone_url, full_temp_path)
            # Release the handle before the directory is removed.
            repo.close()
            entry_names = [entry.upper() for entry in os.listdir(full_temp_path)]
        except (GitCommandError, OSError) as err:
            tqdm.write("skipping " + clone_url + ": " + str(err))
            continue
        finally:
            # Always remove the checkout, including after a failed clone.
            shutil.rmtree(full_temp_path, ignore_errors=True)
        if any("README" in entry for entry in entry_names):
            readme_count += 1
        if any("CONTRIBUTING" in entry for entry in entry_names):
            contributing_count += 1
    return readme_count, contributing_count
|
||||
|
||||
|
||||
|
||||
def calc_file_denom(project_name):
|
||||
@ -33,5 +71,7 @@ def for_all_projects():
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Earlier entry points, currently disabled.
    #for_all_projects()
    #print(calc_file_denom("zzz-to-char"))
    # Count the projects in the dataset CSV that have README and
    # CONTRIBUTING files, then print both totals.
    readmec, contributingc = how_many_docs("final_data/deb_full_data.csv")
    print("README COUNT: " + str(readmec) + "|| CONTRIBUTING COUNT: " + str(contributingc))
|
Loading…
Reference in New Issue
Block a user