sampling for qual_analysis

This commit is contained in:
Matthew Gaughan 2024-06-16 13:40:05 -05:00
parent 00a1c5d157
commit ef25337e55

View File

@@ -0,0 +1,26 @@
import csv
import io
import shutil
import os
from random import sample
# Root directories for sample_from_doc, which treats every entry of a root as
# a subdirectory and reads the files found inside it.
# readme_wd is the root sampled by the __main__ entry point of this script.
readme_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_readme"
# Not referenced in the visible code; presumably the CONTRIBUTING-file
# counterpart of readme_wd -- TODO confirm.
contributing_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_contributing"
def sample_from_doc(sample_k, doc_directory):
    """Randomly pick up to ``sample_k`` sufficiently long files per subdirectory.

    Every entry of ``doc_directory`` is treated as a subdirectory. For each one
    its name is printed, its files are visited in random order, and files with
    at least 10 lines are collected until ``sample_k`` have been found. The
    picks are then printed as a list of ``[file_name, line_count]`` pairs.

    Args:
        sample_k: Maximum number of files to pick from each subdirectory.
        doc_directory: Path of the directory whose subdirectories are sampled.

    Returns:
        A dict mapping each subdirectory name to its list of
        ``[file_name, line_count]`` picks. A list is shorter than ``sample_k``
        when the subdirectory holds fewer qualifying files.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be opened.
    """
    min_lines = 10  # files shorter than this are never selected
    sampled_by_subdir = {}
    for subdir in os.listdir(doc_directory):
        print(subdir)
        subdir_path = os.path.join(doc_directory, subdir)
        files = os.listdir(subdir_path)
        final_sampled = []
        # Walk a random permutation instead of redrawing one file at a time:
        # no file can be picked twice, and the loop always terminates even
        # when fewer than sample_k files are long enough (or the directory is
        # empty), where redrawing would spin forever or raise ValueError.
        for candidate in sample(files, len(files)):
            if len(final_sampled) >= sample_k:
                break
            candidate_path = os.path.join(subdir_path, candidate)
            # errors="replace" keeps one undecodable byte from aborting the
            # whole run; it does not change how many lines are counted.
            with open(candidate_path, "r", errors="replace") as f:
                file_length = sum(1 for _ in f)
            if file_length >= min_lines:
                final_sampled.append([candidate, file_length])
        print(final_sampled)
        sampled_by_subdir[subdir] = final_sampled
    return sampled_by_subdir
if __name__ == "__main__":
    # Script entry point: draw three files from every subdirectory of the
    # README root and let sample_from_doc print the picks.
    sample_from_doc(sample_k=3, doc_directory=readme_wd)