# File listing metadata: 26 lines, 891 B, Python
import csv
|
|
import io
|
|
import shutil
|
|
import os
|
|
from random import sample
|
|
|
|
# Root directories of the document collections this script works on.
# NOTE(review): judging by the names, one holds partitioned README files and
# the other partitioned CONTRIBUTING files -- confirm against the data layout.
readme_wd = "/data/users/mgaughan/kkex/time_specific_files/dwo_partitioned_readme"
contributing_wd = "/data/users/mgaughan/kkex/time_specific_files/dwo_partitioned_contributing"
|
|
|
|
|
|
def sample_from_doc(sample_k, doc_directory, min_lines=10):
    """Sample up to ``sample_k`` sufficiently long files from each subdirectory.

    For every subdirectory of ``doc_directory`` the subdirectory name is
    printed, its files are visited in random order, and the first
    ``sample_k`` files having at least ``min_lines`` lines are collected.
    The collected list is printed and also returned.

    Files are drawn *without* replacement, so a file can appear at most once
    per subdirectory, and the search stops once every file has been tried.
    A subdirectory with fewer than ``sample_k`` qualifying files therefore
    yields a shorter list instead of looping forever.

    Args:
        sample_k: Maximum number of files to pick per subdirectory.
        doc_directory: Path of the directory whose subdirectories are sampled.
        min_lines: Minimum number of lines a file needs to qualify
            (defaults to 10).

    Returns:
        A dict mapping each subdirectory name to a list of
        ``[file_name, line_count]`` pairs.

    Raises:
        OSError: If ``doc_directory`` or one of its files cannot be read.
        UnicodeDecodeError: If a file is not decodable with the default
            text encoding.
    """
    sampled_by_subdir = {}
    for subdir in os.listdir(doc_directory):
        subdir_path = os.path.join(doc_directory, subdir)
        # Stray regular files next to the partitions cannot be listed.
        if not os.path.isdir(subdir_path):
            continue
        print(subdir)

        # Only regular files can be opened and line-counted.
        candidates = [
            name
            for name in os.listdir(subdir_path)
            if os.path.isfile(os.path.join(subdir_path, name))
        ]

        final_sampled = []
        # sample(seq, len(seq)) yields a shuffled copy: each file is tried at
        # most once, which rules out duplicates and guarantees termination.
        for trial_sample in sample(candidates, len(candidates)):
            if len(final_sampled) >= sample_k:
                break
            with open(os.path.join(subdir_path, trial_sample), "r") as f:
                file_length = sum(1 for _ in f)
            if file_length >= min_lines:
                final_sampled.append([trial_sample, file_length])

        print(final_sampled)
        sampled_by_subdir[subdir] = final_sampled
    return sampled_by_subdir
|
|
|
|
if __name__ == "__main__":
    # Script entry point: sample three files per subdirectory of the
    # directory named by contributing_wd.
    sample_from_doc(sample_k=3, doc_directory=contributing_wd)