26 lines
877 B
Python
26 lines
877 B
Python
|
import csv
|
||
|
import io
|
||
|
import shutil
|
||
|
import os
|
||
|
from random import sample
|
||
|
|
||
|
# Root directory handed to sample_from_doc() by the script entry point below;
# its immediate subdirectories are the units that get listed and sampled.
readme_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_readme"
|
||
|
# Path to the partitioned CONTRIBUTING tree. Not referenced by the code
# visible in this file -- presumably an alternative doc_directory for
# sample_from_doc(); TODO confirm against callers.
contributing_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_contributing"
|
||
|
|
||
|
|
||
|
def sample_from_doc(sample_k, doc_directory):
    """Randomly pick up to ``sample_k`` sufficiently long files per subdirectory.

    For every immediate subdirectory of ``doc_directory`` the files are
    visited in random order (without replacement) and the first ``sample_k``
    files containing at least 10 lines are kept.  The subdirectory name and
    its sample are printed, as before, and the samples are also returned.

    Args:
        sample_k: Maximum number of files to keep per subdirectory.
        doc_directory: Path of the directory whose subdirectories are sampled.

    Returns:
        A dict mapping each subdirectory name to a list of
        ``[file_name, line_count]`` pairs.  A list is shorter than
        ``sample_k`` when the subdirectory does not hold enough files with
        at least 10 lines.
    """
    min_lines = 10
    sampled_by_subdir = {}

    for subdir in os.listdir(doc_directory):
        subdir_path = os.path.join(doc_directory, subdir)
        # Stray regular files next to the partitions cannot be listed.
        if not os.path.isdir(subdir_path):
            continue
        print(subdir)

        files = os.listdir(subdir_path)
        # A full-length sample is a random permutation: every file is tried
        # at most once, so the loop always terminates and never yields the
        # same file twice (the old draw-one-and-retry loop could spin forever).
        candidates = sample(files, len(files))

        final_sampled = []
        for file_name in candidates:
            if len(final_sampled) >= sample_k:
                break
            file_path = os.path.join(subdir_path, file_name)
            if not os.path.isfile(file_path):
                continue
            # errors="replace" keeps undecodable bytes from aborting the run;
            # only the number of lines matters here, not their content.
            with open(file_path, "r", errors="replace") as f:
                file_length = sum(1 for _ in f)
            if file_length >= min_lines:
                final_sampled.append([file_name, file_length])

        print(final_sampled)
        sampled_by_subdir[subdir] = final_sampled

    return sampled_by_subdir
|
||
|
|
||
|
if __name__ == "__main__":
    # Script entry point: run the sampler over the README directory,
    # requesting three files per subdirectory.
    sample_from_doc(sample_k=3, doc_directory=readme_wd)
|