sampling for qual_analysis
This commit is contained in:
parent
00a1c5d157
commit
ef25337e55
26
text_analysis/qual_sampling.py
Normal file
26
text_analysis/qual_sampling.py
Normal file
@ -0,0 +1,26 @@
|
||||
import csv
|
||||
import io
|
||||
import shutil
|
||||
import os
|
||||
from random import sample
|
||||
|
||||
# Root directory whose subdirectories are sampled when this file is run as a script.
readme_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_readme"
|
||||
# Counterpart root path (presumably for CONTRIBUTING files, judging by the path name);
# NOTE(review): not referenced anywhere else in the visible code.
contributing_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_contributing"
|
||||
|
||||
|
||||
def sample_from_doc(sample_k, doc_directory, min_lines=10):
    """Randomly pick up to ``sample_k`` sufficiently long files per subdirectory.

    Every subdirectory of ``doc_directory`` is visited. Within each one,
    files are examined in random order (without replacement) and kept when
    they contain at least ``min_lines`` lines, until ``sample_k`` files have
    been collected or the subdirectory is exhausted.

    The subdirectory name and its sampled list are printed as they are
    processed, and the same information is returned.

    Args:
        sample_k: Maximum number of files to sample from each subdirectory.
        doc_directory: Path of the directory whose subdirectories are sampled.
        min_lines: Minimum number of lines a file needs to be eligible.

    Returns:
        A dict mapping each subdirectory name to a list of
        ``[file_name, line_count]`` pairs. A list is shorter than
        ``sample_k`` when too few files qualify.
    """
    results = {}
    for subdir in os.listdir(doc_directory):
        subdir_path = os.path.join(doc_directory, subdir)
        if not os.path.isdir(subdir_path):
            # Stray files next to the partitions cannot be listed; skip them.
            continue
        print(subdir)
        files = os.listdir(subdir_path)
        final_sampled = []
        # sample(files, len(files)) is a random permutation: each file is
        # tried at most once, so no duplicates are drawn and the loop always
        # ends even when fewer than sample_k files qualify.
        for candidate in sample(files, len(files)):
            if len(final_sampled) >= sample_k:
                break
            candidate_path = os.path.join(subdir_path, candidate)
            if not os.path.isfile(candidate_path):
                continue
            # errors="replace" keeps undecodable bytes from aborting the run;
            # only the line count matters here, not the decoded text.
            with open(candidate_path, "r", errors="replace") as f:
                file_length = sum(1 for _ in f)
            if file_length >= min_lines:
                final_sampled.append([candidate, file_length])
        print(final_sampled)
        results[subdir] = final_sampled
    return results
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: sample three files from each subdirectory of readme_wd.
    sample_from_doc(sample_k=3, doc_directory=readme_wd)
|
Loading…
Reference in New Issue
Block a user