sampling for qual_analysis
This commit is contained in:
parent
00a1c5d157
commit
ef25337e55
26
text_analysis/qual_sampling.py
Normal file
26
text_analysis/qual_sampling.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import shutil
|
||||||
|
import os
|
||||||
|
from random import sample
|
||||||
|
|
||||||
|
# Root directory handed to sample_from_doc when this file runs as a script.
# NOTE(review): the path name suggests it holds partitioned README files — confirm.
readme_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_readme"
|
||||||
|
# Not referenced anywhere else in the visible code.
# NOTE(review): the path name suggests it holds partitioned CONTRIBUTING files — confirm.
contributing_wd = "/data/users/mgaughan/kkex/time_specific_files/partitioned_contributing"
|
||||||
|
|
||||||
|
|
||||||
|
def sample_from_doc(sample_k, doc_directory, min_lines=10):
    """Randomly pick up to ``sample_k`` sufficiently long files per subdirectory.

    Every subdirectory of ``doc_directory`` is processed independently: its
    name is printed, its files are visited in random order, and the first
    ``sample_k`` files with at least ``min_lines`` lines are kept. The
    per-subdirectory selection is printed as a list of
    ``[file_name, line_count]`` pairs.

    Args:
        sample_k: Maximum number of files to select in each subdirectory.
        doc_directory: Path of the directory whose subdirectories are sampled.
        min_lines: Minimum number of lines a file needs to be eligible.

    Returns:
        A dict mapping each subdirectory name to its list of
        ``[file_name, line_count]`` pairs. A list is shorter than
        ``sample_k`` when the subdirectory has too few eligible files.

    Raises:
        OSError: If ``doc_directory`` or one of its entries cannot be read.
    """
    sampled_by_subdir = {}
    for subdir in os.listdir(doc_directory):
        subdir_path = os.path.join(doc_directory, subdir)
        if not os.path.isdir(subdir_path):
            # A stray file next to the partitions cannot be listed; skip it.
            continue
        print(subdir)
        files = os.listdir(subdir_path)
        final_sampled = []
        # A random permutation visits each candidate at most once. This
        # samples without replacement (no duplicate picks) and always
        # terminates, even when fewer than sample_k files are eligible or
        # the subdirectory is empty.
        for candidate in sample(files, len(files)):
            if len(final_sampled) >= sample_k:
                break
            candidate_path = os.path.join(subdir_path, candidate)
            if not os.path.isfile(candidate_path):
                continue
            # Only the line count matters, so undecodable bytes are replaced
            # rather than allowed to abort the whole run.
            with open(candidate_path, "r", encoding="utf-8", errors="replace") as f:
                file_length = sum(1 for _ in f)
            if file_length >= min_lines:
                final_sampled.append([candidate, file_length])
        print(final_sampled)
        sampled_by_subdir[subdir] = final_sampled
    return sampled_by_subdir
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: sample three files from each subdirectory of readme_wd.
    sample_from_doc(sample_k=3, doc_directory=readme_wd)
|
Loading…
Reference in New Issue
Block a user