import csv
import os

import nltk
from nltk.stem import WordNetLemmatizer

# The lemmatizer needs the WordNet corpus; download it once up front.
nltk.download('wordnet')


def main():
    instructions_dir = "/data/users/mgaughan/kkex_contrib_files_122023/contribute_inst/"
    # Instantiate the lemmatizer once, rather than once per word.
    lemmatizer = WordNetLemmatizer()
    for filename in os.listdir(instructions_dir):
        instructions_metadata = {}
        print(filename)
        with open(os.path.join(instructions_dir, filename), "r") as file:
            word_list = file.read().split()
            word_count = len(word_list)
            # Collect unique lemmas; a set avoids repeated membership scans.
            lemmatized_words = set()
            for word in word_list:
                lemmatized_words.add(lemmatizer.lemmatize(word))
            # TODO: pull whether or not keywords like "Checklist" or "Process" occur?
            # TODO: pull whether "HOWTO" occurs?
            unique_word_count = len(lemmatized_words)
            instructions_metadata["word_count"] = word_count
            instructions_metadata["unique_word_count"] = unique_word_count
            print(instructions_metadata)


if __name__ == "__main__":
    main()
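

# --- Hedged sketch (not part of the original script) ---
# The TODO comments in main() ask about pulling whether keywords like
# "Checklist", "Process", or "HOWTO" occur in each document. One minimal way
# to do that is a case-insensitive membership check; the function name
# `keyword_presence` and the KEYWORDS list below are assumptions for
# illustration, not the author's method.
KEYWORDS = ["checklist", "process", "howto"]


def keyword_presence(word_list, keywords=KEYWORDS):
    # Lowercase the document's tokens once so "HOWTO", "HowTo", and "howto"
    # all match the lowercase keywords.
    lowered = {word.lower() for word in word_list}
    # Map each keyword to True/False depending on whether it appears at all.
    return {keyword: keyword in lowered for keyword in keywords}


# Example usage (inside main(), after word_list is built):
#     instructions_metadata.update(keyword_presence(word_list))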