prepping gridsearch

2024-04-30 16:30:06 -05:00 · 2024-04-30 16:30:06 -05:00 · 9f6f7e9423
commit 9f6f7e9423
parent fb1cf40591
1 changed file with 2 additions and 2 deletions
--- a/text_analysis/topicModel.py
+++ b/text_analysis/topicModel.py
@ -49,7 +49,7 @@ def preprocess(corpus_list):
    # MVP for now; can certainly be expanded as further iterations of text analysis are done
    D = [[token for token in simple_preprocess(doc) if token not in stopwords]for doc in D]
    lemmatizer = WordNetLemmatizer()
-    D_lemma = [[lemmatizer.lemmatize(token) for token in doc] for doc in D]
+    D_lemma = [" ".join([lemmatizer.lemmatize(token) for token in doc]) for doc in D]
    return D_lemma

 #preparing processed data for model usage
@ -72,7 +72,7 @@ def text_preparation(lemmatized_text):
 def lda_model_identification(data_vectorized):
    lda = LatentDirichletAllocation()
    search_params = {'n_components': [5, 10, 15, 20, 25, 30], 'learning_decay': [.5, .7, .9], 'max_iter': [10, 20, 50], 'batch_size':[128, 256]}
-    model = GridSearchCV(lda, param_grid=search_params)
+    model = GridSearchCV(lda, param_grid=search_params, verbose=10)
    model.fit(data_vectorized)
    best_lda_model = model.best_estimator_
    print("Best Model's Params: ", model.best_params_)