{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e09a84d6-cbd4-4a12-8e96-3775f734a262", "metadata": {}, "outputs": [], "source": [ "import re\n", "import numpy as np\n", "import pandas as pd\n", "import glob\n", "import copy\n", "import csv\n", "from statistics import mean, median\n", "from strip_markdown import strip_markdown\n", "import joblib" ] }, { "cell_type": "code", "execution_count": 2, "id": "9483091c-ac72-415c-932d-ac7cf7970789", "metadata": {}, "outputs": [], "source": [ "import gensim\n", "import gensim.corpora as corpora\n", "from gensim.utils import simple_preprocess\n", "from gensim.models import CoherenceModel\n", "from gensim.models.phrases import Phrases\n", "\n", "from sklearn.decomposition import LatentDirichletAllocation\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", "\n", "from statistics import mode" ] }, { "cell_type": "code", "execution_count": 15, "id": "196abd6a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package wordnet to\n", "[nltk_data] /home/SOC.NORTHWESTERN.EDU/nws8519/nltk_data...\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#import nltk\n", "#nltk.download('wordnet')" ] }, { "cell_type": "code", "execution_count": 3, "id": "3da6b590-875d-478d-aaaa-de020039c519", "metadata": {}, "outputs": [], "source": [ "# spacy and nltk for lemmatization\n", "import nltk \n", "#nltk.download('stopwords')\n", "import spacy\n", "from nltk.corpus import stopwords\n", "from nltk.stem.wordnet import WordNetLemmatizer\n", "\n", "stopwords = stopwords.words('english')" ] }, { "cell_type": "code", "execution_count": 4, "id": "60c137ae-6fe9-4b03-b899-6141b1645d6b", "metadata": {}, "outputs": [], "source": [ "def metadata_for_file(file):\n", " word_list = file.split()\n", " word_count = len(word_list)\n", " #print(word_list)\n", " if word_count == 0:\n", " avg_word_length = 0\n", " else: \n", " avg_word_length = sum(map(len, word_list)) / len(word_list)\n", " #return number of paragraphs\n", " return word_count, avg_word_length" ] }, { "cell_type": "code", "execution_count": 5, "id": "2e674fef-adb4-48c9-86a0-a655c41a95f3", "metadata": {}, "outputs": [], "source": [ "def get_data_from_dir(directory):\n", " files = glob.glob(f\"{directory}/*\")\n", " data_list = []\n", " word_counts = []\n", " avg_word_lengths = []\n", " file_list = []\n", " for file in files:\n", " text = open(file, encoding='utf-8', errors='ignore').read()\n", " #here's some of the descriptive text analysis\n", " word_count, avg_word_length = metadata_for_file(text)\n", " word_counts.append(word_count)\n", " avg_word_lengths.append(avg_word_length)\n", " #adding the data to the list of text\n", " data_list.append(text)\n", " #adding filename\n", " file_list.append(file)\n", " return data_list, word_counts, avg_word_lengths, file_list" ] }, { "cell_type": "code", "execution_count": 6, "id": "2b332b10-bfc8-4566-8c52-19a8a334af00", "metadata": {}, "outputs": [], "source": [ "#preprocessing text data\n", "def preprocess(corpus_list):\n", " #extending stopwords \n", " specific_stopwords = [\"http\", \"com\", \"www\", \"org\", \"file\", \"code\", \"time\", \"software\", \"use\", \"user\", \"set\", \"line\", \"run\", \"source\", \"github\",\n", " \"lineno\", \"python\", \"php\", \"ruby\", \"api\"]\n", " stopwords.extend(specific_stopwords)\n", " D = copy.copy(corpus_list)\n", " #stripping markdown from documents\n", " D = [strip_markdown(doc) for doc in D]\n", " #strip html \n", " D = [re.sub(r'', '', doc, flags=re.DOTALL) for doc in D]\n", " #mvp right now, can certainly be expanded as iterations of text analysis are done\n", " D = [[token for token in simple_preprocess(doc) if token not in stopwords and len(token) > 2]for doc in D]\n", " lemmatizer = WordNetLemmatizer()\n", " D_lemma = [\" \".join([lemmatizer.lemmatize(token) for token in doc]) for doc in D]\n", " return D_lemma" ] }, { "cell_type": "code", "execution_count": 7, "id": "2a26b7ef-d2df-4e1d-8aeb-66706ac6cbb7", "metadata": {}, "outputs": [], "source": [ "#preparing processed data for model usage\n", "def text_preparation(lemmatized_text):\n", " #bigrams\n", " D_bigrams = copy.copy(lemmatized_text)\n", " bigram = Phrases(D_bigrams, min_count=2)\n", " for i in range(len(lemmatized_text)):\n", " for token in bigram[D_bigrams[i]]:\n", " if '_' in token:\n", " D_bigrams[i].append(token)\n", " #id2word\n", " id2word = corpora.Dictionary(D_bigrams)\n", " id2word.filter_extremes(no_below=5, no_above=0.5)\n", " #bow representation \n", " bag_of_words = [id2word.doc2bow(doc) for doc in D_bigrams]\n", " return bag_of_words, id2word" ] }, { "cell_type": "code", "execution_count": 8, "id": "24799e25-2c0c-4e16-b503-68296f604f52", "metadata": {}, "outputs": [], "source": [ "def lda_model_identification(data_vectorized):\n", " lda = LatentDirichletAllocation()\n", " search_params = {'n_components': [11], 'learning_decay': [.5, .7, .9], 'batch_size' : [128, 256] }\n", " model = GridSearchCV(lda, param_grid=search_params, verbose=10)\n", " model.fit(data_vectorized)\n", " best_lda_model = model.best_estimator_\n", " print(\"Best Model's Params: \", model.best_params_)\n", " print(\"Best Log Likelihood Score: \", model.best_score_)\n", " print(\"Model Perplexity: \", best_lda_model.perplexity(data_vectorized))" ] }, { "cell_type": "code", "execution_count": 18, "id": "d3b5785f-8272-44f5-9aee-e5f5e97452e5", "metadata": {}, "outputs": [], "source": [ "def best_lda_model(data_vectorized, vocab):\n", " lda = LatentDirichletAllocation(n_components=11, learning_decay = 0.9, batch_size = 256, max_iter = 50)\n", " id_topic = lda.fit_transform(data_vectorized)\n", " topic_words = {}\n", " for topic, comp in enumerate(lda.components_):\n", " word_idx = np.argsort(comp)[::-1][:10]\n", " topic_words[topic] = [vocab[i] for i in word_idx]\n", " for topic, words in topic_words.items():\n", " print('Topic: %d' % topic)\n", " print(' %s' % ', '.join(words))\n", " #lda.print_topics(num_words=10)\n", " joblib.dump(lda, '020325_README_lda.jl')\n", " #lda = joblib.load('0509_lda.jl')\n", " return id_topic" ] }, { "cell_type": "code", "execution_count": 10, "id": "80bcdc6c-8a3d-4738-87b5-15e6c2db3a27", "metadata": {}, "outputs": [], "source": [ "def get_most_prevalent(vect_documents, documents):\n", " lda = joblib.load('020325_README_lda.jl')\n", " distributions = lda.transform(vect_documents)\n", " most_prevalent = {0: [0, \"\"],1: [0, \"\"], 2: [0, \"\"], 3: [0, \"\"], 4: [0, \"\"], 5: [0, \"\"], 6: [0, \"\"], 7: [0, \"\"], 8: [0, \"\"], 9: [0, \"\"], 10: [0, \"\"]}\n", " for i, topic_distribution in enumerate(distributions):\n", " for j in range(11):\n", " if topic_distribution[j] > most_prevalent[j][0]:\n", " most_prevalent[j] = [topic_distribution[j], documents[i]]\n", " print(most_prevalent)\n", " return most_prevalent\n" ] }, { "cell_type": "code", "execution_count": 23, "id": "3afd27af-8e8f-43c0-8610-06f7a68d5aec", "metadata": {}, "outputs": [], "source": [ "def prevalent_topics(vect_documents, file_list):\n", " lda = joblib.load('020325_README_lda.jl')\n", " #lda = joblib.load('0514_contrib_lda.jl')\n", " distributions = lda.transform(vect_documents)\n", " #figuring out what the max distribution is and then figuring out the mode\n", " top_topic = []\n", " count_of_multiple = 0\n", " topic_arrays = []\n", " for i, topic_distribution in enumerate(distributions):\n", " max_dist = max(topic_distribution)\n", " indexes = np.where(topic_distribution == max_dist)[0]\n", " if len(indexes) == 1:\n", " top_topic.append(indexes[0])\n", " else:\n", " count_of_multiple += 1\n", " topic_arrays.append(topic_distribution)\n", " #most_frequent(top_topic)\n", " print(count_of_multiple)\n", " df = pd.DataFrame(topic_arrays)\n", " #finding the distribution values for all documents\n", " with open('020325_README_file_topic_distributions.csv', 'w', newline='') as csvfile:\n", " fieldnames = ['filename', 't0', 't1', 't2', 't3', 't4', 't5', 't6', 't7', 't8', 't9', 't10']\n", " writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n", " writer.writeheader()\n", " for i, row in df.iterrows():\n", " project_dir = {}\n", " project_dir['filename'] = file_list[i].split(\"/\")[-1]\n", " array_row = df.iloc[i].to_numpy()\n", " for j in range(11):\n", " project_dir[\"t\" + str(j)] = array_row[j]\n", " writer.writerow(project_dir)\n", " #print(df.sort_values(by=['0']).head(5))\n", " for i in range(11):\n", " print(\"-----------------------Topic \" + str(i) + \" --------------------------------\")\n", " top5 = df.nlargest(10, i)\n", " top_indices = top5.index.to_list()\n", " print(top5)\n", " for index in top_indices:\n", " print(file_list[index])\n", " bottom5 = df.nsmallest(10, i)\n", " bottom_indices = bottom5.index.to_list()\n", " print(bottom5)\n", " for index in bottom_indices:\n", " print(file_list[index])\n", " averages = df.mean()\n", " print(averages)\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "5aefbafc-0c1b-409a-afbc-655e4cef91e3", "metadata": {}, "outputs": [], "source": [ "def most_frequent(topic_prevalence):\n", " most_frequent_array = []\n", " for j in range(11):\n", " topic = mode(topic_prevalence)\n", " most_frequent_array.append(topic)\n", " topic_prevalence = [i for i in topic_prevalence if i != topic]\n", " print(most_frequent_array)" ] }, { "cell_type": "code", "execution_count": 13, "id": "69d606fd", "metadata": {}, "outputs": [], "source": [ "readme_directory = \"/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/\"" ] }, { "cell_type": "code", "execution_count": 14, "id": "1f937c2e-2714-475d-b670-602164c46642", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean wordcount: 271.6877796091359\n", "Median wordcount: 98\n", "Mean wordlength: 6.063122274716372\n", "Median wordlength: 5.841269841269841\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/SOC.NORTHWESTERN.EDU/nws8519/anaconda3/lib/python3.12/html/parser.py:171: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n", " k = self.parse_starttag(i)\n" ] } ], "source": [ "listed_corpus, wordcounts, wordlengths, file_list = get_data_from_dir(readme_directory)\n", "print(\"Mean wordcount: \", mean(wordcounts))\n", "print(\"Median wordcount: \", median(wordcounts))\n", "print(\"Mean wordlength: \", mean(wordlengths))\n", "print(\"Median wordlength: \", median(wordlengths))\n", "lemmatized_corpus = preprocess(listed_corpus)" ] }, { "cell_type": "code", "execution_count": 15, "id": "e90e236f-8db5-40cc-88a3-60e674b9d1de", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['020325_README_vectorizer.joblib']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "'''\n", "vectorizer = CountVectorizer(analyzer='word', \n", " min_df=2, \n", " stop_words='english', \n", " lowercase=True, \n", " token_pattern='[a-zA-Z0-9]{2,}', \n", " )\n", "data_vectorized = vectorizer.fit_transform(lemmatized_corpus)\n", "joblib.dump(vectorizer, '020325_README_vectorizer.joblib')\n", "'''\n" ] }, { "cell_type": "code", "execution_count": 16, "id": "d68aaf7b", "metadata": {}, "outputs": [], "source": [ "vectorizer = joblib.load('020325_README_vectorizer.joblib')\n", "data_vectorized = vectorizer.transform(lemmatized_corpus) " ] }, { "cell_type": "code", "execution_count": 17, "id": "dd1a70c2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 5 folds for each of 6 candidates, totalling 30 fits\n", "[CV 1/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 1/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-1005863.489 total time= 10.2s\n", "[CV 2/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 2/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-1011357.156 total time= 10.0s\n", "[CV 3/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 3/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-1015386.424 total time= 10.0s\n", "[CV 4/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 4/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-965023.515 total time= 10.3s\n", "[CV 5/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 5/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-994223.612 total time= 9.9s\n", "[CV 1/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 1/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-1006613.702 total time= 9.9s\n", "[CV 2/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 2/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-1013817.544 total time= 9.9s\n", "[CV 3/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 3/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-1015692.660 total time= 10.0s\n", "[CV 4/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 4/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-966771.244 total time= 10.4s\n", "[CV 5/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 5/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-995596.978 total time= 10.3s\n", "[CV 1/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 1/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-1005180.172 total time= 10.0s\n", "[CV 2/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 2/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-1015590.801 total time= 10.6s\n", "[CV 3/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 3/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-1018907.455 total time= 11.1s\n", "[CV 4/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 4/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-964714.891 total time= 14.5s\n", "[CV 5/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 5/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-996172.263 total time= 13.9s\n", "[CV 1/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 1/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-1006251.961 total time= 13.9s\n", "[CV 2/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 2/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-1016869.369 total time= 14.0s\n", "[CV 3/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 3/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-1017346.297 total time= 14.2s\n", "[CV 4/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 4/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-964440.209 total time= 13.9s\n", "[CV 5/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 5/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-997104.875 total time= 14.1s\n", "[CV 1/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 1/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-1005428.337 total time= 13.9s\n", "[CV 2/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 2/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-1013241.313 total time= 14.0s\n", "[CV 3/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 3/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-1014764.423 total time= 13.6s\n", "[CV 4/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 4/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-964830.614 total time= 14.3s\n", "[CV 5/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 5/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-996764.609 total time= 14.0s\n", "[CV 1/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 1/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-1004883.859 total time= 13.6s\n", "[CV 2/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 2/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-1007656.712 total time= 13.5s\n", "[CV 3/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 3/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-1015740.335 total time= 14.0s\n", "[CV 4/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 4/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-966718.005 total time= 13.8s\n", "[CV 5/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 5/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-996819.504 total time= 13.8s\n", "Best Model's Params: {'batch_size': 256, 'learning_decay': 0.9, 'n_components': 11}\n", "Best Log Likelihood Score: -998363.6828381404\n", "Model Perplexity: 2076.905945051809\n" ] } ], "source": [ "lda_model_identification(data_vectorized)" ] }, { "cell_type": "code", "execution_count": 19, "id": "aa83d20f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Topic: 0\n", " test, library, object, google, include, class, interface, using, build, example\n", "Topic: 1\n", " server, client, option, command, network, device, port, support, interface, default\n", "Topic: 2\n", " value, function, string, data, object, return, type, table, method, error\n", "Topic: 3\n", " install, build, make, package, configure, debian, git, need, directory, gnome\n", "Topic: 4\n", " obj, filter, stream, length, type, page, count, parent, max, resource\n", "Topic: 5\n", " window, mode, color, game, key, menu, default, size, button, sound\n", "Topic: 6\n", " file, directory, path, install, make, command, default, version, option, usr\n", "Topic: 7\n", " license, version, gnu, http, public, free, general, copyright, project, install\n", "Topic: 8\n", " model, django, url, module, password, key, import, request, date, add\n", "Topic: 9\n", " library, file, version, make, module, perl, support, makefile, image, program\n", "Topic: 10\n", " html, git, copyright, license, copy, text, json, example, new, install\n" ] } ], "source": [ "topic_distributions = best_lda_model(data_vectorized, vectorizer.get_feature_names_out())" ] }, { "cell_type": "code", "execution_count": 21, "id": "f4345bd6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{0: [0.9998131703476353, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf'], 1: [0.9936580635354768, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README'], 2: [0.9992995657213791, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_muffin.git_hullabaloo_README'], 3: [0.988192939654375, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/chewing_scim-chewing.git_hullabaloo_README'], 4: [0.9964897891037261, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_webmail.git_hullabaloo_README'], 5: [0.9943880112670485, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/state-machines_state_machines-activemodel_hullabaloo_README.md'], 6: [0.999759729377782, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md'], 7: [0.998666933112433, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_photutils.git_hullabaloo_README.rst'], 8: [0.9996679425883734, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md'], 9: [0.99815957978939, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/agmartin_linuxdoc-tools_hullabaloo_README'], 10: [0.9996663626936376, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt']}\n" ] }, { "data": { "text/plain": [ "{0: [0.9998131703476353,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf'],\n", " 1: [0.9936580635354768,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README'],\n", " 2: [0.9992995657213791,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_muffin.git_hullabaloo_README'],\n", " 3: [0.988192939654375,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/chewing_scim-chewing.git_hullabaloo_README'],\n", " 4: [0.9964897891037261,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_webmail.git_hullabaloo_README'],\n", " 5: [0.9943880112670485,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/state-machines_state_machines-activemodel_hullabaloo_README.md'],\n", " 6: [0.999759729377782,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md'],\n", " 7: [0.998666933112433,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_photutils.git_hullabaloo_README.rst'],\n", " 8: [0.9996679425883734,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md'],\n", " 9: [0.99815957978939,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/agmartin_linuxdoc-tools_hullabaloo_README'],\n", " 10: [0.9996663626936376,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt']}" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_most_prevalent(data_vectorized, file_list)" ] }, { "cell_type": "code", "execution_count": 24, "id": "23468e82", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "349\n", "-----------------------Topic 0 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "3551 0.984847 0.001515 0.001515 0.001515 0.001515 0.001515 0.001515 \n", "3413 0.981059 0.001894 0.001894 0.001894 0.001894 0.001894 0.001894 \n", "3396 0.973259 0.002674 0.002674 0.002674 0.002674 0.002675 0.002674 \n", "1240 0.965032 0.003497 0.003497 0.003497 0.003497 0.003497 0.003497 \n", "946 0.960470 0.003953 0.003953 0.003953 0.003953 0.003953 0.003953 \n", "2914 0.958673 0.004133 0.004132 0.004133 0.004133 0.004132 0.004133 \n", "225 0.954660 0.000918 0.000918 0.000918 0.000918 0.000918 0.000918 \n", "2355 0.943176 0.005683 0.005682 0.005682 0.005682 0.005682 0.005682 \n", "2913 0.943019 0.016762 0.000654 0.000654 0.000654 0.000654 0.000654 \n", "901 0.942914 0.001421 0.001420 0.044301 0.001421 0.001421 0.001421 \n", "\n", " 7 8 9 10 \n", "3551 0.001515 0.001515 0.001515 0.001515 \n", "3413 0.001894 0.001894 0.001894 0.001894 \n", "3396 0.002674 0.002674 0.002674 0.002674 \n", "1240 0.003497 0.003497 0.003497 0.003497 \n", "946 0.003953 0.003953 0.003953 0.003953 \n", "2914 0.004133 0.004133 0.004133 0.004133 \n", "225 0.000918 0.000918 0.037075 0.000918 \n", "2355 0.005682 0.005682 0.005682 0.005682 \n", "2913 0.000654 0.034986 0.000654 0.000654 \n", "901 0.001421 0.001420 0.001421 0.001421 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/pytest-dev_pytest-runner.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/twisted_pydoctor_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/pd-externals_ggee_hullabaloo_README.ggext\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/biojava_biojava.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/LLNL_sundials.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ClusterLabs_pacemaker_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/wolever_parameterized_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jupyter_nbconvert_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/pymodbus-dev_pymodbus.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/zopefoundation_zope.proxy_hullabaloo_README.txt\n", " 0 1 2 3 4 5 6 \\\n", "734 0.000018 0.009437 0.000018 0.000018 0.000018 0.273398 0.000018 \n", "1319 0.000018 0.583029 0.061516 0.331986 0.003909 0.019454 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "1106 0.000030 0.029005 0.708216 0.021411 0.000030 0.000030 0.166185 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "1435 0.000038 0.158357 0.000038 0.082379 0.000038 0.685618 0.000038 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "3589 0.000043 0.000043 0.625379 0.000043 0.001690 0.093226 0.000043 \n", "831 0.000043 0.000043 0.150073 0.000043 0.000043 0.842702 0.000043 \n", "\n", " 7 8 9 10 \n", "734 0.008753 0.000018 0.690342 0.017963 \n", "1319 0.000018 0.000018 0.000018 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "1106 0.028543 0.000030 0.046489 0.000030 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "1435 0.005478 0.006674 0.061303 0.000038 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "3589 0.000043 0.000043 0.279405 0.000043 \n", "831 0.003522 0.000043 0.003402 0.000043 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jmtd_wadc_hullabaloo_README.md\n", "-----------------------Topic 1 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "2395 0.000275 0.997253 0.000275 0.000275 0.000275 0.000275 0.000275 \n", "80 0.000758 0.992423 0.000758 0.000758 0.000758 0.000758 0.000758 \n", "534 0.000716 0.983133 0.000716 0.000716 0.010424 0.000716 0.000716 \n", "62 0.000928 0.979982 0.000928 0.000928 0.011669 0.000928 0.000928 \n", "1066 0.000866 0.971457 0.000866 0.000866 0.020751 0.000866 0.000866 \n", "98 0.024042 0.967857 0.000900 0.000900 0.000900 0.000900 0.000900 \n", "4070 0.001299 0.952846 0.001299 0.001299 0.001299 0.001299 0.001299 \n", "3923 0.005051 0.949491 0.005051 0.005051 0.005051 0.005051 0.005051 \n", "229 0.024377 0.947226 0.000168 0.000168 0.000168 0.000168 0.000168 \n", "3313 0.005348 0.946520 0.005348 0.005348 0.005348 0.005348 0.005348 \n", "\n", " 7 8 9 10 \n", "2395 0.000275 0.000275 0.000275 0.000275 \n", "80 0.000758 0.000758 0.000758 0.000758 \n", "534 0.000716 0.000716 0.000716 0.000716 \n", "62 0.000928 0.000928 0.000928 0.000928 \n", "1066 0.000866 0.000866 0.000866 0.000866 \n", "98 0.000900 0.000900 0.000900 0.000900 \n", "4070 0.035465 0.001299 0.001299 0.001299 \n", "3923 0.005051 0.005051 0.005051 0.005051 \n", "229 0.000168 0.000168 0.000168 0.027053 \n", "3313 0.005348 0.005348 0.005348 0.005349 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/cdidier_irssi-xmpp_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/alsa-lib.git_hullabaloo_README.aconnect\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bolt_bolt_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/batctl.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/royhills_arp-scan_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ThomasHabets_arping_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/raboof_nethogs_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/python-zeroconf_python-zeroconf.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/lxc_lxc.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/troglobit_smcroute.git_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "3589 0.000043 0.000043 0.625379 0.000043 0.001690 0.093226 0.000043 \n", "831 0.000043 0.000043 0.150073 0.000043 0.000043 0.842702 0.000043 \n", "2060 0.236422 0.000045 0.312054 0.000045 0.365046 0.000045 0.000045 \n", "2514 0.895267 0.000045 0.000045 0.000045 0.017315 0.000045 0.023678 \n", "2065 0.192353 0.000046 0.581575 0.000046 0.000046 0.098829 0.000046 \n", "3934 0.000048 0.000048 0.000048 0.000048 0.000048 0.945055 0.054512 \n", "1907 0.000050 0.000050 0.014488 0.000050 0.000050 0.985059 0.000050 \n", "\n", " 7 8 9 10 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "3589 0.000043 0.000043 0.279405 0.000043 \n", "831 0.003522 0.000043 0.003402 0.000043 \n", "2060 0.000045 0.000045 0.078141 0.008067 \n", "2514 0.019573 0.000045 0.043894 0.000045 \n", "2065 0.000046 0.000046 0.010390 0.116578 \n", "3934 0.000048 0.000048 0.000048 0.000048 \n", "1907 0.000050 0.000050 0.000050 0.000050 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jmtd_wadc_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/EsotericSoftware_kryo.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enthought_mayavi.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jrincayc_ucblogo-code.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/trackballs_trackballs.git_hullabaloo_README.html\n", "-----------------------Topic 2 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1202 0.000909 0.000909 0.990908 0.000909 0.000909 0.000909 0.000909 \n", "2504 0.001151 0.001151 0.988492 0.001151 0.001151 0.001151 0.001151 \n", "1512 0.001166 0.001166 0.988344 0.001166 0.001166 0.001166 0.001166 \n", "1392 0.001894 0.001894 0.981059 0.001894 0.001894 0.001894 0.001894 \n", "3590 0.000248 0.000248 0.951422 0.000248 0.000248 0.000248 0.000248 \n", "2397 0.000301 0.055197 0.942094 0.000301 0.000301 0.000301 0.000301 \n", "2999 0.000107 0.000107 0.928211 0.046276 0.000107 0.000107 0.000107 \n", "1140 0.001399 0.001399 0.913971 0.001399 0.001399 0.001399 0.001399 \n", "872 0.000999 0.000999 0.908382 0.000999 0.000999 0.000999 0.000999 \n", "2833 0.000093 0.095750 0.903416 0.000093 0.000093 0.000093 0.000093 \n", "\n", " 7 8 9 10 \n", "1202 0.000909 0.000909 0.000909 0.000909 \n", "2504 0.001151 0.001151 0.001151 0.001151 \n", "1512 0.001166 0.001166 0.001166 0.001166 \n", "1392 0.001894 0.001894 0.001894 0.001894 \n", "3590 0.000248 0.000248 0.000248 0.046342 \n", "2397 0.000301 0.000301 0.000301 0.000301 \n", "2999 0.000107 0.000107 0.024656 0.000107 \n", "1140 0.001399 0.001399 0.001399 0.073441 \n", "872 0.000999 0.000999 0.000999 0.082626 \n", "2833 0.000093 0.000093 0.000093 0.000093 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mathiasbynens_unicode-property-value-aliases-ecmascript_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mathiasbynens_unicode-property-aliases-ecmascript_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mathiasbynens_regenerate-unicode-properties_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jdunck_python-unicodecsv_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/nichtich_RDF-NS.git_hullabaloo_README.mkdn\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/redis_hiredis_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bearded_ruby-ldap_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mathiasbynens_unicode-property-aliases_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mathiasbynens_unicode-property-value-aliases.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/nicolasff_webdis_hullabaloo_README.md\n", " 0 1 2 3 4 5 6 \\\n", "734 0.000018 0.009437 0.000018 0.000018 0.000018 0.273398 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "1604 0.003947 0.126398 0.000025 0.039761 0.000025 0.013216 0.735422 \n", "3275 0.007322 0.141429 0.000032 0.000032 0.297121 0.451645 0.000032 \n", "1435 0.000038 0.158357 0.000038 0.082379 0.000038 0.685618 0.000038 \n", "2514 0.895267 0.000045 0.000045 0.000045 0.017315 0.000045 0.023678 \n", "3934 0.000048 0.000048 0.000048 0.000048 0.000048 0.945055 0.054512 \n", "1386 0.000050 0.659359 0.000050 0.082049 0.000050 0.000050 0.079319 \n", "2133 0.000057 0.286587 0.000057 0.000057 0.000057 0.404221 0.090777 \n", "789 0.000058 0.483836 0.000058 0.000058 0.000058 0.400501 0.000058 \n", "\n", " 7 8 9 10 \n", "734 0.008753 0.000018 0.690342 0.017963 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "1604 0.005943 0.000025 0.075213 0.000025 \n", "3275 0.000032 0.000032 0.102291 0.000032 \n", "1435 0.005478 0.006674 0.061303 0.000038 \n", "2514 0.019573 0.000045 0.043894 0.000045 \n", "3934 0.000048 0.000048 0.000048 0.000048 \n", "1386 0.178925 0.000050 0.000050 0.000050 \n", "2133 0.033438 0.000057 0.184634 0.000057 \n", "789 0.096216 0.000058 0.019038 0.000058 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jrincayc_ucblogo-code.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/arno-iptables-firewall_aif.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/blais_xxdiff.git_hullabaloo_README.build\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/swami_swami_hullabaloo_README\n", "-----------------------Topic 3 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1363 0.000739 0.000739 0.000739 0.992609 0.000739 0.000739 0.000739 \n", "339 0.000805 0.000805 0.000805 0.991954 0.000805 0.000805 0.000805 \n", "4200 0.000805 0.000805 0.000805 0.991954 0.000805 0.000805 0.000805 \n", "3897 0.000819 0.000819 0.000819 0.991810 0.000819 0.000819 0.000819 \n", "1693 0.000819 0.000819 0.000819 0.991810 0.000819 0.000819 0.000819 \n", "1464 0.000850 0.000850 0.000850 0.991503 0.000850 0.000850 0.000850 \n", "3669 0.000850 0.000850 0.000850 0.991503 0.000850 0.000850 0.000850 \n", "1157 0.000978 0.000978 0.000978 0.990224 0.000978 0.000978 0.000978 \n", "1825 0.000988 0.000988 0.000988 0.990118 0.000988 0.000988 0.000988 \n", "3919 0.000988 0.000988 0.000988 0.990118 0.000988 0.000988 0.000988 \n", "\n", " 7 8 9 10 \n", "1363 0.000739 0.000739 0.000739 0.000739 \n", "339 0.000805 0.000805 0.000805 0.000805 \n", "4200 0.000805 0.000805 0.000805 0.000805 \n", "3897 0.000819 0.000819 0.000819 0.000819 \n", "1693 0.000819 0.000819 0.000819 0.000819 \n", "1464 0.000850 0.000850 0.000850 0.000850 \n", "3669 0.000850 0.000850 0.000850 0.000850 \n", "1157 0.000978 0.000978 0.000978 0.000978 \n", "1825 0.000988 0.000988 0.000988 0.000988 \n", "3919 0.000988 0.000988 0.000988 0.000988 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mate-desktop_mate-panel.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mate-desktop_mate-desktop.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_cinnamon-desktop.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mate-desktop_mate-menus.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_cinnamon-menus.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mate-desktop_mate-screensaver.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_cinnamon-screensaver.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_libgudev_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mate-desktop_mate-session-manager.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_cinnamon-session.git_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "734 0.000018 0.009437 0.000018 0.000018 0.000018 0.273398 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "375 0.117647 0.046106 0.014596 0.000024 0.000024 0.131388 0.000024 \n", "3275 0.007322 0.141429 0.000032 0.000032 0.297121 0.451645 0.000032 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "3589 0.000043 0.000043 0.625379 0.000043 0.001690 0.093226 0.000043 \n", "831 0.000043 0.000043 0.150073 0.000043 0.000043 0.842702 0.000043 \n", "3483 0.000044 0.767987 0.136634 0.000044 0.009711 0.000044 0.000044 \n", "2060 0.236422 0.000045 0.312054 0.000045 0.365046 0.000045 0.000045 \n", "\n", " 7 8 9 10 \n", "734 0.008753 0.000018 0.690342 0.017963 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "375 0.000024 0.000024 0.004995 0.685147 \n", "3275 0.000032 0.000032 0.102291 0.000032 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "3589 0.000043 0.000043 0.279405 0.000043 \n", "831 0.003522 0.000043 0.003402 0.000043 \n", "3483 0.000044 0.000044 0.085361 0.000044 \n", "2060 0.000045 0.000045 0.078141 0.008067 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jmtd_wadc_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/OpenPrinting_foomatic-db.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/EsotericSoftware_kryo.git_hullabaloo_README.md\n", "-----------------------Topic 4 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "226 0.010102 0.010103 0.010104 0.010103 0.898973 0.010103 0.010103 \n", "1042 0.022732 0.022729 0.022732 0.022727 0.772689 0.022727 0.022735 \n", "4129 0.030303 0.030303 0.030303 0.030308 0.696963 0.030303 0.030303 \n", "3097 0.030303 0.030303 0.030303 0.030303 0.696961 0.030303 0.030303 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "3127 0.018183 0.018183 0.018182 0.018184 0.606181 0.018182 0.018184 \n", "2259 0.045455 0.045455 0.045455 0.045455 0.545455 0.045455 0.045455 \n", "1226 0.045455 0.045455 0.045455 0.045455 0.545452 0.045455 0.045455 \n", "2953 0.045455 0.045456 0.045455 0.045456 0.545443 0.045455 0.045456 \n", "\n", " 7 8 9 10 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "226 0.010101 0.010104 0.010102 0.010102 \n", "1042 0.022732 0.022735 0.022731 0.022730 \n", "4129 0.030305 0.030303 0.030303 0.030303 \n", "3097 0.030303 0.030303 0.030303 0.030311 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "3127 0.230172 0.018182 0.018184 0.018183 \n", "2259 0.045455 0.045455 0.045455 0.045455 \n", "1226 0.045455 0.045455 0.045457 0.045455 \n", "2953 0.045455 0.045455 0.045456 0.045459 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/egh_ledger-autosync_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/fcitx_fcitx-libpinyin_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ukui_ukui-indicators_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ukui_ukui-sidebar_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/phihag_ipaddress_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jpy-consortium_jpy_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ntop_nDPI.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ejeschke_ginga.git_hullabaloo_README.rst\n", " 0 1 2 3 4 5 6 \\\n", "734 0.000018 0.009437 0.000018 0.000018 0.000018 0.273398 0.000018 \n", "375 0.117647 0.046106 0.014596 0.000024 0.000024 0.131388 0.000024 \n", "1604 0.003947 0.126398 0.000025 0.039761 0.000025 0.013216 0.735422 \n", "1106 0.000030 0.029005 0.708216 0.021411 0.000030 0.000030 0.166185 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "1435 0.000038 0.158357 0.000038 0.082379 0.000038 0.685618 0.000038 \n", "831 0.000043 0.000043 0.150073 0.000043 0.000043 0.842702 0.000043 \n", "2468 0.013769 0.908888 0.020981 0.026627 0.000045 0.000045 0.000045 \n", "2065 0.192353 0.000046 0.581575 0.000046 0.000046 0.098829 0.000046 \n", "\n", " 7 8 9 10 \n", "734 0.008753 0.000018 0.690342 0.017963 \n", "375 0.000024 0.000024 0.004995 0.685147 \n", "1604 0.005943 0.000025 0.075213 0.000025 \n", "1106 0.028543 0.000030 0.046489 0.000030 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "1435 0.005478 0.006674 0.061303 0.000038 \n", "831 0.003522 0.000043 0.003402 0.000043 \n", "2468 0.005386 0.002043 0.022125 0.000045 \n", "2065 0.000046 0.000046 0.010390 0.116578 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jmtd_wadc_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ppp-project_ppp_hullabaloo_README.linux\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enthought_mayavi.git_hullabaloo_README.txt\n", "-----------------------Topic 5 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1907 0.000050 0.000050 0.014488 0.000050 0.000050 0.985059 0.000050 \n", "421 0.003367 0.003368 0.003367 0.003368 0.003367 0.966327 0.003367 \n", "2871 0.003788 0.003788 0.003788 0.003788 0.003788 0.962118 0.003788 \n", "3904 0.003788 0.003788 0.003788 0.003788 0.003788 0.962118 0.003788 \n", "4036 0.003788 0.003788 0.003788 0.003788 0.003788 0.962118 0.003788 \n", "3934 0.000048 0.000048 0.000048 0.000048 0.000048 0.945055 0.054512 \n", "3842 0.009091 0.009093 0.009091 0.009092 0.009091 0.909085 0.009091 \n", "1884 0.000179 0.019765 0.000179 0.069225 0.000179 0.897342 0.000179 \n", "1596 0.080111 0.000526 0.000526 0.000526 0.010355 0.897019 0.000526 \n", "3193 0.032556 0.000315 0.000315 0.032793 0.000315 0.864536 0.000315 \n", "\n", " 7 8 9 10 \n", "1907 0.000050 0.000050 0.000050 0.000050 \n", "421 0.003368 0.003367 0.003367 0.003367 \n", "2871 0.003789 0.003788 0.003788 0.003788 \n", "3904 0.003789 0.003788 0.003788 0.003788 \n", "4036 0.003789 0.003788 0.003788 0.003788 \n", "3934 0.000048 0.000048 0.000048 0.000048 \n", "3842 0.009091 0.009091 0.009092 0.009091 \n", "1884 0.012415 0.000179 0.000179 0.000179 \n", "1596 0.000525 0.000526 0.008836 0.000526 \n", "3193 0.000315 0.000315 0.067913 0.000315 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/trackballs_trackballs.git_hullabaloo_README.html\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kodi-pvr_pvr.iptvsimple.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kodi-pvr_pvr.njoy.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kodi-pvr_pvr.hdhomerun.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kodi-pvr_pvr.dvbviewer.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jrincayc_ucblogo-code.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/chrender_fizmo-ncursesw_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/callaa_luola.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mawww_kakoune.git_hullabaloo_README.asciidoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bomber.git_hullabaloo_README.themes\n", " 0 1 2 3 4 5 6 \\\n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "3560 0.420193 0.034504 0.322584 0.020994 0.002791 0.000024 0.000024 \n", "1106 0.000030 0.029005 0.708216 0.021411 0.000030 0.000030 0.166185 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "3483 0.000044 0.767987 0.136634 0.000044 0.009711 0.000044 0.000044 \n", "2060 0.236422 0.000045 0.312054 0.000045 0.365046 0.000045 0.000045 \n", "2468 0.013769 0.908888 0.020981 0.026627 0.000045 0.000045 0.000045 \n", "2514 0.895267 0.000045 0.000045 0.000045 0.017315 0.000045 0.023678 \n", "\n", " 7 8 9 10 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "3560 0.015793 0.104444 0.078624 0.000024 \n", "1106 0.028543 0.000030 0.046489 0.000030 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "3483 0.000044 0.000044 0.085361 0.000044 \n", "2060 0.000045 0.000045 0.078141 0.008067 \n", "2468 0.005386 0.002043 0.022125 0.000045 \n", "2514 0.019573 0.000045 0.043894 0.000045 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/OpenPrinting_foomatic-db.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/EsotericSoftware_kryo.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ppp-project_ppp_hullabaloo_README.linux\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "-----------------------Topic 6 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "289 0.000215 0.000215 0.000215 0.000215 0.000215 0.000215 0.997846 \n", "1141 0.001122 0.001122 0.001122 0.001122 0.001122 0.001122 0.988775 \n", "2865 0.001421 0.001421 0.001421 0.001421 0.001420 0.001421 0.985794 \n", "2572 0.001653 0.001653 0.001653 0.001653 0.001653 0.001653 0.983470 \n", "1750 0.001894 0.001894 0.001894 0.001894 0.001894 0.001894 0.981059 \n", "510 0.003247 0.003247 0.003247 0.003247 0.003247 0.003247 0.967528 \n", "2034 0.003497 0.003497 0.003497 0.003497 0.003497 0.003497 0.965033 \n", "291 0.003789 0.003788 0.003788 0.003788 0.003788 0.003788 0.962118 \n", "156 0.004133 0.004133 0.004133 0.004133 0.004132 0.004132 0.958675 \n", "3778 0.005682 0.005682 0.005682 0.005682 0.005682 0.005682 0.943176 \n", "\n", " 7 8 9 10 \n", "289 0.000215 0.000215 0.000215 0.000215 \n", "1141 0.001122 0.001122 0.001123 0.001122 \n", "2865 0.001421 0.001421 0.001421 0.001421 \n", "2572 0.001653 0.001653 0.001653 0.001653 \n", "1750 0.001894 0.001894 0.001894 0.001894 \n", "510 0.003248 0.003247 0.003247 0.003247 \n", "2034 0.003497 0.003497 0.003497 0.003497 \n", "291 0.003788 0.003788 0.003788 0.003788 \n", "156 0.004132 0.004132 0.004133 0.004132 \n", "3778 0.005683 0.005682 0.005683 0.005683 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/wanderlust_semi_hullabaloo_README.en\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ActiveState_appdirs.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/df7cb_sdate_hullabaloo_README.fake\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/gosa-project_gosa-core_hullabaloo_README.safemode\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tkf_emacs-python-environment.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/hhatto_autopep8.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/skk-dev_skktools_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ganglia_ganglia-modules-linux.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/zevv_duc_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/slicer69_sysvinit_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "734 0.000018 0.009437 0.000018 0.000018 0.000018 0.273398 0.000018 \n", "1319 0.000018 0.583029 0.061516 0.331986 0.003909 0.019454 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "375 0.117647 0.046106 0.014596 0.000024 0.000024 0.131388 0.000024 \n", "3560 0.420193 0.034504 0.322584 0.020994 0.002791 0.000024 0.000024 \n", "3275 0.007322 0.141429 0.000032 0.000032 0.297121 0.451645 0.000032 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "1435 0.000038 0.158357 0.000038 0.082379 0.000038 0.685618 0.000038 \n", "3589 0.000043 0.000043 0.625379 0.000043 0.001690 0.093226 0.000043 \n", "\n", " 7 8 9 10 \n", "734 0.008753 0.000018 0.690342 0.017963 \n", "1319 0.000018 0.000018 0.000018 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "375 0.000024 0.000024 0.004995 0.685147 \n", "3560 0.015793 0.104444 0.078624 0.000024 \n", "3275 0.000032 0.000032 0.102291 0.000032 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "1435 0.005478 0.006674 0.061303 0.000038 \n", "3589 0.000043 0.000043 0.279405 0.000043 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "-----------------------Topic 7 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1146 0.000457 0.000457 0.000457 0.000457 0.000457 0.000457 0.000457 \n", "4245 0.000544 0.000544 0.000544 0.000544 0.000544 0.000544 0.000544 \n", "2853 0.000558 0.000558 0.000558 0.000558 0.000558 0.000558 0.000558 \n", "3625 0.000587 0.000587 0.000587 0.000587 0.000587 0.000587 0.000587 \n", "1663 0.000598 0.000598 0.000598 0.000598 0.000598 0.000598 0.000598 \n", "2567 0.000623 0.000623 0.000623 0.000623 0.000623 0.000623 0.000623 \n", "1779 0.000699 0.000699 0.000699 0.000699 0.000699 0.000699 0.000699 \n", "1265 0.000777 0.000777 0.000777 0.000777 0.000777 0.000777 0.000777 \n", "2103 0.000812 0.000812 0.000812 0.000812 0.000812 0.000812 0.000812 \n", "4023 0.000834 0.000834 0.000834 0.000834 0.000834 0.000834 0.000834 \n", "\n", " 7 8 9 10 \n", "1146 0.995431 0.000457 0.000457 0.000457 \n", "4245 0.994556 0.000544 0.000544 0.000544 \n", "2853 0.994422 0.000558 0.000558 0.000558 \n", "3625 0.994135 0.000587 0.000587 0.000587 \n", "1663 0.994019 0.000598 0.000598 0.000598 \n", "2567 0.993773 0.000623 0.000623 0.000623 \n", "1779 0.993007 0.000699 0.000699 0.000699 \n", "1265 0.992230 0.000777 0.000777 0.000777 \n", "2103 0.991883 0.000812 0.000812 0.000812 \n", "4023 0.991659 0.000834 0.000834 0.000834 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_imp.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_kronolith.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tkem_mopidy-dleyna.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mopidy_mopidy-alsamixer_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_sesha.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mopidy_mopidy-mpris.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tkem_mopidy-internetarchive_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kingosticks_mopidy-tunein_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tkem_mopidy-podcast-itunes_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_trean.git_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "1319 0.000018 0.583029 0.061516 0.331986 0.003909 0.019454 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "375 0.117647 0.046106 0.014596 0.000024 0.000024 0.131388 0.000024 \n", "3275 0.007322 0.141429 0.000032 0.000032 0.297121 0.451645 0.000032 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "3589 0.000043 0.000043 0.625379 0.000043 0.001690 0.093226 0.000043 \n", "3483 0.000044 0.767987 0.136634 0.000044 0.009711 0.000044 0.000044 \n", "2060 0.236422 0.000045 0.312054 0.000045 0.365046 0.000045 0.000045 \n", "\n", " 7 8 9 10 \n", "1319 0.000018 0.000018 0.000018 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "375 0.000024 0.000024 0.004995 0.685147 \n", "3275 0.000032 0.000032 0.102291 0.000032 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "3589 0.000043 0.000043 0.279405 0.000043 \n", "3483 0.000044 0.000044 0.085361 0.000044 \n", "2060 0.000045 0.000045 0.078141 0.008067 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/OpenPrinting_foomatic-db.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/EsotericSoftware_kryo.git_hullabaloo_README.md\n", "-----------------------Topic 8 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1118 0.001541 0.001541 0.001541 0.001541 0.001541 0.001541 0.001541 \n", "1099 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100 \n", "2769 0.003031 0.003031 0.003030 0.003031 0.003030 0.003031 0.003031 \n", "1427 0.019060 0.000123 0.000123 0.000123 0.022132 0.000123 0.000123 \n", "3158 0.006994 0.006993 0.006993 0.006994 0.006993 0.006993 0.006993 \n", "1537 0.031350 0.001337 0.001337 0.001337 0.027705 0.029067 0.001337 \n", "3399 0.010101 0.010103 0.010101 0.010101 0.010101 0.010101 0.010101 \n", "2492 0.011364 0.011367 0.011364 0.011364 0.011364 0.011365 0.011365 \n", "2617 0.004329 0.004329 0.004329 0.004329 0.004329 0.004330 0.079500 \n", "2315 0.012988 0.012987 0.012988 0.012987 0.012987 0.012988 0.012988 \n", "\n", " 7 8 9 10 \n", "1118 0.001541 0.984591 0.001541 0.001541 \n", "1099 0.019750 0.979350 0.000100 0.000100 \n", "2769 0.003031 0.969694 0.003031 0.003031 \n", "1427 0.000123 0.957827 0.000123 0.000123 \n", "3158 0.006993 0.930067 0.006993 0.006993 \n", "1537 0.001337 0.902518 0.001337 0.001337 \n", "3399 0.010101 0.898986 0.010101 0.010101 \n", "2492 0.011364 0.886356 0.011364 0.011364 \n", "2617 0.004329 0.881536 0.004329 0.004330 \n", "2315 0.012987 0.870125 0.012988 0.012987 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tax_python-requests-aws_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bestpractical_rt-extension-repeatticket_hullabaloo_README.pod\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ericvsmith_toposort_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/zapier_django-rest-hooks_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/chibisov_drf-extensions.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bfirsh_django-ordered-model.git_hullabaloo_README.markdown\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mikeal_tunnel-agent_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Castaglia_proftpd-mod_vroot.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/LeaVerou_prism.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/unbit_django-uwsgi_hullabaloo_README.md\n", " 0 1 2 3 4 5 6 \\\n", "734 0.000018 0.009437 0.000018 0.000018 0.000018 0.273398 0.000018 \n", "1319 0.000018 0.583029 0.061516 0.331986 0.003909 0.019454 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "375 0.117647 0.046106 0.014596 0.000024 0.000024 0.131388 0.000024 \n", "1604 0.003947 0.126398 0.000025 0.039761 0.000025 0.013216 0.735422 \n", "1106 0.000030 0.029005 0.708216 0.021411 0.000030 0.000030 0.166185 \n", "3275 0.007322 0.141429 0.000032 0.000032 0.297121 0.451645 0.000032 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "\n", " 7 8 9 10 \n", "734 0.008753 0.000018 0.690342 0.017963 \n", "1319 0.000018 0.000018 0.000018 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "375 0.000024 0.000024 0.004995 0.685147 \n", "1604 0.005943 0.000025 0.075213 0.000025 \n", "1106 0.028543 0.000030 0.046489 0.000030 \n", "3275 0.000032 0.000032 0.102291 0.000032 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "-----------------------Topic 9 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "4236 0.000083 0.000083 0.000083 0.000083 0.000083 0.000083 0.000083 \n", "3501 0.000110 0.000110 0.000110 0.000110 0.000110 0.000110 0.000110 \n", "2181 0.000145 0.000145 0.000145 0.000145 0.000145 0.000145 0.000145 \n", "2877 0.000255 0.000255 0.000255 0.000255 0.000255 0.000255 0.000255 \n", "559 0.000283 0.000283 0.000283 0.000283 0.000283 0.000283 0.000283 \n", "2673 0.000722 0.000722 0.000722 0.000722 0.000722 0.000722 0.000722 \n", "2255 0.000957 0.000958 0.000957 0.000957 0.000957 0.000957 0.000957 \n", "3216 0.001010 0.001010 0.001010 0.001010 0.001010 0.001010 0.001010 \n", "3285 0.001010 0.001010 0.001010 0.001010 0.001010 0.001010 0.001010 \n", "1546 0.001022 0.001022 0.001021 0.001022 0.001021 0.001021 0.001022 \n", "\n", " 7 8 9 10 \n", "4236 0.000083 0.000083 0.999171 0.000083 \n", "3501 0.000110 0.000110 0.998901 0.000110 \n", "2181 0.000145 0.000145 0.998550 0.000145 \n", "2877 0.000255 0.000255 0.997453 0.000255 \n", "559 0.000283 0.000283 0.997168 0.000283 \n", "2673 0.000722 0.000722 0.992784 0.000722 \n", "2255 0.000957 0.000957 0.990429 0.000957 \n", "3216 0.001010 0.001010 0.989898 0.001010 \n", "3285 0.001010 0.001010 0.989898 0.001010 \n", "1546 0.001022 0.001022 0.989785 0.001022 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/osslugaru_lugaru_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/michaelrsweet_htmldoc.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/glennrp_libpng_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/silx-kit_fabio.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/gyunaev_kchmviewer_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/book_Test-Database.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sebastianbergmann_php-invoker_hullabaloo_README.markdown\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sebastianbergmann_php-timer_hullabaloo_README.markdown\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sebastianbergmann_php-file-iterator_hullabaloo_README.markdown\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bobtfish_directory-scratch.git_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "1319 0.000018 0.583029 0.061516 0.331986 0.003909 0.019454 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "3934 0.000048 0.000048 0.000048 0.000048 0.000048 0.945055 0.054512 \n", "2954 0.308152 0.151885 0.017462 0.026990 0.000050 0.003691 0.491573 \n", "1386 0.000050 0.659359 0.000050 0.082049 0.000050 0.000050 0.079319 \n", "1907 0.000050 0.000050 0.014488 0.000050 0.000050 0.985059 0.000050 \n", "112 0.000051 0.000051 0.336407 0.000051 0.000051 0.663130 0.000051 \n", "2565 0.000053 0.355726 0.097612 0.000053 0.000053 0.079875 0.466419 \n", "\n", " 7 8 9 10 \n", "1319 0.000018 0.000018 0.000018 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "3934 0.000048 0.000048 0.000048 0.000048 \n", "2954 0.000050 0.000050 0.000050 0.000050 \n", "1386 0.178925 0.000050 0.000050 0.000050 \n", "1907 0.000050 0.000050 0.000050 0.000050 \n", "112 0.000051 0.000051 0.000051 0.000051 \n", "2565 0.000053 0.000053 0.000053 0.000053 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jrincayc_ucblogo-code.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jirka-h_haveged_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/arno-iptables-firewall_aif.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/trackballs_trackballs.git_hullabaloo_README.html\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_evolution_hullabaloo_README.TXT\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/punitagrawal_global_hullabaloo_README\n", "-----------------------Topic 10 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "122 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 \n", "2996 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 \n", "4117 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 \n", "2682 0.000805 0.000805 0.000805 0.000805 0.000805 0.000805 0.000805 \n", "2191 0.000850 0.000850 0.000850 0.000850 0.000850 0.000850 0.000850 \n", "1129 0.000858 0.000858 0.000858 0.000858 0.000858 0.000858 0.000858 \n", "3720 0.000928 0.000928 0.000928 0.000928 0.000928 0.000928 0.000928 \n", "3739 0.000937 0.000937 0.000937 0.000937 0.000937 0.000937 0.000937 \n", "935 0.000937 0.000937 0.000937 0.000937 0.000937 0.000937 0.000937 \n", "1335 0.000947 0.000947 0.000947 0.000947 0.000947 0.000947 0.000947 \n", "\n", " 7 8 9 10 \n", "122 0.000133 0.000133 0.000133 0.998667 \n", "2996 0.000133 0.000133 0.000133 0.998667 \n", "4117 0.000133 0.000133 0.000133 0.998667 \n", "2682 0.000805 0.000805 0.000805 0.991954 \n", "2191 0.000850 0.000850 0.000850 0.991503 \n", "1129 0.000858 0.000858 0.000858 0.991423 \n", "3720 0.000928 0.000928 0.000928 0.990723 \n", "3739 0.000937 0.000937 0.000937 0.990628 \n", "935 0.000937 0.000937 0.000937 0.990628 \n", "1335 0.000947 0.000947 0.000947 0.990530 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_photutils.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_astroquery.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_ccdproc.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/nxt-firmware.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/seattlerb_rubyinline.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/gazay_gon_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/seattlerb_ruby_parser.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/faye_faye_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jtzemp_base62.git_hullabaloo_README.rdoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/seattlerb_sexp_processor_hullabaloo_README.txt\n", " 0 1 2 3 4 5 6 \\\n", "1319 0.000018 0.583029 0.061516 0.331986 0.003909 0.019454 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 0.999813 0.000019 0.000019 \n", "3560 0.420193 0.034504 0.322584 0.020994 0.002791 0.000024 0.000024 \n", "1604 0.003947 0.126398 0.000025 0.039761 0.000025 0.013216 0.735422 \n", "1106 0.000030 0.029005 0.708216 0.021411 0.000030 0.000030 0.166185 \n", "3275 0.007322 0.141429 0.000032 0.000032 0.297121 0.451645 0.000032 \n", "2154 0.000033 0.000033 0.549309 0.450392 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.346634 0.000033 0.000033 0.000033 0.000033 \n", "1435 0.000038 0.158357 0.000038 0.082379 0.000038 0.685618 0.000038 \n", "2081 0.000039 0.278867 0.041405 0.000039 0.663507 0.000039 0.015947 \n", "\n", " 7 8 9 10 \n", "1319 0.000018 0.000018 0.000018 0.000018 \n", "3299 0.000019 0.000019 0.000019 0.000019 \n", "3560 0.015793 0.104444 0.078624 0.000024 \n", "1604 0.005943 0.000025 0.075213 0.000025 \n", "1106 0.028543 0.000030 0.046489 0.000030 \n", "3275 0.000032 0.000032 0.102291 0.000032 \n", "2154 0.000033 0.000033 0.000033 0.000033 \n", "3771 0.000033 0.000033 0.653065 0.000033 \n", "1435 0.005478 0.006674 0.061303 0.000038 \n", "2081 0.000039 0.000039 0.000039 0.000039 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "0 0.097433\n", "1 0.101780\n", "2 0.081539\n", "3 0.130848\n", "4 0.024088\n", "5 0.061218\n", "6 0.089808\n", "7 0.134225\n", "8 0.059773\n", "9 0.132976\n", "10 0.086312\n", "dtype: float64\n" ] } ], "source": [ "prevalent_topics(data_vectorized, file_list)" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }