{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e09a84d6-cbd4-4a12-8e96-3775f734a262", "metadata": {}, "outputs": [], "source": [ "import re\n", "import numpy as np\n", "import pandas as pd\n", "import glob\n", "import copy\n", "import csv\n", "from statistics import mean, median\n", "from strip_markdown import strip_markdown\n", "import joblib" ] }, { "cell_type": "code", "execution_count": 2, "id": "9483091c-ac72-415c-932d-ac7cf7970789", "metadata": {}, "outputs": [], "source": [ "import gensim\n", "import gensim.corpora as corpora\n", "from gensim.utils import simple_preprocess\n", "from gensim.models import CoherenceModel\n", "from gensim.models.phrases import Phrases\n", "\n", "from sklearn.decomposition import LatentDirichletAllocation\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", "\n", "from statistics import mode" ] }, { "cell_type": "code", "execution_count": 15, "id": "196abd6a", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package wordnet to\n", "[nltk_data] /home/SOC.NORTHWESTERN.EDU/nws8519/nltk_data...\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#import nltk\n", "#nltk.download('wordnet')" ] }, { "cell_type": "code", "execution_count": 3, "id": "3da6b590-875d-478d-aaaa-de020039c519", "metadata": {}, "outputs": [], "source": [ "# spacy and nltk for lemmatization\n", "import nltk \n", "#nltk.download('stopwords')\n", "import spacy\n", "from nltk.corpus import stopwords\n", "from nltk.stem.wordnet import WordNetLemmatizer\n", "\n", "stopwords = stopwords.words('english')" ] }, { "cell_type": "code", "execution_count": 4, "id": "60c137ae-6fe9-4b03-b899-6141b1645d6b", "metadata": {}, "outputs": [], "source": [ "def metadata_for_file(file):\n", " word_list = file.split()\n", " word_count = len(word_list)\n", " #print(word_list)\n", " if word_count == 0:\n", " avg_word_length = 0\n", " else: \n", " avg_word_length = sum(map(len, word_list)) / len(word_list)\n", " #return number of paragraphs\n", " return word_count, avg_word_length" ] }, { "cell_type": "code", "execution_count": 13, "id": "2e674fef-adb4-48c9-86a0-a655c41a95f3", "metadata": {}, "outputs": [], "source": [ "def get_data_from_dir(directory):\n", " files = glob.glob(f\"{directory}/*\")\n", " data_list = []\n", " word_counts = []\n", " avg_word_lengths = []\n", " file_list = []\n", " for file in files:\n", " text = open(file, encoding='utf-8', errors='ignore').read()\n", " #here's some of the descriptive text analysis\n", " word_count, avg_word_length = metadata_for_file(text)\n", " word_counts.append(word_count)\n", " avg_word_lengths.append(avg_word_length)\n", " #adding the data to the list of text\n", " data_list.append(text)\n", " #adding filename\n", " file_list.append(file)\n", " return data_list, word_counts, avg_word_lengths, file_list" ] }, { "cell_type": "code", "execution_count": 6, "id": "2b332b10-bfc8-4566-8c52-19a8a334af00", "metadata": {}, "outputs": [], "source": [ "#preprocessing text data\n", "def preprocess(corpus_list):\n", " #extending stopwords \n", " specific_stopwords = [\"http\", \"com\", \"www\", \"org\", \"file\", \"code\", \"time\", \"software\", \"use\", \"user\", \"set\", \"line\", \"run\", \"source\", \"github\",\n", " \"lineno\", \"python\", \"php\", \"ruby\", \"api\"]\n", " stopwords.extend(specific_stopwords)\n", " D = copy.copy(corpus_list)\n", " #stripping markdown from documents\n", " D = [strip_markdown(doc) for doc in D]\n", " #strip html \n", " D = [re.sub(r'', '', doc, flags=re.DOTALL) for doc in D]\n", " #mvp right now, can certainly be expanded as iterations of text analysis are done\n", " D = [[token for token in simple_preprocess(doc) if token not in stopwords and len(token) > 2]for doc in D]\n", " lemmatizer = WordNetLemmatizer()\n", " D_lemma = [\" \".join([lemmatizer.lemmatize(token) for token in doc]) for doc in D]\n", " return D_lemma" ] }, { "cell_type": "code", "execution_count": 7, "id": "2a26b7ef-d2df-4e1d-8aeb-66706ac6cbb7", "metadata": {}, "outputs": [], "source": [ "#preparing processed data for model usage\n", "def text_preparation(lemmatized_text):\n", " #bigrams\n", " D_bigrams = copy.copy(lemmatized_text)\n", " bigram = Phrases(D_bigrams, min_count=2)\n", " for i in range(len(lemmatized_text)):\n", " for token in bigram[D_bigrams[i]]:\n", " if '_' in token:\n", " D_bigrams[i].append(token)\n", " #id2word\n", " id2word = corpora.Dictionary(D_bigrams)\n", " id2word.filter_extremes(no_below=5, no_above=0.5)\n", " #bow representation \n", " bag_of_words = [id2word.doc2bow(doc) for doc in D_bigrams]\n", " return bag_of_words, id2word" ] }, { "cell_type": "code", "execution_count": 8, "id": "24799e25-2c0c-4e16-b503-68296f604f52", "metadata": {}, "outputs": [], "source": [ "def lda_model_identification(data_vectorized):\n", " lda = LatentDirichletAllocation()\n", " search_params = {'n_components': [11], 'learning_decay': [.5, .7, .9], 'batch_size' : [128, 256] }\n", " model = GridSearchCV(lda, param_grid=search_params, verbose=10)\n", " model.fit(data_vectorized)\n", " best_lda_model = model.best_estimator_\n", " print(\"Best Model's Params: \", model.best_params_)\n", " print(\"Best Log Likelihood Score: \", model.best_score_)\n", " print(\"Model Perplexity: \", best_lda_model.perplexity(data_vectorized))" ] }, { "cell_type": "code", "execution_count": 18, "id": "d3b5785f-8272-44f5-9aee-e5f5e97452e5", "metadata": {}, "outputs": [], "source": [ "def best_lda_model(data_vectorized, vocab):\n", " lda = LatentDirichletAllocation(n_components=11, learning_decay = 0.5, batch_size = 256, max_iter = 50)\n", " id_topic = lda.fit_transform(data_vectorized)\n", " topic_words = {}\n", " for topic, comp in enumerate(lda.components_):\n", " word_idx = np.argsort(comp)[::-1][:10]\n", " topic_words[topic] = [vocab[i] for i in word_idx]\n", " for topic, words in topic_words.items():\n", " print('Topic: %d' % topic)\n", " print(' %s' % ', '.join(words))\n", " #lda.print_topics(num_words=10)\n", " joblib.dump(lda, '020125_README_lda.jl')\n", " #lda = joblib.load('0509_lda.jl')\n", " return id_topic" ] }, { "cell_type": "code", "execution_count": 20, "id": "80bcdc6c-8a3d-4738-87b5-15e6c2db3a27", "metadata": {}, "outputs": [], "source": [ "def get_most_prevalent(vect_documents, documents):\n", " lda = joblib.load('020125_README_lda.jl')\n", " distributions = lda.transform(vect_documents)\n", " most_prevalent = {0: [0, \"\"],1: [0, \"\"], 2: [0, \"\"], 3: [0, \"\"], 4: [0, \"\"], 5: [0, \"\"], 6: [0, \"\"], 7: [0, \"\"], 8: [0, \"\"], 9: [0, \"\"], 10: [0, \"\"]}\n", " for i, topic_distribution in enumerate(distributions):\n", " for j in range(11):\n", " if topic_distribution[j] > most_prevalent[j][0]:\n", " most_prevalent[j] = [topic_distribution[j], documents[i]]\n", " print(most_prevalent)\n", " return most_prevalent\n" ] }, { "cell_type": "code", "execution_count": 22, "id": "3afd27af-8e8f-43c0-8610-06f7a68d5aec", "metadata": {}, "outputs": [], "source": [ "def prevalent_topics(vect_documents, file_list):\n", " lda = joblib.load('020125_README_lda.jl')\n", " #lda = joblib.load('0514_contrib_lda.jl')\n", " distributions = lda.transform(vect_documents)\n", " #figuring out what the max distribution is and then figuring out the mode\n", " top_topic = []\n", " count_of_multiple = 0\n", " topic_arrays = []\n", " for i, topic_distribution in enumerate(distributions):\n", " max_dist = max(topic_distribution)\n", " indexes = np.where(topic_distribution == max_dist)[0]\n", " if len(indexes) == 1:\n", " top_topic.append(indexes[0])\n", " else:\n", " count_of_multiple += 1\n", " topic_arrays.append(topic_distribution)\n", " #most_frequent(top_topic)\n", " print(count_of_multiple)\n", " df = pd.DataFrame(topic_arrays)\n", " #finding the distribution values for all documents\n", " with open('020125_README_file_topic_distributions.csv', 'w', newline='') as csvfile:\n", " fieldnames = ['filename', 't0', 't1', 't2', 't3', 't4', 't5', 't6', 't7', 't8', 't9', 't10']\n", " writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n", " writer.writeheader()\n", " for i, row in df.iterrows():\n", " project_dir = {}\n", " project_dir['filename'] = file_list[i].split(\"/\")[-1]\n", " array_row = df.iloc[i].to_numpy()\n", " for j in range(11):\n", " project_dir[\"t\" + str(j)] = array_row[j]\n", " writer.writerow(project_dir)\n", " #print(df.sort_values(by=['0']).head(5))\n", " for i in range(11):\n", " print(\"-----------------------Topic \" + str(i) + \" --------------------------------\")\n", " top5 = df.nlargest(10, i)\n", " top_indices = top5.index.to_list()\n", " print(top5)\n", " for index in top_indices:\n", " print(file_list[index])\n", " bottom5 = df.nsmallest(10, i)\n", " bottom_indices = bottom5.index.to_list()\n", " print(bottom5)\n", " for index in bottom_indices:\n", " print(file_list[index])\n", " averages = df.mean()\n", " print(averages)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "5aefbafc-0c1b-409a-afbc-655e4cef91e3", "metadata": {}, "outputs": [], "source": [ "def most_frequent(topic_prevalence):\n", " most_frequent_array = []\n", " for j in range(11):\n", " topic = mode(topic_prevalence)\n", " most_frequent_array.append(topic)\n", " topic_prevalence = [i for i in topic_prevalence if i != topic]\n", " print(most_frequent_array)" ] }, { "cell_type": "code", "execution_count": 10, "id": "69d606fd", "metadata": {}, "outputs": [], "source": [ "readme_directory = \"/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/\"" ] }, { "cell_type": "code", "execution_count": 14, "id": "1f937c2e-2714-475d-b670-602164c46642", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean wordcount: 272.11756407241944\n", "Median wordcount: 98\n", "Mean wordlength: 6.0641336743311145\n", "Median wordlength: 5.841463414634147\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/SOC.NORTHWESTERN.EDU/nws8519/anaconda3/lib/python3.12/html/parser.py:171: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n", " k = self.parse_starttag(i)\n" ] } ], "source": [ "listed_corpus, wordcounts, wordlengths, file_list = get_data_from_dir(readme_directory)\n", "print(\"Mean wordcount: \", mean(wordcounts))\n", "print(\"Median wordcount: \", median(wordcounts))\n", "print(\"Mean wordlength: \", mean(wordlengths))\n", "print(\"Median wordlength: \", median(wordlengths))\n", "lemmatized_corpus = preprocess(listed_corpus)" ] }, { "cell_type": "code", "execution_count": null, "id": "e90e236f-8db5-40cc-88a3-60e674b9d1de", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['020125_README_vectorizer.joblib']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "'''\n", "vectorizer = CountVectorizer(analyzer='word', \n", " min_df=2, \n", " stop_words='english', \n", " lowercase=True, \n", " token_pattern='[a-zA-Z0-9]{2,}', \n", " )\n", "data_vectorized = vectorizer.fit_transform(lemmatized_corpus)\n", "joblib.dump(vectorizer, '020125_README_vectorizer.joblib')\n", "'''\n" ] }, { "cell_type": "code", "execution_count": 16, "id": "d68aaf7b", "metadata": {}, "outputs": [], "source": [ "vectorizer = joblib.load('020125_README_vectorizer.joblib')\n", "data_vectorized = vectorizer.transform(lemmatized_corpus) " ] }, { "cell_type": "code", "execution_count": 17, "id": "dd1a70c2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 5 folds for each of 6 candidates, totalling 30 fits\n", "[CV 1/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 1/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-1007509.681 total time= 10.3s\n", "[CV 2/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 2/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-1014261.652 total time= 10.7s\n", "[CV 3/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 3/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-1022848.244 total time= 10.3s\n", "[CV 4/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 4/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-973246.017 total time= 9.8s\n", "[CV 5/5; 1/6] START batch_size=128, learning_decay=0.5, n_components=11.........\n", "[CV 5/5; 1/6] END batch_size=128, learning_decay=0.5, n_components=11;, score=-999233.122 total time= 9.8s\n", "[CV 1/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 1/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-1005592.521 total time= 9.5s\n", "[CV 2/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 2/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-1018157.449 total time= 9.9s\n", "[CV 3/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 3/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-1021034.619 total time= 10.0s\n", "[CV 4/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 4/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-975254.657 total time= 10.1s\n", "[CV 5/5; 2/6] START batch_size=128, learning_decay=0.7, n_components=11.........\n", "[CV 5/5; 2/6] END batch_size=128, learning_decay=0.7, n_components=11;, score=-999502.591 total time= 9.9s\n", "[CV 1/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 1/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-1006733.511 total time= 9.8s\n", "[CV 2/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 2/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-1014617.289 total time= 9.8s\n", "[CV 3/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 3/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-1020025.742 total time= 10.0s\n", "[CV 4/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 4/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-974336.406 total time= 10.1s\n", "[CV 5/5; 3/6] START batch_size=128, learning_decay=0.9, n_components=11.........\n", "[CV 5/5; 3/6] END batch_size=128, learning_decay=0.9, n_components=11;, score=-1002762.208 total time= 10.0s\n", "[CV 1/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 1/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-1002368.558 total time= 9.5s\n", "[CV 2/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 2/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-1011512.930 total time= 9.9s\n", "[CV 3/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 3/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-1021450.228 total time= 10.0s\n", "[CV 4/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 4/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-974933.561 total time= 9.7s\n", "[CV 5/5; 4/6] START batch_size=256, learning_decay=0.5, n_components=11.........\n", "[CV 5/5; 4/6] END batch_size=256, learning_decay=0.5, n_components=11;, score=-1000500.033 total time= 9.9s\n", "[CV 1/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 1/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-1004646.970 total time= 9.6s\n", "[CV 2/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 2/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-1011587.159 total time= 9.8s\n", "[CV 3/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 3/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-1020348.275 total time= 9.8s\n", "[CV 4/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 4/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-974751.507 total time= 10.0s\n", "[CV 5/5; 5/6] START batch_size=256, learning_decay=0.7, n_components=11.........\n", "[CV 5/5; 5/6] END batch_size=256, learning_decay=0.7, n_components=11;, score=-1001461.612 total time= 9.8s\n", "[CV 1/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 1/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-1005603.520 total time= 9.8s\n", "[CV 2/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 2/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-1014507.304 total time= 9.8s\n", "[CV 3/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 3/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-1022378.609 total time= 10.0s\n", "[CV 4/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 4/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-971582.299 total time= 9.9s\n", "[CV 5/5; 6/6] START batch_size=256, learning_decay=0.9, n_components=11.........\n", "[CV 5/5; 6/6] END batch_size=256, learning_decay=0.9, n_components=11;, score=-1000234.956 total time= 9.7s\n", "Best Model's Params: {'batch_size': 256, 'learning_decay': 0.5, 'n_components': 11}\n", "Best Log Likelihood Score: -1002153.0620655585\n", "Model Perplexity: 2065.7772975666703\n" ] } ], "source": [ "lda_model_identification(data_vectorized)" ] }, { "cell_type": "code", "execution_count": 19, "id": "aa83d20f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Topic: 0\n", " obj, http, stream, filter, length, type, image, pypi, svg, page\n", "Topic: 1\n", " module, perl, test, make, server, cpan, install, command, version, process\n", "Topic: 2\n", " window, device, linux, bug, server, gnome, packet, network, support, work\n", "Topic: 3\n", " install, make, build, directory, package, file, configure, library, usr, path\n", "Topic: 4\n", " license, copyright, gnu, version, public, free, general, warranty, copy, library\n", "Topic: 5\n", " class, object, client, django, interface, json, key, request, new, url\n", "Topic: 6\n", " html, xml, node, like, make, using, page, language, library, graph\n", "Topic: 7\n", " test, version, project, git, google, package, setup, add, library, support\n", "Topic: 8\n", " table, function, default, mode, path, text, add, level, used, output\n", "Topic: 9\n", " file, image, support, format, library, font, version, read, example, html\n", "Topic: 10\n", " value, function, string, object, return, type, data, method, error, argument\n" ] } ], "source": [ "topic_distributions = best_lda_model(data_vectorized, vectorizer.get_feature_names_out())" ] }, { "cell_type": "code", "execution_count": 21, "id": "f4345bd6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{0: [0.9998131703476353, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf'], 1: [0.9936580635354768, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README'], 2: [0.9992995657213791, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_muffin.git_hullabaloo_README'], 3: [0.988192939654375, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/chewing_scim-chewing.git_hullabaloo_README'], 4: [0.9964897891037261, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_webmail.git_hullabaloo_README'], 5: [0.9943880112670485, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/state-machines_state_machines-activemodel_hullabaloo_README.md'], 6: [0.999759729377782, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md'], 7: [0.998666933112433, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_photutils.git_hullabaloo_README.rst'], 8: [0.9996679425883734, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md'], 9: [0.99815957978939, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/agmartin_linuxdoc-tools_hullabaloo_README'], 10: [0.9996663626936376, '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt']}\n" ] }, { "data": { "text/plain": [ "{0: [0.9998131703476353,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf'],\n", " 1: [0.9936580635354768,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README'],\n", " 2: [0.9992995657213791,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_muffin.git_hullabaloo_README'],\n", " 3: [0.988192939654375,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/chewing_scim-chewing.git_hullabaloo_README'],\n", " 4: [0.9964897891037261,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_webmail.git_hullabaloo_README'],\n", " 5: [0.9943880112670485,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/state-machines_state_machines-activemodel_hullabaloo_README.md'],\n", " 6: [0.999759729377782,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md'],\n", " 7: [0.998666933112433,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_photutils.git_hullabaloo_README.rst'],\n", " 8: [0.9996679425883734,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md'],\n", " 9: [0.99815957978939,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/agmartin_linuxdoc-tools_hullabaloo_README'],\n", " 10: [0.9996663626936376,\n", " '/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt']}" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_most_prevalent(data_vectorized, file_list)" ] }, { "cell_type": "code", "execution_count": 23, "id": "23468e82", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "349\n", "-----------------------Topic 0 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "939 0.986430 0.001357 0.001357 0.001357 0.001357 0.001357 0.001357 \n", "1267 0.928259 0.000777 0.000777 0.064747 0.000777 0.000777 0.000777 \n", "206 0.919651 0.001357 0.001357 0.001357 0.068137 0.001357 0.001357 \n", "3626 0.908870 0.002066 0.002066 0.002066 0.002066 0.002066 0.002066 \n", "2397 0.891207 0.001818 0.001818 0.001819 0.001818 0.092427 0.001818 \n", "3630 0.889165 0.000587 0.000587 0.105556 0.000587 0.000587 0.000587 \n", "1781 0.888760 0.000699 0.000699 0.104946 0.000699 0.000699 0.000699 \n", "60 0.863787 0.001299 0.001299 0.001299 0.001299 0.001299 0.001299 \n", "2792 0.848473 0.015152 0.015152 0.015154 0.015153 0.015154 0.015154 \n", "\n", " 7 8 9 10 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "939 0.001357 0.001357 0.001357 0.001357 \n", "1267 0.000777 0.000777 0.000777 0.000777 \n", "206 0.001357 0.001357 0.001357 0.001357 \n", "3626 0.002066 0.002066 0.072534 0.002066 \n", "2397 0.001818 0.001818 0.001819 0.001818 \n", "3630 0.000587 0.000587 0.000587 0.000587 \n", "1781 0.000699 0.000699 0.000699 0.000699 \n", "60 0.001299 0.001299 0.001299 0.124523 \n", "2792 0.015152 0.015152 0.015152 0.015153 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/zopefoundation_roman_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kingosticks_mopidy-tunein_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_astroplan.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ionelmc_python-tblib_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/pylons_plaster_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mopidy_mopidy-alsamixer_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tkem_mopidy-internetarchive_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Toilal_rebulk_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/halcy_Mastodon.py.git_hullabaloo_README.md\n", " 0 1 2 3 4 5 6 \\\n", "735 0.000018 0.000018 0.152543 0.000018 0.067391 0.000018 0.000018 \n", "1321 0.000018 0.000018 0.462060 0.062600 0.306453 0.000018 0.000018 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "1606 0.000025 0.000025 0.581536 0.337059 0.007607 0.000025 0.000025 \n", "1107 0.000030 0.071306 0.000030 0.018016 0.011003 0.000030 0.000030 \n", "3280 0.000032 0.000032 0.070475 0.026385 0.007691 0.000032 0.003867 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "1437 0.000038 0.000038 0.677241 0.075530 0.012854 0.000038 0.000038 \n", "\n", " 7 8 9 10 \n", "735 0.333556 0.173726 0.272679 0.000018 \n", "1321 0.000018 0.089744 0.000018 0.079036 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "1606 0.000025 0.000025 0.073622 0.000025 \n", "1107 0.561982 0.061747 0.000030 0.275796 \n", "3280 0.000032 0.820210 0.071213 0.000032 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "1437 0.000038 0.093611 0.120873 0.019700 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "-----------------------Topic 1 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "2518 0.005935 0.993658 0.000045 0.000045 0.000045 0.000045 0.000045 \n", "2677 0.000722 0.992785 0.000722 0.000722 0.000722 0.000722 0.000722 \n", "4073 0.000967 0.990328 0.000967 0.000967 0.000967 0.000967 0.000967 \n", "1755 0.001021 0.989785 0.001022 0.001022 0.001022 0.001022 0.001021 \n", "2100 0.001045 0.989550 0.001045 0.001045 0.001045 0.001045 0.001045 \n", "2535 0.001057 0.989428 0.001057 0.001057 0.001057 0.001057 0.001057 \n", "222 0.001151 0.988492 0.001151 0.001151 0.001151 0.001151 0.001151 \n", "1473 0.001245 0.987546 0.001245 0.001246 0.001246 0.001245 0.001245 \n", "1226 0.001299 0.987012 0.001299 0.001299 0.001299 0.001299 0.001299 \n", "3717 0.002525 0.974746 0.002525 0.002526 0.002526 0.002525 0.002525 \n", "\n", " 7 8 9 10 \n", "2518 0.000045 0.000045 0.000045 0.000045 \n", "2677 0.000722 0.000722 0.000722 0.000722 \n", "4073 0.000967 0.000967 0.000967 0.000967 \n", "1755 0.001022 0.001022 0.001022 0.001021 \n", "2100 0.001045 0.001045 0.001045 0.001045 \n", "2535 0.001057 0.001057 0.001057 0.001057 \n", "222 0.001151 0.001151 0.001151 0.001151 \n", "1473 0.001245 0.001245 0.001245 0.001245 \n", "1226 0.001299 0.001299 0.001299 0.001299 \n", "3717 0.002525 0.002525 0.002525 0.002525 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/book_Test-Database.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/shlomif_perl-io-socket-inet6.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/maddingue_SNMP-Extension-PassPersist.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/perl-openssl_perl-crypt-openssl-pkcs10.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/wchristian_crypt-dh.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/theory_class-meta.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ap_Test-File-Contents.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/porridge_ydpdict_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/p5-number-fraction_number-fraction.git_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "735 0.000018 0.000018 0.152543 0.000018 0.067391 0.000018 0.000018 \n", "1321 0.000018 0.000018 0.462060 0.062600 0.306453 0.000018 0.000018 \n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "1606 0.000025 0.000025 0.581536 0.337059 0.007607 0.000025 0.000025 \n", "3280 0.000032 0.000032 0.070475 0.026385 0.007691 0.000032 0.003867 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "1437 0.000038 0.000038 0.677241 0.075530 0.012854 0.000038 0.000038 \n", "\n", " 7 8 9 10 \n", "735 0.333556 0.173726 0.272679 0.000018 \n", "1321 0.000018 0.089744 0.000018 0.079036 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "1606 0.000025 0.000025 0.073622 0.000025 \n", "3280 0.000032 0.820210 0.071213 0.000032 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "1437 0.000038 0.093611 0.120873 0.019700 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "-----------------------Topic 2 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1570 0.000070 0.000070 0.999300 0.000070 0.000070 0.000070 0.000070 \n", "97 0.002410 0.000063 0.997025 0.000063 0.000063 0.000063 0.000063 \n", "1076 0.001653 0.001653 0.983470 0.001653 0.001653 0.001653 0.001653 \n", "2419 0.002331 0.002331 0.976687 0.002331 0.002331 0.002331 0.002331 \n", "627 0.003955 0.003953 0.960470 0.003953 0.003953 0.003953 0.003953 \n", "4107 0.004546 0.004546 0.954540 0.004546 0.004546 0.004546 0.004546 \n", "3149 0.000410 0.065727 0.930587 0.000410 0.000410 0.000410 0.000410 \n", "1864 0.007576 0.007576 0.924238 0.007576 0.007577 0.007576 0.007576 \n", "288 0.007576 0.007576 0.924238 0.007577 0.007576 0.007576 0.007576 \n", "1066 0.007576 0.007576 0.924235 0.007577 0.007576 0.007576 0.007578 \n", "\n", " 7 8 9 10 \n", "1570 0.000070 0.000070 0.000070 0.000070 \n", "97 0.000063 0.000063 0.000063 0.000063 \n", "1076 0.001653 0.001653 0.001653 0.001653 \n", "2419 0.002331 0.002331 0.002332 0.002331 \n", "627 0.003953 0.003953 0.003953 0.003953 \n", "4107 0.004546 0.004546 0.004546 0.004546 \n", "3149 0.000410 0.000410 0.000410 0.000410 \n", "1864 0.007577 0.007576 0.007576 0.007576 \n", "288 0.007576 0.007576 0.007576 0.007576 \n", "1066 0.007576 0.007576 0.007576 0.007576 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/linuxmint_muffin.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mate-desktop_marco.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_gdk-pixbuf_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/conserver_conserver.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ukui_peony_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mate-desktop_caja-actions_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mogaal_sendemail_hullabaloo_README-BR.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sdr_rtl-sdr.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/brendangregg_perf-tools_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_libwnck_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "1107 0.000030 0.071306 0.000030 0.018016 0.011003 0.000030 0.000030 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "832 0.000043 0.000043 0.000043 0.000043 0.000043 0.000043 0.061009 \n", "2063 0.009143 0.000045 0.000045 0.070350 0.000045 0.465237 0.020005 \n", "2518 0.005935 0.993658 0.000045 0.000045 0.000045 0.000045 0.000045 \n", "2068 0.000046 0.000046 0.000046 0.022781 0.000046 0.003475 0.000046 \n", "\n", " 7 8 9 10 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "1107 0.561982 0.061747 0.000030 0.275796 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "832 0.000043 0.349562 0.423503 0.165625 \n", "2063 0.056883 0.147322 0.000045 0.230882 \n", "2518 0.000045 0.000045 0.000045 0.000045 \n", "2068 0.054426 0.103594 0.392211 0.423285 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jmtd_wadc_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/EsotericSoftware_kryo.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enthought_mayavi.git_hullabaloo_README.txt\n", "-----------------------Topic 3 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "3197 0.001181 0.001181 0.001181 0.988193 0.001181 0.001181 0.001181 \n", "959 0.001856 0.001855 0.001855 0.981446 0.001855 0.001855 0.001855 \n", "3641 0.001934 0.001935 0.001934 0.980656 0.001934 0.001935 0.001934 \n", "946 0.002066 0.002066 0.002066 0.979338 0.002066 0.002066 0.002066 \n", "2855 0.002457 0.002457 0.002457 0.975428 0.002457 0.002457 0.002457 \n", "1398 0.002755 0.002755 0.002755 0.972451 0.002755 0.002755 0.002755 \n", "2019 0.002755 0.002755 0.002755 0.972451 0.002755 0.002755 0.002755 \n", "321 0.002755 0.002755 0.002755 0.972450 0.002755 0.002755 0.002755 \n", "2612 0.002933 0.002933 0.002933 0.970673 0.002933 0.002933 0.002933 \n", "1510 0.003030 0.003030 0.003030 0.969696 0.003030 0.003031 0.003030 \n", "\n", " 7 8 9 10 \n", "3197 0.001181 0.001181 0.001181 0.001181 \n", "959 0.001855 0.001855 0.001855 0.001855 \n", "3641 0.001935 0.001934 0.001934 0.001934 \n", "946 0.002066 0.002066 0.002066 0.002066 \n", "2855 0.002458 0.002457 0.002457 0.002457 \n", "1398 0.002755 0.002755 0.002755 0.002755 \n", "2019 0.002755 0.002755 0.002755 0.002755 \n", "321 0.002755 0.002755 0.002755 0.002755 \n", "2612 0.002933 0.002933 0.002933 0.002933 \n", "1510 0.003030 0.003030 0.003030 0.003030 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/chewing_scim-chewing.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/lastpass_lastpass-cli_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Ultimaker_libSavitar.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/systemd-cron_systemd-cron.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mati75_volumeicon-debian.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/x42_x42-plugins_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kiwix_libkiwix.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/rolinh_dfc_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/cairo_cairomm_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/dmc_gfal2_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "735 0.000018 0.000018 0.152543 0.000018 0.067391 0.000018 0.000018 \n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "2084 0.000039 0.034171 0.902181 0.000039 0.000039 0.000039 0.000039 \n", "832 0.000043 0.000043 0.000043 0.000043 0.000043 0.000043 0.061009 \n", "2518 0.005935 0.993658 0.000045 0.000045 0.000045 0.000045 0.000045 \n", "2829 0.000046 0.052600 0.069040 0.000046 0.048128 0.000046 0.000046 \n", "\n", " 7 8 9 10 \n", "735 0.333556 0.173726 0.272679 0.000018 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "2084 0.000039 0.000039 0.000039 0.063336 \n", "832 0.000043 0.349562 0.423503 0.165625 \n", "2518 0.000045 0.000045 0.000045 0.000045 \n", "2829 0.000046 0.000046 0.000046 0.829909 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jmtd_wadc_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/PyGreSQL_PyGreSQL_hullabaloo_README\n", "-----------------------Topic 4 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "3298 0.000351 0.000351 0.000351 0.000351 0.996490 0.000351 0.000351 \n", "2734 0.000410 0.000410 0.000410 0.000410 0.995905 0.000410 0.000410 \n", "1147 0.000457 0.000457 0.000457 0.000457 0.995431 0.000457 0.000457 \n", "3491 0.000505 0.000505 0.000505 0.000505 0.994949 0.000505 0.000505 \n", "4251 0.000544 0.000544 0.000544 0.000544 0.994556 0.000544 0.000544 \n", "1665 0.000598 0.000598 0.000598 0.000598 0.994019 0.000598 0.000598 \n", "2474 0.000606 0.000606 0.000606 0.000606 0.993939 0.000606 0.000606 \n", "3518 0.000668 0.000669 0.000668 0.000669 0.993315 0.000668 0.000669 \n", "2686 0.000805 0.000805 0.000805 0.000805 0.991955 0.000805 0.000805 \n", "4028 0.000834 0.000834 0.000834 0.000834 0.991659 0.000834 0.000834 \n", "\n", " 7 8 9 10 \n", "3298 0.000351 0.000351 0.000351 0.000351 \n", "2734 0.000410 0.000410 0.000410 0.000410 \n", "1147 0.000457 0.000457 0.000457 0.000457 \n", "3491 0.000505 0.000505 0.000505 0.000505 \n", "4251 0.000544 0.000544 0.000544 0.000544 \n", "1665 0.000598 0.000598 0.000598 0.000598 \n", "2474 0.000606 0.000606 0.000606 0.000606 \n", "3518 0.000668 0.000669 0.000669 0.000669 \n", "2686 0.000805 0.000805 0.000805 0.000805 \n", "4028 0.000834 0.000834 0.000834 0.000834 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_webmail.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_groupware.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_imp.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_mnemo.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_kronolith.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_sesha.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_gollem.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ocaml-batteries-team_batteries-included.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/nxt-firmware.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/horde_trean.git_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "2084 0.000039 0.034171 0.902181 0.000039 0.000039 0.000039 0.000039 \n", "3594 0.000043 0.000043 0.025673 0.009215 0.000043 0.000043 0.000043 \n", "832 0.000043 0.000043 0.000043 0.000043 0.000043 0.000043 0.061009 \n", "3488 0.000044 0.000044 0.016315 0.037750 0.000044 0.237159 0.020084 \n", "2063 0.009143 0.000045 0.000045 0.070350 0.000045 0.465237 0.020005 \n", "\n", " 7 8 9 10 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "2084 0.000039 0.000039 0.000039 0.063336 \n", "3594 0.000043 0.080246 0.000043 0.884566 \n", "832 0.000043 0.349562 0.423503 0.165625 \n", "3488 0.000044 0.000044 0.499671 0.188801 \n", "2063 0.056883 0.147322 0.000045 0.230882 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jmtd_wadc_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/OpenPrinting_foomatic-db.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/EsotericSoftware_kryo.git_hullabaloo_README.md\n", "-----------------------Topic 5 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1476 0.000561 0.000561 0.000561 0.000561 0.000561 0.994388 0.000561 \n", "2071 0.000623 0.000623 0.000623 0.000623 0.000623 0.993773 0.000623 \n", "418 0.000819 0.000819 0.000819 0.000819 0.000819 0.991810 0.000819 \n", "4222 0.001196 0.001196 0.001196 0.001196 0.001196 0.988037 0.001196 \n", "3804 0.001280 0.001280 0.001281 0.001280 0.001280 0.987195 0.001281 \n", "168 0.001377 0.001378 0.001377 0.001378 0.001378 0.986225 0.001377 \n", "3429 0.001466 0.001466 0.001466 0.001466 0.001466 0.985337 0.001466 \n", "1651 0.001567 0.001567 0.001567 0.001567 0.001567 0.984325 0.001567 \n", "3185 0.001653 0.001653 0.001653 0.001653 0.001653 0.983470 0.001653 \n", "411 0.001748 0.001748 0.001748 0.001748 0.001748 0.982517 0.001748 \n", "\n", " 7 8 9 10 \n", "1476 0.000561 0.000561 0.000561 0.000561 \n", "2071 0.000623 0.000623 0.000623 0.000623 \n", "418 0.000819 0.000819 0.000819 0.000819 \n", "4222 0.001196 0.001196 0.001196 0.001196 \n", "3804 0.001280 0.001281 0.001280 0.001281 \n", "168 0.001378 0.001377 0.001377 0.001378 \n", "3429 0.001466 0.001466 0.001466 0.001466 \n", "1651 0.001568 0.001567 0.001567 0.001567 \n", "3185 0.001653 0.001653 0.001653 0.001653 \n", "411 0.001748 0.001748 0.001748 0.001748 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/state-machines_state_machines-activemodel_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/rails_jbuilder_hullabaloo_README.rd\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/state-machines_state_machines-activerecord_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ruby-amqp_amqp.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/lostisland_faraday_middleware_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ruby-concurrency_thread_safe_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/state-machines_state_machines_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/doorkeeper-gem_doorkeeper-openid_connect.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/rails_rails-dom-testing_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/lwe_entypo-rails_hullabaloo_README.md\n", " 0 1 2 3 4 5 6 \\\n", "735 0.000018 0.000018 0.152543 0.000018 0.067391 0.000018 0.000018 \n", "1321 0.000018 0.000018 0.462060 0.062600 0.306453 0.000018 0.000018 \n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "1606 0.000025 0.000025 0.581536 0.337059 0.007607 0.000025 0.000025 \n", "1107 0.000030 0.071306 0.000030 0.018016 0.011003 0.000030 0.000030 \n", "3280 0.000032 0.000032 0.070475 0.026385 0.007691 0.000032 0.003867 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "1437 0.000038 0.000038 0.677241 0.075530 0.012854 0.000038 0.000038 \n", "\n", " 7 8 9 10 \n", "735 0.333556 0.173726 0.272679 0.000018 \n", "1321 0.000018 0.089744 0.000018 0.079036 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "1606 0.000025 0.000025 0.073622 0.000025 \n", "1107 0.561982 0.061747 0.000030 0.275796 \n", "3280 0.000032 0.820210 0.071213 0.000032 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "1437 0.000038 0.093611 0.120873 0.019700 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "-----------------------Topic 6 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3770 0.001894 0.001894 0.001894 0.001894 0.001894 0.001894 0.981059 \n", "277 0.002597 0.002598 0.002598 0.002598 0.002598 0.002598 0.974023 \n", "2445 0.000610 0.000610 0.000610 0.000610 0.000610 0.000610 0.958713 \n", "3773 0.004545 0.004546 0.004546 0.004546 0.004546 0.004546 0.954543 \n", "2176 0.004785 0.004785 0.004785 0.004785 0.004785 0.004785 0.952150 \n", "2683 0.004785 0.004785 0.004785 0.004785 0.004785 0.004785 0.952150 \n", "3097 0.004785 0.004785 0.004785 0.004785 0.004785 0.004785 0.952150 \n", "3898 0.004785 0.004785 0.004785 0.004785 0.004785 0.004785 0.952150 \n", "1438 0.000092 0.000092 0.000092 0.000092 0.000092 0.000092 0.951493 \n", "\n", " 7 8 9 10 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3770 0.001894 0.001894 0.001894 0.001894 \n", "277 0.002598 0.002598 0.002598 0.002598 \n", "2445 0.000610 0.000610 0.035795 0.000610 \n", "3773 0.004546 0.004546 0.004546 0.004546 \n", "2176 0.004786 0.004785 0.004785 0.004785 \n", "2683 0.004786 0.004785 0.004785 0.004785 \n", "3097 0.004786 0.004785 0.004785 0.004785 \n", "3898 0.004786 0.004785 0.004785 0.004785 \n", "1438 0.000092 0.000092 0.000092 0.047675 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jquery_sizzle.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/gregkh_bti.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tavianator_bfs.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/xonsh_xonsh.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/angband_angband_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/subdownloader_subdownloader_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Beep6581_RawTherapee_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mlpack_mlpack_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/erlware_erlware_commons.git_hullabaloo_README.md\n", " 0 1 2 3 4 5 6 \\\n", "735 0.000018 0.000018 0.152543 0.000018 0.067391 0.000018 0.000018 \n", "1321 0.000018 0.000018 0.462060 0.062600 0.306453 0.000018 0.000018 \n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "1606 0.000025 0.000025 0.581536 0.337059 0.007607 0.000025 0.000025 \n", "1107 0.000030 0.071306 0.000030 0.018016 0.011003 0.000030 0.000030 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "1437 0.000038 0.000038 0.677241 0.075530 0.012854 0.000038 0.000038 \n", "2084 0.000039 0.034171 0.902181 0.000039 0.000039 0.000039 0.000039 \n", "\n", " 7 8 9 10 \n", "735 0.333556 0.173726 0.272679 0.000018 \n", "1321 0.000018 0.089744 0.000018 0.079036 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "1606 0.000025 0.000025 0.073622 0.000025 \n", "1107 0.561982 0.061747 0.000030 0.275796 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "1437 0.000038 0.093611 0.120873 0.019700 \n", "2084 0.000039 0.000039 0.000039 0.063336 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "-----------------------Topic 7 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "122 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 \n", "3001 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 \n", "4122 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 0.000133 \n", "3720 0.001515 0.001515 0.001515 0.001515 0.001515 0.001515 0.001515 \n", "507 0.001567 0.001567 0.001567 0.001568 0.001568 0.001567 0.001567 \n", "1763 0.001623 0.001624 0.001623 0.001624 0.001624 0.001623 0.001623 \n", "3670 0.001653 0.001653 0.001653 0.001653 0.001653 0.001653 0.001653 \n", "2801 0.002066 0.002066 0.002066 0.002066 0.002066 0.002066 0.002066 \n", "1117 0.002066 0.002066 0.002066 0.002066 0.002066 0.002066 0.002066 \n", "2984 0.002066 0.002066 0.002066 0.002066 0.002066 0.002066 0.002066 \n", "\n", " 7 8 9 10 \n", "122 0.998667 0.000133 0.000133 0.000133 \n", "3001 0.998667 0.000133 0.000133 0.000133 \n", "4122 0.998667 0.000133 0.000133 0.000133 \n", "3720 0.984847 0.001515 0.001515 0.001515 \n", "507 0.984325 0.001567 0.001567 0.001567 \n", "1763 0.983765 0.001623 0.001623 0.001623 \n", "3670 0.983470 0.001653 0.001653 0.001653 \n", "2801 0.979338 0.002066 0.002066 0.002066 \n", "1117 0.979338 0.002066 0.002066 0.002066 \n", "2984 0.979338 0.002066 0.002066 0.002066 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_photutils.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_astroquery.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/astropy_ccdproc.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jpastuszek_capture-output_hullabaloo_README.rdoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kaminari_kaminari_hullabaloo_README.rdoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tmuxinator_tmuxinator_hullabaloo_README.rdoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/duritong_trocla_hullabaloo_README.rdoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/technicalpickles_homesick_hullabaloo_README.rdoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/hipchat_hipchat-rb_hullabaloo_README.rdoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/cucumber_aruba.git_hullabaloo_README.rdoc\n", " 0 1 2 3 4 5 6 \\\n", "1321 0.000018 0.000018 0.462060 0.062600 0.306453 0.000018 0.000018 \n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "1606 0.000025 0.000025 0.581536 0.337059 0.007607 0.000025 0.000025 \n", "3280 0.000032 0.000032 0.070475 0.026385 0.007691 0.000032 0.003867 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "1437 0.000038 0.000038 0.677241 0.075530 0.012854 0.000038 0.000038 \n", "2084 0.000039 0.034171 0.902181 0.000039 0.000039 0.000039 0.000039 \n", "3594 0.000043 0.000043 0.025673 0.009215 0.000043 0.000043 0.000043 \n", "\n", " 7 8 9 10 \n", "1321 0.000018 0.089744 0.000018 0.079036 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "1606 0.000025 0.000025 0.073622 0.000025 \n", "3280 0.000032 0.820210 0.071213 0.000032 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "1437 0.000038 0.093611 0.120873 0.019700 \n", "2084 0.000039 0.000039 0.000039 0.063336 \n", "3594 0.000043 0.080246 0.000043 0.884566 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/iortcw_iortcw.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "-----------------------Topic 8 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "1910 0.000050 0.000050 0.000050 0.000050 0.000050 0.000050 0.000050 \n", "2782 0.000125 0.000125 0.000125 0.000125 0.000125 0.000125 0.000125 \n", "3940 0.009091 0.009091 0.009092 0.009091 0.009091 0.009091 0.009092 \n", "3740 0.000076 0.000076 0.000076 0.024915 0.006011 0.000076 0.000076 \n", "1598 0.000526 0.000526 0.063883 0.000526 0.000525 0.000526 0.025487 \n", "3147 0.012987 0.012987 0.012987 0.012987 0.012988 0.012988 0.012990 \n", "2298 0.000096 0.000096 0.000096 0.000096 0.000096 0.000096 0.000096 \n", "1744 0.001684 0.001684 0.001684 0.001684 0.001684 0.001684 0.126724 \n", "310 0.015152 0.015153 0.015152 0.015155 0.015154 0.015153 0.015153 \n", "\n", " 7 8 9 10 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "1910 0.000050 0.999496 0.000050 0.000050 \n", "2782 0.000125 0.998746 0.000125 0.000125 \n", "3940 0.009091 0.909084 0.009092 0.009093 \n", "3740 0.000076 0.907753 0.000076 0.060791 \n", "1598 0.000526 0.906426 0.000526 0.000526 \n", "3147 0.012988 0.870119 0.012989 0.012988 \n", "2298 0.033925 0.865677 0.099632 0.000096 \n", "1744 0.001684 0.858124 0.001684 0.001684 \n", "310 0.015153 0.848467 0.015154 0.015154 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/trackballs_trackballs.git_hullabaloo_README.html\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/keras-team_keras_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/thlorenz_combine-source-map.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/pgRouting_pgrouting.git_hullabaloo_README.routing\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mawww_kakoune.git_hullabaloo_README.asciidoc\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/tidymodels_recipes_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Parchive_par2cmdline.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/katomic.git_hullabaloo_README.levels\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/weaverba137_pydl_hullabaloo_README.txt\n", " 0 1 2 3 4 5 6 \\\n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "1606 0.000025 0.000025 0.581536 0.337059 0.007607 0.000025 0.000025 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "2084 0.000039 0.034171 0.902181 0.000039 0.000039 0.000039 0.000039 \n", "3488 0.000044 0.000044 0.016315 0.037750 0.000044 0.237159 0.020084 \n", "2518 0.005935 0.993658 0.000045 0.000045 0.000045 0.000045 0.000045 \n", "2829 0.000046 0.052600 0.069040 0.000046 0.048128 0.000046 0.000046 \n", "112 0.000051 0.000051 0.000051 0.000051 0.000051 0.000051 0.000051 \n", "\n", " 7 8 9 10 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "1606 0.000025 0.000025 0.073622 0.000025 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "2084 0.000039 0.000039 0.000039 0.063336 \n", "3488 0.000044 0.000044 0.499671 0.188801 \n", "2518 0.000045 0.000045 0.000045 0.000045 \n", "2829 0.000046 0.000046 0.000046 0.829909 \n", "112 0.564349 0.000051 0.000051 0.435188 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/OpenPrinting_foomatic-db.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/PyGreSQL_PyGreSQL_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_evolution_hullabaloo_README.TXT\n", "-----------------------Topic 9 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "1961 0.000184 0.000184 0.000184 0.000184 0.000184 0.000184 0.000184 \n", "2881 0.000255 0.000255 0.000255 0.000255 0.000255 0.000255 0.000255 \n", "1106 0.000602 0.000602 0.000602 0.000602 0.000602 0.000602 0.000602 \n", "1016 0.002114 0.002114 0.002114 0.002114 0.002115 0.002114 0.002114 \n", "1574 0.002457 0.002457 0.002457 0.002457 0.002457 0.002457 0.002457 \n", "2204 0.002755 0.002755 0.002755 0.002755 0.002755 0.002755 0.002755 \n", "947 0.003953 0.003953 0.003953 0.003953 0.003953 0.003953 0.003953 \n", "2956 0.000587 0.000587 0.040665 0.000587 0.000587 0.000587 0.000587 \n", "1416 0.005051 0.005051 0.005051 0.005051 0.005051 0.005051 0.005051 \n", "3068 0.005051 0.005051 0.005051 0.005051 0.005051 0.005051 0.005051 \n", "\n", " 7 8 9 10 \n", "1961 0.000184 0.000184 0.998160 0.000184 \n", "2881 0.000255 0.000255 0.997453 0.000255 \n", "1106 0.000602 0.000602 0.993979 0.000602 \n", "1016 0.002114 0.002115 0.978856 0.002114 \n", "1574 0.002457 0.002457 0.975428 0.002457 \n", "2204 0.002755 0.002755 0.972450 0.002755 \n", "947 0.003953 0.003953 0.960471 0.003953 \n", "2956 0.000587 0.000587 0.954056 0.000587 \n", "1416 0.005051 0.005051 0.949493 0.005051 \n", "3068 0.005051 0.005051 0.949493 0.005051 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/agmartin_linuxdoc-tools_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/silx-kit_fabio.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/caseman_noise.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Bioconductor_GenomeInfoDb.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/dompdf_php-font-lib_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/freedoom_freedoom_hullabaloo_README.TXT\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/LLNL_sundials.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/WinFF_winff.git_hullabaloo_README-Presets.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/ianare_exif-py_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/AltraMayor_f3.git_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "1321 0.000018 0.000018 0.462060 0.062600 0.306453 0.000018 0.000018 \n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "3565 0.000024 0.000024 0.000024 0.000024 0.000024 0.042916 0.000024 \n", "1107 0.000030 0.071306 0.000030 0.018016 0.011003 0.000030 0.000030 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "2084 0.000039 0.034171 0.902181 0.000039 0.000039 0.000039 0.000039 \n", "3594 0.000043 0.000043 0.025673 0.009215 0.000043 0.000043 0.000043 \n", "2063 0.009143 0.000045 0.000045 0.070350 0.000045 0.465237 0.020005 \n", "\n", " 7 8 9 10 \n", "1321 0.000018 0.089744 0.000018 0.079036 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "3565 0.651042 0.000024 0.000024 0.305849 \n", "1107 0.561982 0.061747 0.000030 0.275796 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "2084 0.000039 0.000039 0.000039 0.063336 \n", "3594 0.000043 0.080246 0.000043 0.884566 \n", "2063 0.056883 0.147322 0.000045 0.230882 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/andrikos_kismet-debian_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/suds-community_suds.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/darold_ora2pg.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/epam_nfstrace_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/GNOME_genius_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/EsotericSoftware_kryo.git_hullabaloo_README.md\n", "-----------------------Topic 10 --------------------------------\n", " 0 1 2 3 4 5 6 \\\n", "3776 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "2853 0.000130 0.000130 0.000130 0.000130 0.000130 0.000130 0.000130 \n", "2955 0.000544 0.000544 0.000544 0.000544 0.000544 0.000544 0.000544 \n", "1100 0.000100 0.000100 0.000100 0.000100 0.015886 0.000100 0.000100 \n", "3123 0.002114 0.002114 0.002114 0.002114 0.002114 0.002114 0.002114 \n", "817 0.000777 0.000777 0.000777 0.000777 0.000777 0.045709 0.000777 \n", "1658 0.000089 0.063941 0.000089 0.000089 0.000089 0.000089 0.000089 \n", "2834 0.008265 0.008265 0.008265 0.008265 0.008265 0.008265 0.008265 \n", "913 0.001196 0.001196 0.001196 0.001196 0.079367 0.001196 0.001196 \n", "2319 0.009091 0.009091 0.009092 0.009093 0.009091 0.009091 0.009091 \n", "\n", " 7 8 9 10 \n", "3776 0.000033 0.000033 0.000033 0.999666 \n", "2853 0.000130 0.000130 0.000130 0.998696 \n", "2955 0.000544 0.000544 0.000544 0.994556 \n", "1100 0.000100 0.000100 0.000100 0.983214 \n", "3123 0.002114 0.002114 0.002114 0.978857 \n", "817 0.000777 0.000777 0.000777 0.947298 \n", "1658 0.000089 0.000089 0.000089 0.935259 \n", "2834 0.008265 0.008265 0.008266 0.917351 \n", "913 0.001196 0.001196 0.001196 0.909866 \n", "2319 0.009092 0.009092 0.009092 0.909084 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jazzband_inflect.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/gkz_type-check.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mschilli_cache-historical-perl.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bestpractical_rt-extension-repeatticket_hullabaloo_README.pod\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/lunarmodules_say.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/mikeboers_PyMemoize_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/perl5-utils_Params-Util_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/thlorenz_inline-source-map.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/osantana_dicteval.git_hullabaloo_README.rst\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kilobyte_termrec_hullabaloo_README\n", " 0 1 2 3 4 5 6 \\\n", "735 0.000018 0.000018 0.152543 0.000018 0.067391 0.000018 0.000018 \n", "3304 0.999813 0.000019 0.000019 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 0.000024 0.000024 0.999760 \n", "1606 0.000025 0.000025 0.581536 0.337059 0.007607 0.000025 0.000025 \n", "3280 0.000032 0.000032 0.070475 0.026385 0.007691 0.000032 0.003867 \n", "2157 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 0.000033 \n", "2518 0.005935 0.993658 0.000045 0.000045 0.000045 0.000045 0.000045 \n", "3939 0.000048 0.000048 0.129402 0.115562 0.002418 0.000048 0.056808 \n", "1388 0.000050 0.234664 0.331442 0.264437 0.157592 0.000050 0.000050 \n", "1910 0.000050 0.000050 0.000050 0.000050 0.000050 0.000050 0.000050 \n", "\n", " 7 8 9 10 \n", "735 0.333556 0.173726 0.272679 0.000018 \n", "3304 0.000019 0.000019 0.000019 0.000019 \n", "376 0.000024 0.000024 0.000024 0.000024 \n", "1606 0.000025 0.000025 0.073622 0.000025 \n", "3280 0.000032 0.820210 0.071213 0.000032 \n", "2157 0.000033 0.999668 0.000033 0.000033 \n", "2518 0.000045 0.000045 0.000045 0.000045 \n", "3939 0.014969 0.680601 0.000048 0.000048 \n", "1388 0.000050 0.011566 0.000050 0.000050 \n", "1910 0.000050 0.999496 0.000050 0.000050 \n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/audacity_audacity.git_hullabaloo_README.txt\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/sqlmapproject_sqlmap.git_hullabaloo_README.pdf\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/bluca_gsl_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/Xastir_Xastir.git_hullabaloo_README.1ST\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/smbolton_whysynth.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/enova_pg_fact_loader.git_hullabaloo_README.md\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/kjn_lbzip2_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/jrincayc_ucblogo-code.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/arno-iptables-firewall_aif.git_hullabaloo_README\n", "/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/trackballs_trackballs.git_hullabaloo_README.html\n", "0 0.034897\n", "1 0.079768\n", "2 0.096922\n", "3 0.161341\n", "4 0.110154\n", "5 0.093408\n", "6 0.070131\n", "7 0.118367\n", "8 0.061284\n", "9 0.096849\n", "10 0.076878\n", "dtype: float64\n" ] } ], "source": [ "prevalent_topics(data_vectorized, file_list)" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 5 }