{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
"# Read the three CSV inputs; the bare file names resolve against the kernel's working directory.\n",
"merged_manifest = pd.read_csv('0203_contributing_merged_manifest.csv')\n",
"topic_distributions = pd.read_csv('020125_CONTRIBUTING_file_topic_distributions.csv')\n",
"readability_scores = pd.read_csv('020125_CONTRIBUTING_readability.csv')" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "715" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Inner-join readability scores with topic distributions on 'filename', then copy\n",
"# 'filename' into 'fvf_filepath' so the frame shares a key column with the manifest.\n",
"first_merge = (\n",
"    readability_scores\n",
"    .merge(topic_distributions, how=\"inner\", on=['filename'])\n",
"    .assign(fvf_filepath=lambda frame: frame['filename'])\n",
")\n",
"\n",
"# Inner-join with the manifest on 'fvf_filepath'; rows lacking a match on either side are dropped.\n",
"second_merge = first_merge.merge(merged_manifest, how=\"inner\", on=['fvf_filepath'])\n",
"\n",
"# Row count of the fully merged frame.\n",
"len(second_merge)" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
flesch_reading_easereading_timeword_count
meanmedianmeanmedianmeanmedian
ranef_grouping
046.10207149.9923.40928613.720259.635714153.0
144.23727653.4129.06578916.880329.151703198.0
247.43222254.7834.36162723.365394.242063252.0
\n", "
" ], "text/plain": [ " flesch_reading_ease reading_time word_count \\\n", " mean median mean median mean \n", "ranef_grouping \n", "0 46.102071 49.99 23.409286 13.720 259.635714 \n", "1 44.237276 53.41 29.065789 16.880 329.151703 \n", "2 47.432222 54.78 34.361627 23.365 394.242063 \n", "\n", " \n", " median \n", "ranef_grouping \n", "0 153.0 \n", "1 198.0 \n", "2 252.0 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "readability_aggregate = second_merge.groupby('ranef_grouping').agg({\n", " 'flesch_reading_ease' : ['mean', 'median'],\n", " 'reading_time' : ['mean', 'median'],\n", " 'word_count' : ['mean', 'median'],\n", "})\n", "\n", "readability_aggregate" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
t0t1t2t3t4
meanmeanmeanmeanmean
ranef_grouping
00.0932810.3682680.1563040.0987480.283398
10.1575190.2819080.1858170.1121950.262561
20.2025650.2525550.1671630.0906780.287039
\n", "
" ], "text/plain": [ " t0 t1 t2 t3 t4\n", " mean mean mean mean mean\n", "ranef_grouping \n", "0 0.093281 0.368268 0.156304 0.098748 0.283398\n", "1 0.157519 0.281908 0.185817 0.112195 0.262561\n", "2 0.202565 0.252555 0.167163 0.090678 0.287039" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "topic_aggregate = second_merge.groupby('ranef_grouping').agg({\n", " 't0' : ['mean'],\n", " 't1' : ['mean'],\n", " 't2' : ['mean'],\n", " 't3' : ['mean'],\n", " 't4' : ['mean']\n", "})\n", "\n", "topic_aggregate" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }