{ "cells": [ { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# Read the three input tables; the relative paths resolve against the kernel's working directory.\n", "readability_scores = pd.read_csv('020325_README_readability.csv')\n", "topic_distributions = pd.read_csv('020325_README_file_topic_distributions.csv')\n", "merged_manifest = pd.read_csv('0203_readme_merged_manifest.csv')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4247" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inner-join readability scores with topic distributions on the shared 'filename' column.\n", "first_merge = pd.merge(readability_scores, topic_distributions, how='inner', on='filename')\n", "# The manifest join below is keyed on 'fvf_filepath', so copy 'filename' into a column of that name.\n", "first_merge['fvf_filepath'] = first_merge['filename']\n", "second_merge = pd.merge(first_merge, merged_manifest, how='inner', on='fvf_filepath')\n", "# Row count of the final merged frame.\n", "len(second_merge)\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
filenameflesch_reading_easeflesch_kincaid_gradelinsear_write_formuladale_chall_readability_scoremcalpine_eflawreading_timechar_countword_countt0...effectgroupleveltermestimatestd.errorconf.lowconf.highranef_groupingrank
0libevent_libevent_hullabaloo_README50.3611.414.62500012.4726.37.37502830.001976...ran_valsproject_idlibevent_libeventbefore_after:week_index0.1865220.478091-0.7505191.12356313229.0
1zaach_jison_hullabaloo_README.md44.7813.57.71428610.7037.233.0522503950.000489...ran_valsproject_idzaach_jisonbefore_after:week_index1.0018790.2170630.5764431.42731524068.0
2lincolnloop_python-qrcode.git_hullabaloo_READM...58.288.47.58333310.7618.36.82464780.002597...ran_valsproject_idlincolnloop_python-qrcode.gitbefore_after:week_index-0.2206540.679910-1.5532541.11194611467.0
3mati75_evilwm.git_hullabaloo_README66.747.25.8888898.7718.156.7638647420.000266...ran_valsproject_idmati75_evilwm.gitbefore_after:week_index-0.2938460.697316-1.6605601.07286811184.0
4markdown-it_markdown-it_hullabaloo_README.md45.7611.112.00000012.8424.01.51103170.006993...ran_valsproject_idmarkdown-it_markdown-itbefore_after:week_index0.2386150.602856-0.9429611.42019013459.0
\n", "

5 rows × 33 columns

\n", "
" ], "text/plain": [ " filename flesch_reading_ease \\\n", "0 libevent_libevent_hullabaloo_README 50.36 \n", "1 zaach_jison_hullabaloo_README.md 44.78 \n", "2 lincolnloop_python-qrcode.git_hullabaloo_READM... 58.28 \n", "3 mati75_evilwm.git_hullabaloo_README 66.74 \n", "4 markdown-it_markdown-it_hullabaloo_README.md 45.76 \n", "\n", " flesch_kincaid_grade linsear_write_formula dale_chall_readability_score \\\n", "0 11.4 14.625000 12.47 \n", "1 13.5 7.714286 10.70 \n", "2 8.4 7.583333 10.76 \n", "3 7.2 5.888889 8.77 \n", "4 11.1 12.000000 12.84 \n", "\n", " mcalpine_eflaw reading_time char_count word_count t0 ... \\\n", "0 26.3 7.37 502 83 0.001976 ... \n", "1 37.2 33.05 2250 395 0.000489 ... \n", "2 18.3 6.82 464 78 0.002597 ... \n", "3 18.1 56.76 3864 742 0.000266 ... \n", "4 24.0 1.51 103 17 0.006993 ... \n", "\n", " effect group level \\\n", "0 ran_vals project_id libevent_libevent \n", "1 ran_vals project_id zaach_jison \n", "2 ran_vals project_id lincolnloop_python-qrcode.git \n", "3 ran_vals project_id mati75_evilwm.git \n", "4 ran_vals project_id markdown-it_markdown-it \n", "\n", " term estimate std.error conf.low conf.high \\\n", "0 before_after:week_index 0.186522 0.478091 -0.750519 1.123563 \n", "1 before_after:week_index 1.001879 0.217063 0.576443 1.427315 \n", "2 before_after:week_index -0.220654 0.679910 -1.553254 1.111946 \n", "3 before_after:week_index -0.293846 0.697316 -1.660560 1.072868 \n", "4 before_after:week_index 0.238615 0.602856 -0.942961 1.420190 \n", "\n", " ranef_grouping rank \n", "0 1 3229.0 \n", "1 2 4068.0 \n", "2 1 1467.0 \n", "3 1 1184.0 \n", "4 1 3459.0 \n", "\n", "[5 rows x 33 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the first five rows of the merged frame.\n", "second_merge.head(n=5)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
flesch_reading_easereading_timeword_count
meanmedianmeanmedianmeanmedian
ranef_grouping
155.08970852.90023.4868869.28256.97340690.0
246.42590951.36522.7606429.53249.534759100.0
\n", "
" ], "text/plain": [ " flesch_reading_ease reading_time word_count \\\n", " mean median mean median mean \n", "ranef_grouping \n", "1 55.089708 52.900 23.486886 9.28 256.973406 \n", "2 46.425909 51.365 22.760642 9.53 249.534759 \n", "\n", " \n", " median \n", "ranef_grouping \n", "1 90.0 \n", "2 100.0 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Mean and median of each readability column within every 'ranef_grouping' value.\n", "readability_aggregate = second_merge.groupby('ranef_grouping').agg(\n", "    {\n", "        metric: ['mean', 'median']\n", "        for metric in ('flesch_reading_ease', 'reading_time', 'word_count')\n", "    }\n", ")\n", "\n", "readability_aggregate" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
t0t1t2t3t4t5t6t7t8t9t10
meanmeanmeanmeanmeanmeanmeanmeanmeanmeanmean
ranef_grouping
10.0951420.1013660.0846090.1270500.0248730.0601550.0886600.1361910.0604510.1344080.087095
20.1229120.1023890.0507820.1687740.0157520.0721540.1028390.1159940.0536740.1192780.075450
\n", "
" ], "text/plain": [ " t0 t1 t2 t3 t4 t5 \\\n", " mean mean mean mean mean mean \n", "ranef_grouping \n", "1 0.095142 0.101366 0.084609 0.127050 0.024873 0.060155 \n", "2 0.122912 0.102389 0.050782 0.168774 0.015752 0.072154 \n", "\n", " t6 t7 t8 t9 t10 \n", " mean mean mean mean mean \n", "ranef_grouping \n", "1 0.088660 0.136191 0.060451 0.134408 0.087095 \n", "2 0.102839 0.115994 0.053674 0.119278 0.075450 " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Mean of each of the columns t0 .. t10 within every 'ranef_grouping' value.\n", "topic_aggregate = second_merge.groupby('ranef_grouping').agg(\n", "    {f't{topic_idx}': ['mean'] for topic_idx in range(11)}\n", ")\n", "\n", "topic_aggregate" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }