1
0
govdoc-cr-analysis/text_analysis/quick_data_summary.ipynb

220 lines
6.4 KiB
Plaintext
Raw Normal View History

2025-02-03 04:51:49 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the two readability-metric tables (CONTRIBUTING files and README files).\n",
"# NOTE(review): the 020125 prefix looks like a snapshot date - confirm.\n",
"contributing_readability, readme_readability = map(\n",
"    pd.read_csv,\n",
"    (\"020125_CONTRIBUTING_readability.csv\", \"020125_README_readability.csv\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>filename</th>\n",
" <th>flesch_reading_ease</th>\n",
" <th>flesch_kincaid_grade</th>\n",
" <th>linsear_write_formula</th>\n",
" <th>dale_chall_readability_score</th>\n",
" <th>mcalpine_eflaw</th>\n",
" <th>reading_time</th>\n",
" <th>char_count</th>\n",
" <th>word_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>pytest-dev_pytest-bdd.git_hullabaloo_CONTRIBUT...</td>\n",
" <td>40.95</td>\n",
" <td>10.9</td>\n",
" <td>10.6000</td>\n",
" <td>11.61</td>\n",
" <td>18.2</td>\n",
" <td>6.58</td>\n",
" <td>448</td>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>lxqt_qterminal.git_hullabaloo_CONTRIBUTING.md</td>\n",
" <td>51.14</td>\n",
" <td>9.0</td>\n",
" <td>8.0000</td>\n",
" <td>9.63</td>\n",
" <td>15.3</td>\n",
" <td>10.44</td>\n",
" <td>711</td>\n",
" <td>105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>rbenv_ruby-build.git_hullabaloo_CONTRIBUTING.md</td>\n",
" <td>71.14</td>\n",
" <td>7.6</td>\n",
" <td>9.0000</td>\n",
" <td>10.05</td>\n",
" <td>27.0</td>\n",
" <td>1.16</td>\n",
" <td>79</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>muse-sequencer_muse.git_hullabaloo_CONTRIBUTING</td>\n",
" <td>-68.97</td>\n",
" <td>24.1</td>\n",
" <td>3.5000</td>\n",
" <td>25.99</td>\n",
" <td>6.0</td>\n",
" <td>1.09</td>\n",
" <td>74</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>mypaint_libmypaint_hullabaloo_CONTRIBUTING.md</td>\n",
" <td>51.04</td>\n",
" <td>9.1</td>\n",
" <td>6.5625</td>\n",
" <td>9.93</td>\n",
" <td>16.8</td>\n",
" <td>8.68</td>\n",
" <td>591</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" filename flesch_reading_ease \\\n",
"0 pytest-dev_pytest-bdd.git_hullabaloo_CONTRIBUT... 40.95 \n",
"1 lxqt_qterminal.git_hullabaloo_CONTRIBUTING.md 51.14 \n",
"2 rbenv_ruby-build.git_hullabaloo_CONTRIBUTING.md 71.14 \n",
"3 muse-sequencer_muse.git_hullabaloo_CONTRIBUTING -68.97 \n",
"4 mypaint_libmypaint_hullabaloo_CONTRIBUTING.md 51.04 \n",
"\n",
" flesch_kincaid_grade linsear_write_formula dale_chall_readability_score \\\n",
"0 10.9 10.6000 11.61 \n",
"1 9.0 8.0000 9.63 \n",
"2 7.6 9.0000 10.05 \n",
"3 24.1 3.5000 25.99 \n",
"4 9.1 6.5625 9.93 \n",
"\n",
" mcalpine_eflaw reading_time char_count word_count \n",
"0 18.2 6.58 448 67 \n",
"1 15.3 10.44 711 105 \n",
"2 27.0 1.16 79 17 \n",
"3 6.0 1.09 74 5 \n",
"4 16.8 8.68 591 94 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows to check the columns that were read in.\n",
"contributing_readability.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"214.0"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Median of the word_count column across the CONTRIBUTING table.\n",
"contributing_readability.loc[:, \"word_count\"].median()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9.1"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Median of the flesch_kincaid_grade column across the README table.\n",
"readme_readability.loc[:, \"flesch_kincaid_grade\"].median()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}