{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Readability metrics for CONTRIBUTING and README documents\n",
    "\n",
    "Scores every file in the two document directories with `textstat` and writes one CSV of metrics per directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "\n",
    "import textstat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "contributing_directory = \"/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/contributing/\"\n",
    "readme_directory = \"/data/users/mgaughan/kkex/012825_cam_revision_main/final_data/first_version_documents/readme/\"\n",
    "\n",
    "# Column order of the output CSVs. DictWriter rejects row keys that are not listed here,\n",
    "# so this list must stay in sync with the keys set in get_readibility.\n",
    "csv_fieldnames = [\n",
    "    'filename',\n",
    "    'flesch_reading_ease',\n",
    "    'flesch_kincaid_grade',\n",
    "    'linsear_write_formula',\n",
    "    'dale_chall_readability_score',\n",
    "    'mcalpine_eflaw',\n",
    "    'reading_time',\n",
    "    'char_count',\n",
    "    'word_count',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_file(output_csv, wdirectory):\n",
    "    \"\"\"Write one CSV row of readability metrics for each file in a directory.\n",
    "\n",
    "    Args:\n",
    "        output_csv: Path of the CSV file to create (overwritten if it exists).\n",
    "        wdirectory: Directory whose files are scored.\n",
    "    \"\"\"\n",
    "    # newline='' hands line-ending control to the csv module, as its docs require.\n",
    "    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:\n",
    "        writer = csv.DictWriter(csvfile, fieldnames=csv_fieldnames)\n",
    "        writer.writeheader()\n",
    "        # os.listdir returns entries in arbitrary order; sort for reproducible output.\n",
    "        for file in sorted(os.listdir(wdirectory)):\n",
    "            # os.path.join works whether or not wdirectory ends with a separator.\n",
    "            full_address = os.path.join(wdirectory, file)\n",
    "            # Skip subdirectories and other non-file entries; open() would fail on them.\n",
    "            if not os.path.isfile(full_address):\n",
    "                continue\n",
    "            file_dict = get_readibility(full_address, {\"filename\": file})\n",
    "            writer.writerow(file_dict)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_readibility(file_address, file_dict):\n",
    "    \"\"\"Add textstat readability metrics for one document to file_dict.\n",
    "\n",
    "    Args:\n",
    "        file_address: Path of the text file to score.\n",
    "        file_dict: Dict that receives the metrics; it is updated in place.\n",
    "\n",
    "    Returns:\n",
    "        The same file_dict, with one entry added per metric.\n",
    "    \"\"\"\n",
    "    # The context manager closes the handle as soon as the text has been read.\n",
    "    # errors=\"ignore\" drops bytes that are not valid UTF-8 instead of raising.\n",
    "    with open(file_address, \"r\", encoding='utf-8', errors=\"ignore\") as file:\n",
    "        document = file.read()\n",
    "    file_dict['flesch_reading_ease'] = textstat.flesch_reading_ease(document)\n",
    "    file_dict['flesch_kincaid_grade'] = textstat.flesch_kincaid_grade(document)\n",
    "    file_dict['linsear_write_formula'] = textstat.linsear_write_formula(document)\n",
    "    file_dict['dale_chall_readability_score'] = textstat.dale_chall_readability_score(document)\n",
    "    file_dict['mcalpine_eflaw'] = textstat.mcalpine_eflaw(document)\n",
    "    file_dict['reading_time'] = textstat.reading_time(document, ms_per_char=14.69)\n",
    "    file_dict['char_count'] = textstat.char_count(document, ignore_spaces=True)\n",
    "    file_dict['word_count'] = textstat.lexicon_count(document, removepunct=True)\n",
    "    return file_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "generate_file('020125_CONTRIBUTING_readability.csv', contributing_directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "generate_file('020325_README_readability.csv', readme_directory)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}