24_deb_pkg_gov/text_analysis/getMetadata.py

17 lines
419 B
Python
Raw Normal View History

2024-04-30 18:49:28 +00:00
import csv
import os
import nltk
import pandas as pd
from statistics import mean, median
import json
def metadata_for_file(file):
    """Return the word count and mean word length of a text.

    Args:
        file: The text to analyse. Despite the name, this is the content
            itself (it is split directly), not a path or an open handle.

    Returns:
        A ``(word_count, avg_word_length)`` tuple. ``word_count`` is the
        number of whitespace-separated tokens; ``avg_word_length`` is the
        mean token length in characters as a float, or ``0.0`` when the
        text contains no tokens.
    """
    # split() with no separator collapses runs of whitespace and drops
    # leading/trailing whitespace, so blank input yields an empty list.
    words = file.split()
    word_count = len(words)

    if not words:
        # Guard against dividing by zero; use a float so the second element
        # has the same type as in the non-empty case.
        return 0, 0.0

    total_chars = sum(map(len, words))
    # TODO: the original code noted an intent to also return the number of
    # paragraphs; that is not implemented.
    return word_count, total_chars / word_count