From 2decdc97502891a194df6b385935a71c2cc11edf Mon Sep 17 00:00:00 2001
From: Nathan TeBlunthuis
Date: Wed, 27 Nov 2024 19:13:49 -0800
Subject: [PATCH] move function to outer scope.

---
 ngrams/term_frequencies.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/ngrams/term_frequencies.py b/ngrams/term_frequencies.py
index c30131c..9d43493 100755
--- a/ngrams/term_frequencies.py
+++ b/ngrams/term_frequencies.py
@@ -19,6 +19,17 @@ from pathlib import Path
 # taken from https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
 urlregex = re.compile(r"[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)")
 
+def remove_punct(sentence):
+    new_sentence = []
+    for token in sentence:
+        new_token = ''
+        for c in token:
+            if c not in string.punctuation:
+                new_token += c
+        if len(new_token) > 0:
+            new_sentence.append(new_token)
+    return new_sentence
+
 def my_tokenizer(text):
     # remove stopwords, punctuation, urls, lower case
     # lowercase
@@ -169,17 +180,6 @@ def weekly_tf(partition,
     else:
         mwe_tokenize = MWETokenizer().tokenize
 
-    def remove_punct(sentence):
-        new_sentence = []
-        for token in sentence:
-            new_token = ''
-            for c in token:
-                if c not in string.punctuation:
-                    new_token += c
-            if len(new_token) > 0:
-                new_sentence.append(new_token)
-        return new_sentence
-
     stopWords = set(stopwords.words('english'))
 
     # we follow the approach described in datta, phelan, adar 2017
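
Note (not part of the patch): the commit message does not state the motivation, but a common reason to hoist a nested helper like remove_punct to module scope is that local functions cannot be pickled, which matters when the caller (here weekly_tf) is used with multiprocessing or a similar parallel framework. The sketch below is a hypothetical illustration of that difference, not code from this repository:

    # Minimal sketch: a module-level function pickles fine,
    # a function defined inside another function does not.
    import pickle
    import string

    def remove_punct(sentence):
        # Strip punctuation characters from each token;
        # drop tokens that become empty.
        new_sentence = []
        for token in sentence:
            new_token = ''.join(c for c in token if c not in string.punctuation)
            if new_token:
                new_sentence.append(new_token)
        return new_sentence

    pickle.dumps(remove_punct)  # succeeds: defined at module scope

    def outer():
        def nested(sentence):
            return sentence
        return nested

    try:
        pickle.dumps(outer())
    except Exception as e:
        # CPython raises here: local objects are not picklable.
        print("nested function is not picklable:", e)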