No longer need to convert Spark dates to ISO-format strings.
This commit is contained in: parent 104b708ff6, commit 02ec11f726
@@ -40,7 +40,7 @@ def _week_similarities(week, simfunc, tfidf_path, term_colname, included_subredd
                          term_colname=term_colname,
                          included_subreddits=included_subreddits,
                          topN=topN,
-                         week=week.isoformat(),
+                         week=week,
                          rescale_idf=False)
 
     tfidf_colname='tf_idf'
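The change in this hunk passes the week through as a date object instead of an ISO string. A minimal sketch of why the string round-trip can be dropped, assuming the week is a Python datetime.date that eventually reaches a duckdb query (the value and query here are illustrative, not this repo's code):

import datetime

import duckdb

week = datetime.date(2020, 3, 2)  # hypothetical week value
conn = duckdb.connect()
# duckdb binds datetime.date parameters natively as DATE values,
# so no .isoformat() string conversion is needed first.
print(conn.execute("SELECT ? AS week", [week]).df())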
@@ -78,7 +78,7 @@ def cosine_similarities_weekly(tfidf_path, outfile, term_colname, included_subre
     print(outfile)
     # do this step in parallel if we have the memory for it.
     # should be doable with pool.map
-    conn = duckdb.connect()
+    conn = duckdb.connect()
     subreddit_names = conn.execute(f"SELECT DISTINCT subreddit, subreddit_id from read_parquet('{tfidf_path}/*/*.parquet') ORDER BY subreddit_id;").df()
 
     nterms = conn.execute(f"SELECT MAX({term_colname + '_id'}) as nterms FROM read_parquet('{tfidf_path}/*/*.parquet')").df()
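The two comments above sketch a plan to parallelize this step with pool.map. A hedged sketch of that idea, assuming _week_similarities keeps the signature shown in the first hunk; the placeholder body, the driver function, and the process count are assumptions, not this repo's actual code:

from functools import partial
from multiprocessing import Pool

def _week_similarities(week, simfunc, tfidf_path, term_colname,
                       included_subreddits, topN):
    # placeholder body: the real function computes one week's similarities
    return week

def all_week_similarities(weeks, simfunc, tfidf_path, term_colname,
                          included_subreddits, topN, processes=4):
    # Bind everything except week, then fan the weeks out across a pool.
    # Memory permitting, each worker handles one week at a time.
    f = partial(_week_similarities, simfunc=simfunc, tfidf_path=tfidf_path,
                term_colname=term_colname,
                included_subreddits=included_subreddits, topN=topN)
    with Pool(processes) as pool:
        return pool.map(f, weeks)

One caveat: each worker would need to open its own duckdb connection rather than sharing conn, since connections do not survive pickling across processes.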