From a013f6718bf221fd870fcaff36d6724d736a0766 Mon Sep 17 00:00:00 2001 From: Nate E TeBlunthuis Date: Wed, 24 Mar 2021 17:18:30 -0700 Subject: [PATCH 1/2] export timeseries functions --- __init__.py | 2 ++ timeseries/__init__.py | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 __init__.py create mode 100644 timeseries/__init__.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..dbb8061 --- /dev/null +++ b/__init__.py @@ -0,0 +1,2 @@ +from .timeseries import load_clusters, load_densities, build_cluster_timeseries + diff --git a/timeseries/__init__.py b/timeseries/__init__.py new file mode 100644 index 0000000..c023c66 --- /dev/null +++ b/timeseries/__init__.py @@ -0,0 +1,2 @@ +from .choose_clusters import load_clusters, load_densities +from .cluster_timeseries import build_cluster_timeseries From 47ba04aa9715325a67fe17cee205230b042022fe Mon Sep 17 00:00:00 2001 From: Nate E TeBlunthuis Date: Mon, 10 May 2021 18:24:22 -0700 Subject: [PATCH 2/2] add script for pulling cluster timeseries --- timeseries/cluster_timeseries.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/timeseries/cluster_timeseries.py b/timeseries/cluster_timeseries.py index 07507d7..91fa705 100644 --- a/timeseries/cluster_timeseries.py +++ b/timeseries/cluster_timeseries.py @@ -2,11 +2,11 @@ import pandas as pd import numpy as np from pyspark.sql import functions as f from pyspark.sql import SparkSession -from choose_clusters import load_clusters, load_densities +from .choose_clusters import load_clusters, load_densities import fire from pathlib import Path -def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", +def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather", author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather", term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather", author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather", @@ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_ ts.write.parquet(output, mode='overwrite') if __name__ == "__main__": - fire.Fire(main) + fire.Fire(build_cluster_timeseries)