add script for pulling cluster timeseries
This commit is contained in:
parent
a013f6718b
commit
47ba04aa97
@@ -2,11 +2,11 @@ import pandas as pd
|
||||
import numpy as np
|
||||
from pyspark.sql import functions as f
|
||||
from pyspark.sql import SparkSession
|
||||
from choose_clusters import load_clusters, load_densities
|
||||
from .choose_clusters import load_clusters, load_densities
|
||||
import fire
|
||||
from pathlib import Path
|
||||
|
||||
def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
|
||||
def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
|
||||
author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather",
|
||||
term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather",
|
||||
author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather",
|
||||
@@ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_
|
||||
ts.write.parquet(output, mode='overwrite')
|
||||
|
||||
if __name__ == "__main__":
|
||||
fire.Fire(main)
|
||||
fire.Fire(build_cluster_timeseries)
|
||||
|
Loading…
Reference in New Issue
Block a user