Merge branch 'excise_reindex' of code:cdsc_reddit into excise_reindex
This commit is contained in:
commit
7b14db67de
2
__init__.py
Normal file
2
__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from .timeseries import load_clusters, load_densities, build_cluster_timeseries
|
||||||
|
|
2
timeseries/__init__.py
Normal file
2
timeseries/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from .choose_clusters import load_clusters, load_densities
|
||||||
|
from .cluster_timeseries import build_cluster_timeseries
|
@@ -2,11 +2,11 @@ import pandas as pd
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from pyspark.sql import functions as f
|
from pyspark.sql import functions as f
|
||||||
from pyspark.sql import SparkSession
|
from pyspark.sql import SparkSession
|
||||||
from choose_clusters import load_clusters, load_densities
|
from .choose_clusters import load_clusters, load_densities
|
||||||
import fire
|
import fire
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
|
def build_cluster_timeseries(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_terms_10000.feather",
|
||||||
author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather",
|
author_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_authors_10000.feather",
|
||||||
term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather",
|
term_densities_path="/gscratch/comdata/output/reddit_density/comment_terms_10000.feather",
|
||||||
author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather",
|
author_densities_path="/gscratch/comdata/output/reddit_density/comment_authors_10000.feather",
|
||||||
@@ -34,4 +34,4 @@ def main(term_clusters_path="/gscratch/comdata/output/reddit_clustering/comment_
|
|||||||
ts.write.parquet(output, mode='overwrite')
|
ts.write.parquet(output, mode='overwrite')
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
fire.Fire(main)
|
fire.Fire(build_cluster_timeseries)
|
||||||
|
Loading…
Reference in New Issue
Block a user