From 6cb3296e8e061ef4b380d03154e7f741aa32043c Mon Sep 17 00:00:00 2001
From: Matthew Gaughan
Date: Wed, 11 Dec 2024 17:20:26 -0600
Subject: [PATCH] getting kibo set for data collection

---
 src/lib/get_wiki_activity.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/lib/get_wiki_activity.py b/src/lib/get_wiki_activity.py
index 9045a96..54a882c 100644
--- a/src/lib/get_wiki_activity.py
+++ b/src/lib/get_wiki_activity.py
@@ -4,6 +4,12 @@
 import re
 import json
 import pandas as pd
+import findspark
+findspark.init("/home/SOC.NORTHWESTERN.EDU/nws8519/spark-3.2.0-bin-hadoop3.2")
+from pyspark.sql import SparkSession
+from pyspark.sql import functions as F, types as T, Window
+#breaking
+import wmfdata.spark as wmfspark
 
 # TODO Get a list of bots in the project
 # TODO get all mws wikis