add the rest of the code.

This commit is contained in:
2024-03-12 09:39:12 -07:00
parent 29abd26b97
commit 2c733a8788
15 changed files with 1909 additions and 0 deletions

37
Makefile Normal file
View File

@@ -0,0 +1,37 @@
# Recipes below use the bash builtin `source`, so run them with bash
# instead of make's default /bin/sh.
SHELL := /bin/bash
# No prerequisites: make runs ./run_aql_jobs.sh only when this target file
# does not exist yet.
data/20200301_article_labelings.json_SUCCESS :
	./run_aql_jobs.sh
# Runs ./sample_training_labels.py inside the ./bin/activate environment.
# Rebuilt whenever sample_training_labels.py is newer than the target.
# `&&` (not `;`) so that a failed activation aborts the recipe instead of
# running the script in whatever environment happens to be active.
data/20200301_article_labelings_sample.json: sample_training_labels.py
	source ./bin/activate && \
	./sample_training_labels.py
# Runs sample_articles.py through start_spark_and_run.sh (first argument: 1)
# inside the ./bin/activate environment. Rebuilt whenever sample_articles.py
# is newer than the target.
# `&&` (not `;`) so that a failed activation aborts the recipe instead of
# launching the Spark job in the wrong environment.
data/article_sample.csv: sample_articles.py
	source ./bin/activate && \
	start_spark_and_run.sh 1 sample_articles.py
# Runs ores_scores_sample.py with an input path and an output path, inside
# the ./bin/activate environment.
# `&&` (not `;`) so that a failed activation aborts the recipe.
#
# NOTE(review): three different input names are in play here and should be
# reconciled: this rule's prerequisite is data/article_sample_set.csv, the
# recipe reads data/article_sample_set.parquet, and the sample_articles.py
# rule in this Makefile builds data/article_sample.csv. As written, make has
# no rule for the prerequisite and does not track the file the recipe
# actually reads. Left unchanged pending confirmation of the intended name.
data/scored_article_sample.feather: data/article_sample_set.csv ores_scores_sample.py
	source ./bin/activate && \
	python3 ores_scores_sample.py data/article_sample_set.parquet data/scored_article_sample.feather
# run this step on kibo
# Calls the `fetch_text` subcommand of articlequality/utility against the
# English Wikipedia API host, reading the sampled labelings and writing the
# same sample with revision text attached.
# The last option line must NOT end in a backslash: a trailing `\` would
# splice the next line of the Makefile into this shell command.
# `&&` (not `;`) so that a failed activation aborts the recipe.
data/20200301_al_sample_revisions.w_text.json: data/20200301_article_labelings_sample.json
	source ./bin/activate && \
	python3 articlequality/utility fetch_text \
		--api-host=https://en.wikipedia.org \
		--labelings=data/20200301_article_labelings_sample.json \
		--output=data/20200301_al_sample_revisions.w_text.json
# run this step on kibo
# Runs score_sample_labels.py once the text-bearing sample exists. The script
# itself is listed as a prerequisite so that editing it triggers a rebuild,
# matching the other script-driven rules in this Makefile.
# NOTE(review): unlike the other python3 recipes here, this one does not
# source ./bin/activate first — confirm that is intentional.
data/20200301_al_sample_revisions.w_scores.json: data/20200301_al_sample_revisions.w_text.json score_sample_labels.py
	python3 score_sample_labels.py
# Runs ordinal_quality_models.R with Rscript; rebuilt when either the R script
# or the text-bearing sample is newer than the target.
# NOTE(review): the prerequisite is the .w_text.json file, not the
# .w_scores.json file — confirm which one the R script actually reads.
models/ordinal_quality.RDS : data/20200301_al_sample_revisions.w_text.json \
                             ordinal_quality_models.R
	Rscript ordinal_quality_models.R
# The special target is spelled `.PHONY` (leading dot); a plain `PHONY:` just
# defines an ordinary target named PHONY and marks nothing as phony.
# NOTE(review): no rule in this Makefile has exactly this target name (the
# AQL rule's target ends in `_SUCCESS`) — confirm which target was meant.
.PHONY: data/20200301_article_labelings.json