diff --git a/datasets/add_months.sh b/datasets/add_months.sh index c046235..fff2f72 100755 --- a/datasets/add_months.sh +++ b/datasets/add_months.sh @@ -93,8 +93,9 @@ start_spark_and_run.sh 1 submissions_part2.py \ # --- Verify: inspect staging before copying to live ------------------------- # -# Stop here and check that the staging output looks right before running -# the copy step. The live datasets are untouched at this point. Example: +# The script stops here (exit 0 below). Check that the staging output looks +# right before running the copy step manually. The live datasets are untouched +# at this point. Example checks: # # ls -lah "$STAGING_COMMENTS_SUB" | head # python3 -c " @@ -104,6 +105,8 @@ start_spark_and_run.sh 1 submissions_part2.py \ # print(t.column('created_utc')[0].as_py(), t.column('created_utc')[-1].as_py()) # " +exit 0 + # --- Copy: add staging files into live datasets ----------------------------- # # Run these lines manually after verifying staging. This is the only step