From 526dc03732b713582012c543c753fd672107967f Mon Sep 17 00:00:00 2001
From: Benjamin Mako Hill
Date: Mon, 25 May 2026 18:22:03 -0700
Subject: [PATCH] datasets/add_months.sh: stop before copy step to force manual verification

The script now exits after Part 2 so the copy and cleanup commands
must be run manually. This prevents the live datasets from being
touched without a deliberate verification step in between.

Co-Authored-By: Claude Sonnet 4.6
---
 datasets/add_months.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/datasets/add_months.sh b/datasets/add_months.sh
index c046235..fff2f72 100755
--- a/datasets/add_months.sh
+++ b/datasets/add_months.sh
@@ -93,8 +93,9 @@ start_spark_and_run.sh 1 submissions_part2.py \
 
 # --- Verify: inspect staging before copying to live -------------------------
 #
-# Stop here and check that the staging output looks right before running
-# the copy step. The live datasets are untouched at this point. Example:
+# The script stops here (exit 0 below). Check the staging output looks right
+# before running the copy step manually. The live datasets are untouched at
+# this point. Example checks:
 #
 # ls -lah "$STAGING_COMMENTS_SUB" | head
 # python3 -c "
@@ -104,6 +105,8 @@ start_spark_and_run.sh 1 submissions_part2.py \
 # print(t.column('created_utc')[0].as_py(), t.column('created_utc')[-1].as_py())
 # "
 
+exit 0
+
 # --- Copy: add staging files into live datasets -----------------------------
 #
 # Run these lines manually after verifying staging. This is the only step