Compare commits
	
		
			No commits in common. "49eaade66697a539ec7d40c804bff94dad9ce443" and "9477c2cbc05e5dd99a6089f9e221fe044dbc53ee" have entirely different histories.
		
	
	
		
			49eaade666
			...
			9477c2cbc0
		
	
		
							
								
								
									
										10
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1,10 +0,0 @@ | |||||||
| # ignore the RStudio Docker image needed by Hyak  |  | ||||||
| rstudio_latest.sif |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # do not need to include any R items  |  | ||||||
| .Rhistory |  | ||||||
| .cache/ |  | ||||||
| .config/ |  | ||||||
| .local/ |  | ||||||
| 
 |  | ||||||
| @ -1,37 +0,0 @@ | |||||||
| library(tidyverse) |  | ||||||
| # data directory: /gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts |  | ||||||
| # load in the partitioned directories |  | ||||||
| library(dplyr) |  | ||||||
| monthly_file_dir = "/gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts/011625_dab_monthly/" |  | ||||||
| yearly_file_dir = "/gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts/011625_dab_yearly/" |  | ||||||
| single_file_dir = "/gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts/011625_dab_single/" |  | ||||||
| column_names <- c("wiki_db", "date", "event_entity", "event_action", "count") |  | ||||||
| # define a function to combine the multiple csv files in each directory |  | ||||||
| consolidate_csv <- function(directory, column_names) { |  | ||||||
|   file_list <- list.files(path = directory, pattern = "*.csv", full.names = TRUE) |  | ||||||
|   df_list <- lapply(file_list, function(file){ |  | ||||||
|     df = read.csv(file, header = FALSE)  |  | ||||||
|     colnames(df) <- column_names |  | ||||||
|     return(df) |  | ||||||
|     }) |  | ||||||
|   combined_df <- do.call(rbind, df_list) |  | ||||||
|   return(combined_df) |  | ||||||
| } |  | ||||||
| #apply the function to our three directories of data  |  | ||||||
| monthly_df <- consolidate_csv(monthly_file_dir, column_names) |  | ||||||
| yearly_df <- consolidate_csv(yearly_file_dir, column_names) |  | ||||||
| single_df <- consolidate_csv(single_file_dir, column_names) |  | ||||||
| # row-bind the monthly, yearly, and single data frames into one  |  | ||||||
| combined_df <- rbind(monthly_df, yearly_df, single_df) |  | ||||||
| rm(monthly_df) |  | ||||||
| rm(yearly_df) |  | ||||||
| rm(single_df) |  | ||||||
| #making sure data columns are of the right type |  | ||||||
| combined_df <- combined_df |>  |  | ||||||
|   mutate(  |  | ||||||
|     wiki_db = as.factor(wiki_db), |  | ||||||
|     date = as.Date(date), |  | ||||||
|     event_entity = as.factor(event_entity), |  | ||||||
|     event_action = as.factor(event_action), |  | ||||||
|     count = as.numeric(count) |  | ||||||
|   ) |  | ||||||
| @ -1,100 +0,0 @@ | |||||||
| #!/bin/sh |  | ||||||
| 
 |  | ||||||
| #SBATCH --job-name=mgaughan-rstudio-server |  | ||||||
| #SBATCH --partition=cpu-g2-mem2x |  | ||||||
| 
 |  | ||||||
| #SBATCH --time=02:00:00 |  | ||||||
| #SBATCH --nodes=1 |  | ||||||
| #SBATCH --ntasks=4 |  | ||||||
| #SBATCH --mem=20G |  | ||||||
| 
 |  | ||||||
| #SBATCH --signal=USR2 |  | ||||||
| #SBATCH --output=%x_%j.out |  | ||||||
| 
 |  | ||||||
| # This script will request a single node with four tasks and 20GB of RAM for 2 hours.  |  | ||||||
| # You can adjust --time, --nodes, --ntasks, and --mem above to adjust these settings for your session. |  | ||||||
| 
 |  | ||||||
| # --output=%x_%j.out creates an output file called mgaughan-rstudio-server_XXXXXXXX.out  |  | ||||||
| # where the %x is shorthand for --job-name above and the X's are an 8-digit  |  | ||||||
| # jobID assigned by SLURM when our job is submitted. |  | ||||||
| 
 |  | ||||||
| RSTUDIO_CWD="/mmfs1/home/mjilg/git/mw-lifecycle-analysis" |  | ||||||
| RSTUDIO_SIF="rstudio_latest.sif" |  | ||||||
| 
 |  | ||||||
| # Create temp directory for ephemeral content to bind-mount in the container |  | ||||||
| RSTUDIO_TMP=$(/usr/bin/python3 -c 'import tempfile; print(tempfile.mkdtemp())') |  | ||||||
| 
 |  | ||||||
| mkdir -p -m 700 \ |  | ||||||
|         ${RSTUDIO_TMP}/run \ |  | ||||||
|         ${RSTUDIO_TMP}/tmp \ |  | ||||||
|         ${RSTUDIO_TMP}/var/lib/rstudio-server |  | ||||||
| 
 |  | ||||||
| cat > ${RSTUDIO_TMP}/database.conf <<END |  | ||||||
| provider=sqlite |  | ||||||
| directory=/var/lib/rstudio-server |  | ||||||
| END |  | ||||||
| 
 |  | ||||||
| # Set OMP_NUM_THREADS to prevent OpenBLAS (and any other OpenMP-enhanced |  | ||||||
| # libraries used by R) from spawning more threads than the number of processors |  | ||||||
| # allocated to the job. |  | ||||||
| # |  | ||||||
| # Set R_LIBS_USER to a path specific to rocker/rstudio to avoid conflicts with |  | ||||||
| # personal libraries from any R installation in the host environment |  | ||||||
| 
 |  | ||||||
| cat > ${RSTUDIO_TMP}/rsession.sh <<END |  | ||||||
| #!/bin/sh |  | ||||||
| 
 |  | ||||||
| export OMP_NUM_THREADS=${SLURM_JOB_CPUS_PER_NODE} |  | ||||||
| export R_LIBS_USER=/gscratch/scrubbed/mjilg/R |  | ||||||
| exec /usr/lib/rstudio-server/bin/rsession "\${@}" |  | ||||||
| END |  | ||||||
| 
 |  | ||||||
| chmod +x ${RSTUDIO_TMP}/rsession.sh |  | ||||||
| 
 |  | ||||||
| export APPTAINER_BIND="${RSTUDIO_CWD}:${RSTUDIO_CWD},/gscratch:/gscratch,${RSTUDIO_TMP}/run:/run,${RSTUDIO_TMP}/tmp:/tmp,${RSTUDIO_TMP}/database.conf:/etc/rstudio/database.conf,${RSTUDIO_TMP}/rsession.sh:/etc/rstudio/rsession.sh,${RSTUDIO_TMP}/var/lib/rstudio-server:/var/lib/rstudio-server" |  | ||||||
| 
 |  | ||||||
| # Do not suspend idle sessions. |  | ||||||
| # Alternative to setting session-timeout-minutes=0 in /etc/rstudio/rsession.conf |  | ||||||
| export APPTAINERENV_RSTUDIO_SESSION_TIMEOUT=0 |  | ||||||
| 
 |  | ||||||
| export APPTAINERENV_USER=$(id -un) |  | ||||||
| export APPTAINERENV_PASSWORD=$(openssl rand -base64 15) |  | ||||||
| 
 |  | ||||||
| # get unused socket per https://unix.stackexchange.com/a/132524 |  | ||||||
| # tiny race condition between the python & apptainer commands |  | ||||||
| readonly PORT=$(/mmfs1/sw/pyenv/versions/3.9.5/bin/python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()') |  | ||||||
| cat 1>&2 <<END |  | ||||||
| 1. SSH tunnel from your workstation using the following command: |  | ||||||
| 
 |  | ||||||
|    ssh -N -L 8787:${HOSTNAME}:${PORT} ${APPTAINERENV_USER}@klone.hyak.uw.edu |  | ||||||
| 
 |  | ||||||
|    and point your web browser to http://localhost:8787 |  | ||||||
| 
 |  | ||||||
| 2. log in to RStudio Server using the following credentials: |  | ||||||
| 
 |  | ||||||
|    user: ${APPTAINERENV_USER} |  | ||||||
|    password: ${APPTAINERENV_PASSWORD} |  | ||||||
| 
 |  | ||||||
| When done using RStudio Server, terminate the job by: |  | ||||||
| 
 |  | ||||||
| 1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window) |  | ||||||
| 2. Issue the following command on the login node: |  | ||||||
| 
 |  | ||||||
|       scancel -f ${SLURM_JOB_ID} |  | ||||||
| END |  | ||||||
| 
 |  | ||||||
| source /etc/bashrc |  | ||||||
| module load apptainer |  | ||||||
| 
 |  | ||||||
| apptainer exec --cleanenv --home ${RSTUDIO_CWD} ${RSTUDIO_CWD}/${RSTUDIO_SIF} \ |  | ||||||
|     rserver --www-port ${PORT} \ |  | ||||||
|             --auth-none=0 \ |  | ||||||
|             --auth-pam-helper-path=pam-helper \ |  | ||||||
|             --auth-stay-signed-in-days=30 \ |  | ||||||
|             --auth-timeout-minutes=0 \ |  | ||||||
|             --rsession-path=/etc/rstudio/rsession.sh \ |  | ||||||
|             --server-user=${APPTAINERENV_USER} |  | ||||||
| 
 |  | ||||||
| APPTAINER_EXIT_CODE=$? |  | ||||||
| echo "rserver exited $APPTAINER_EXIT_CODE" 1>&2 |  | ||||||
| exit $APPTAINER_EXIT_CODE |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user