Compare commits
No commits in common. "49eaade66697a539ec7d40c804bff94dad9ce443" and "9477c2cbc05e5dd99a6089f9e221fe044dbc53ee" have entirely different histories.
49eaade666
...
9477c2cbc0
10
.gitignore
vendored
10
.gitignore
vendored
@ -1,10 +0,0 @@
|
||||
# ignore the R studio docker image needed by hyak
|
||||
rstudio_latest.sif
|
||||
|
||||
|
||||
# do not need to include any R items
|
||||
.Rhistory
|
||||
.cache/
|
||||
.config/
|
||||
.local/
|
||||
|
@ -1,37 +0,0 @@
|
||||
library(tidyverse)
|
||||
# data directory: /gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts
|
||||
# load in the partitioned directories
|
||||
library(dplyr)
|
||||
# Directories holding the partitioned CSV exports, one per aggregation level.
monthly_file_dir <- "/gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts/011625_dab_monthly/"
yearly_file_dir <- "/gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts/011625_dab_yearly/"
single_file_dir <- "/gscratch/comdata/users/mjilg/mw-repo-lifecycles/bot_activity_counts/011625_dab_single/"

# Column names applied to every CSV (the files are read with header = FALSE).
column_names <- c(
  "wiki_db",
  "date",
  "event_entity",
  "event_action",
  "count"
)
|
||||
# define a function to combine the multiple csv files in each directory
|
||||
# Read every headerless CSV file in `directory` and stack them row-wise.
#
# Args:
#   directory:    path of the directory to scan (not recursive).
#   column_names: character vector of names assigned to each file's columns;
#                 its length must match the number of columns in every file.
#
# Returns:
#   A data frame with columns `column_names` holding the rows of all files.
#   If the directory contains no CSV files, a warning is raised and a
#   zero-row data frame with the same columns is returned.
consolidate_csv <- function(directory, column_names) {
  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # names such as "x.csv.bak" or "mycsvfile.txt" are not picked up.
  file_list <- list.files(
    path = directory,
    pattern = "\\.csv$",
    full.names = TRUE
  )

  if (length(file_list) == 0) {
    warning("No CSV files found in ", directory, call. = FALSE)
    empty_df <- data.frame(matrix(nrow = 0, ncol = length(column_names)))
    colnames(empty_df) <- column_names
    return(empty_df)
  }

  df_list <- lapply(file_list, function(file) {
    df <- read.csv(file, header = FALSE)
    colnames(df) <- column_names
    df
  })

  # Bind once at the end rather than growing a data frame file by file.
  do.call(rbind, df_list)
}
|
||||
# Read each of the three partitioned directories into its own data frame.
monthly_df <- consolidate_csv(monthly_file_dir, column_names)
yearly_df <- consolidate_csv(yearly_file_dir, column_names)
single_df <- consolidate_csv(single_file_dir, column_names)

# Stack the three frames into one, then drop the intermediates.
combined_df <- rbind(monthly_df, yearly_df, single_df)
rm(monthly_df, yearly_df, single_df)

# Coerce every column to the type used downstream: the three categorical
# columns become factors, `date` a Date, and `count` numeric.
combined_df <- combined_df |>
  mutate(
    across(c(wiki_db, event_entity, event_action), as.factor),
    date = as.Date(date),
    count = as.numeric(count)
  )
|
@ -1,100 +0,0 @@
|
||||
#!/bin/sh

#SBATCH --job-name=mgaughan-rstudio-server
#SBATCH --partition=cpu-g2-mem2x

#SBATCH --time=02:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --mem=20G

#SBATCH --signal=USR2
#SBATCH --output=%x_%j.out

# Launches RStudio Server inside an Apptainer container as a Slurm batch job.
#
# This script requests one node with four tasks and 20GB of RAM for 2 hours.
# You can adjust --time, --nodes, --ntasks, and --mem above to adjust these settings for your session.

# --output=%x_%j.out creates an output file called
# mgaughan-rstudio-server_XXXXXXXX.out, where %x is shorthand for --job-name
# above and the X's are the jobID assigned by SLURM when our job is submitted.

RSTUDIO_CWD="/mmfs1/home/mjilg/git/mw-lifecycle-analysis"
RSTUDIO_SIF="rstudio_latest.sif"

# Create temp directory for ephemeral content to bind-mount in the container
RSTUDIO_TMP="$(/usr/bin/python3 -c 'import tempfile; print(tempfile.mkdtemp())')"

# Stop early if the directory could not be created; otherwise the paths below
# would resolve relative to the filesystem root.
if [ -z "${RSTUDIO_TMP}" ] || [ ! -d "${RSTUDIO_TMP}" ]; then
    echo "failed to create temporary directory" 1>&2
    exit 1
fi

# The content is ephemeral, so remove it when the script exits instead of
# leaving one directory behind per job.
trap 'rm -rf "${RSTUDIO_TMP}"' EXIT

mkdir -p -m 700 \
    "${RSTUDIO_TMP}/run" \
    "${RSTUDIO_TMP}/tmp" \
    "${RSTUDIO_TMP}/var/lib/rstudio-server"

cat > "${RSTUDIO_TMP}/database.conf" <<END
provider=sqlite
directory=/var/lib/rstudio-server
END

# Set OMP_NUM_THREADS to prevent OpenBLAS (and any other OpenMP-enhanced
# libraries used by R) from spawning more threads than the number of processors
# allocated to the job.
#
# Set R_LIBS_USER to a path specific to rocker/rstudio to avoid conflicts with
# personal libraries from any R installation in the host environment
#
# The heredoc delimiter is unquoted on purpose: SLURM_JOB_CPUS_PER_NODE is
# expanded now, while \${@} is written out literally for the wrapper to expand.
cat > "${RSTUDIO_TMP}/rsession.sh" <<END
#!/bin/sh

export OMP_NUM_THREADS=${SLURM_JOB_CPUS_PER_NODE}
export R_LIBS_USER=/gscratch/scrubbed/mjilg/R
exec /usr/lib/rstudio-server/bin/rsession "\${@}"
END

chmod +x "${RSTUDIO_TMP}/rsession.sh"

export APPTAINER_BIND="${RSTUDIO_CWD}:${RSTUDIO_CWD},/gscratch:/gscratch,${RSTUDIO_TMP}/run:/run,${RSTUDIO_TMP}/tmp:/tmp,${RSTUDIO_TMP}/database.conf:/etc/rstudio/database.conf,${RSTUDIO_TMP}/rsession.sh:/etc/rstudio/rsession.sh,${RSTUDIO_TMP}/var/lib/rstudio-server:/var/lib/rstudio-server"

# Do not suspend idle sessions.
# Alternative to setting session-timeout-minutes=0 in /etc/rstudio/rsession.conf
export APPTAINERENV_RSTUDIO_SESSION_TIMEOUT=0

# Login credentials: the current user name and a freshly generated password.
export APPTAINERENV_USER="$(id -un)"
export APPTAINERENV_PASSWORD="$(openssl rand -base64 15)"

# get unused socket per https://unix.stackexchange.com/a/132524
# tiny race condition between the python & apptainer commands
readonly PORT="$(/mmfs1/sw/pyenv/versions/3.9.5/bin/python -c 'import socket; s=socket.socket(); s.bind(("", 0)); print(s.getsockname()[1]); s.close()')"

# Print connection instructions to stderr.
cat 1>&2 <<END
1. SSH tunnel from your workstation using the following command:

ssh -N -L 8787:${HOSTNAME}:${PORT} ${APPTAINERENV_USER}@klone.hyak.uw.edu

and point your web browser to http://localhost:8787

2. log in to RStudio Server using the following credentials:

user: ${APPTAINERENV_USER}
password: ${APPTAINERENV_PASSWORD}

When done using RStudio Server, terminate the job by:

1. Exit the RStudio Session ("power" button in the top right corner of the RStudio window)
2. Issue the following command on the login node:

scancel -f ${SLURM_JOB_ID}
END

# `.` is the POSIX spelling of `source`, which is not guaranteed under /bin/sh.
. /etc/bashrc
module load apptainer

apptainer exec --cleanenv --home "${RSTUDIO_CWD}" "${RSTUDIO_CWD}/${RSTUDIO_SIF}" \
    rserver --www-port "${PORT}" \
            --auth-none=0 \
            --auth-pam-helper-path=pam-helper \
            --auth-stay-signed-in-days=30 \
            --auth-timeout-minutes=0 \
            --rsession-path=/etc/rstudio/rsession.sh \
            --server-user="${APPTAINERENV_USER}"

# Report rserver's exit status and propagate it as the job's exit status.
APPTAINER_EXIT_CODE=$?
echo "rserver exited $APPTAINER_EXIT_CODE" 1>&2
exit $APPTAINER_EXIT_CODE
|
Loading…
Reference in New Issue
Block a user