updating scripts with camera ready information
This commit is contained in:
parent
2d9ce17e3a
commit
723dce0cf9
@ -8,10 +8,28 @@ import pandas as pd
|
||||
import datetime
|
||||
import argparse
|
||||
|
||||
'''
|
||||
RUNNING INSTRUCTIONS:
|
||||
[1] set up tmux environment
|
||||
[2] edit this file where marked "FIX BELOW"
|
||||
[3] install pip packages
|
||||
[4] in your tmux environment, run the following three commands
|
||||
- os.environ['GIT_SSH_COMMAND'] = 'ssh -o StrictHostKeyChecking=no'
|
||||
- os.environ['GIT_ASKPASS'] = 'false'
|
||||
- os.environ['GIT_TERMINAL_PROMPT'] = '0'
|
||||
[5] in tmux, run the script as follows with your START and STOP values
|
||||
- python3 intermediary_script.py --start_index START --stop_index STOP
|
||||
[6] the password handling is imperfect, so I would appreciate it if you could check on the script every so often in case anything hangs
|
||||
|
||||
THANK YOU VERY MUCH - matt
|
||||
'''
|
||||
|
||||
#FIX BELOW: temp_dir is where the repositories will be temporarily cloned; if you are worried about disk space, specify a different location here
|
||||
temp_dir = "/data/users/mgaughan/tmp3/"
|
||||
cst = datetime.timezone(datetime.timedelta(hours=-6))
|
||||
from_date = datetime.datetime(1970, 1, 1, 12, 00, 00, tzinfo=cst)
|
||||
to_date = datetime.datetime(2024, 3, 16, 12, 00, 00, tzinfo=cst)
|
||||
#FIX BELOW: this is where the commit data will be stored; the parent directory below needs to contain the subdirs contributing_commit_data and readme_commit_data
|
||||
COMMIT_SAVE_PREFIX = "/data/users/mgaughan/kkex/012825_cam_revision_main/"
|
||||
|
||||
def temp_clone(vcs_link, temp_location):
|
||||
@ -126,9 +144,9 @@ def for_all_files(start_index, stop_index):
|
||||
lines = [line for line in file]
|
||||
for row in tqdm(csv.reader(lines), total=len(lines)):
|
||||
index += 1
|
||||
#time.sleep(5)
|
||||
if index < start_index:
|
||||
continue
|
||||
time.sleep(4)
|
||||
if row[0] == "":
|
||||
empty_row += 1
|
||||
continue
|
||||
@ -165,7 +183,7 @@ def for_all_files(start_index, stop_index):
|
||||
print(f'inside cloning error: {e}')
|
||||
raise ValueError(e)
|
||||
os.chdir(temp_repo_path)
|
||||
os.system(f"git checkout `git rev-list -n 1 --before='2024-03-16 12:00:00' master`")
|
||||
os.system(f"git checkout `git rev-list -n 1 --before='2024-03-16 12:00:00'`")
|
||||
os.chdir(cwd)
|
||||
has_readme_bool, has_contributing_bool = False, False
|
||||
for filename in os.listdir(temp_repo_path):
|
||||
@ -221,4 +239,6 @@ if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
for_all_files(args.start_index, args.stop_index)
|
||||
#temp_repo, temp_repo_path = temp_clone(" https://gitlab.gnome.org/GNOME/almanah", temp_dir)
|
||||
#delete_clone(temp_dir)
|
||||
#delete_clone(temp_dir)
|
||||
|
||||
#python3 intermediary_script.py --start_index START --stop_index STOP
|
BIN
R/012825_gam_introduction.png
Normal file
BIN
R/012825_gam_introduction.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 316 KiB |
@ -51,10 +51,8 @@ quantile(grouped_averages$x)
|
||||
quantile(all_actions_data$before_auth_new)
|
||||
quantile(all_actions_data$after_auth_new)
|
||||
|
||||
range(all_actions_data$log1p_count) # 0.000000 6.745236
|
||||
mean(all_actions_data$log1p_count) # 1.200043
|
||||
sd(all_actions_data$log1p_count)
|
||||
median(all_actions_data$log1p_count) # 0.6931472
|
||||
mean(all_actions_data$count) # 8.440981
|
||||
var(all_actions_data$count) # 542.9546
|
||||
# now for merge
|
||||
mrg_actions_data$logged_count <- log(mrg_actions_data$count)
|
||||
mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
|
||||
|
@ -68,14 +68,17 @@ doctypeColors <-
|
||||
, c("CONTRIBUTING", "README"))
|
||||
|
||||
time_plot <- all_actions_data |>
|
||||
ggplot(aes(x=week_offset, y=log1p_count, color=factor(document_type))) +
|
||||
labs(x="Weekly Offset", y="Commit Count", color="Document Type") +
|
||||
ggplot(aes(x=week_offset, y=count, color=factor(document_type))) +
|
||||
scale_y_continuous(trans = 'log1p', labels = scales::comma) +
|
||||
labs(x="Weekly Offset", y="Commit Count", color="Document Type: ") +
|
||||
scale_color_manual(values = doctypeColors) +
|
||||
geom_smooth() +
|
||||
geom_vline(xintercept = 0)+
|
||||
theme_bw() +
|
||||
theme(legend.position = "top")
|
||||
time_plot
|
||||
|
||||
ggsave(filename = "012825_gam_introduction.png", plot = time_plot, width = 8, height = 6, dpi = 500)
|
||||
#code to change the axes
|
||||
|
||||
#scale_y_continuous(breaks = c(0, 0.5, 1.0, 1.5),
|
||||
|
@ -59,8 +59,8 @@ library(lme4)
|
||||
library(optimx)
|
||||
library(lattice)
|
||||
#some more EDA to decide between Poisson and negative binomial
|
||||
var(all_actions_data$log1p_count) # 1.125429
|
||||
mean (all_actions_data$log1p_count) # 0.6426873
|
||||
var(all_actions_data$count) # 268.4449
|
||||
mean (all_actions_data$count) # 3.757298
|
||||
sd(all_actions_data$log1p_count)
|
||||
median(all_actions_data$log1p_count) #0
|
||||
var(all_actions_data$count) # 268.4449
|
||||
|
Loading…
Reference in New Issue
Block a user