updating scripts with camera ready information
This commit is contained in:
parent
2d9ce17e3a
commit
723dce0cf9
@ -8,10 +8,28 @@ import pandas as pd
|
|||||||
import datetime
|
import datetime
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
'''
|
||||||
|
RUNNING INSTRUCTIONS:
|
||||||
|
[1] set up tmux environment
|
||||||
|
[2] edit this file where marked "FIX BELOW"
|
||||||
|
[3] install pip packages
|
||||||
|
[4] in your tmux environment, run the following three commands
|
||||||
|
- os.environ['GIT_SSH_COMMAND'] = 'ssh -o StrictHostKeyChecking=no'
|
||||||
|
- os.environ['GIT_ASKPASS'] = 'false'
|
||||||
|
- os.environ['GIT_TERMINAL_PROMPT'] = '0'
|
||||||
|
[5] in tmux, run the script as follows with your START and STOP values
|
||||||
|
- python3 intermediary_script.py --start_index START --stop_index STOP
|
||||||
|
[6] the password handling is imperfect, so I would appreciate it if you could check on the script every so often in case anything hangs
|
||||||
|
|
||||||
|
THANK YOU VERY MUCH - matt
|
||||||
|
'''
|
||||||
|
|
||||||
|
#FIX BELOW: temp_dir is where the repositories will be temporarily cloned; if you are worried about disk space, point it at a location with enough room
|
||||||
temp_dir = "/data/users/mgaughan/tmp3/"
|
temp_dir = "/data/users/mgaughan/tmp3/"
|
||||||
cst = datetime.timezone(datetime.timedelta(hours=-6))
|
cst = datetime.timezone(datetime.timedelta(hours=-6))
|
||||||
from_date = datetime.datetime(1970, 1, 1, 12, 00, 00, tzinfo=cst)
|
from_date = datetime.datetime(1970, 1, 1, 12, 00, 00, tzinfo=cst)
|
||||||
to_date = datetime.datetime(2024, 3, 16, 12, 00, 00, tzinfo=cst)
|
to_date = datetime.datetime(2024, 3, 16, 12, 00, 00, tzinfo=cst)
|
||||||
|
#FIX BELOW: this is where the commit data will be stored; the parent directory below must contain the subdirs contributing_commit_data and readme_commit_data
|
||||||
COMMIT_SAVE_PREFIX = "/data/users/mgaughan/kkex/012825_cam_revision_main/"
|
COMMIT_SAVE_PREFIX = "/data/users/mgaughan/kkex/012825_cam_revision_main/"
|
||||||
|
|
||||||
def temp_clone(vcs_link, temp_location):
|
def temp_clone(vcs_link, temp_location):
|
||||||
@ -126,9 +144,9 @@ def for_all_files(start_index, stop_index):
|
|||||||
lines = [line for line in file]
|
lines = [line for line in file]
|
||||||
for row in tqdm(csv.reader(lines), total=len(lines)):
|
for row in tqdm(csv.reader(lines), total=len(lines)):
|
||||||
index += 1
|
index += 1
|
||||||
#time.sleep(5)
|
|
||||||
if index < start_index:
|
if index < start_index:
|
||||||
continue
|
continue
|
||||||
|
time.sleep(4)
|
||||||
if row[0] == "":
|
if row[0] == "":
|
||||||
empty_row += 1
|
empty_row += 1
|
||||||
continue
|
continue
|
||||||
@ -165,7 +183,7 @@ def for_all_files(start_index, stop_index):
|
|||||||
print(f'inside cloning error: {e}')
|
print(f'inside cloning error: {e}')
|
||||||
raise ValueError(e)
|
raise ValueError(e)
|
||||||
os.chdir(temp_repo_path)
|
os.chdir(temp_repo_path)
|
||||||
os.system(f"git checkout `git rev-list -n 1 --before='2024-03-16 12:00:00' master`")
|
os.system(f"git checkout `git rev-list -n 1 --before='2024-03-16 12:00:00'`")
|
||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
has_readme_bool, has_contributing_bool = False, False
|
has_readme_bool, has_contributing_bool = False, False
|
||||||
for filename in os.listdir(temp_repo_path):
|
for filename in os.listdir(temp_repo_path):
|
||||||
@ -222,3 +240,5 @@ if __name__ == "__main__":
|
|||||||
for_all_files(args.start_index, args.stop_index)
|
for_all_files(args.start_index, args.stop_index)
|
||||||
#temp_repo, temp_repo_path = temp_clone(" https://gitlab.gnome.org/GNOME/almanah", temp_dir)
|
#temp_repo, temp_repo_path = temp_clone(" https://gitlab.gnome.org/GNOME/almanah", temp_dir)
|
||||||
#delete_clone(temp_dir)
|
#delete_clone(temp_dir)
|
||||||
|
|
||||||
|
#python3 intermediary_script.py --start_index START --stop_index STOP
|
BIN
R/012825_gam_introduction.png
Normal file
BIN
R/012825_gam_introduction.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 316 KiB |
@ -51,10 +51,8 @@ quantile(grouped_averages$x)
|
|||||||
quantile(all_actions_data$before_auth_new)
|
quantile(all_actions_data$before_auth_new)
|
||||||
quantile(all_actions_data$after_auth_new)
|
quantile(all_actions_data$after_auth_new)
|
||||||
|
|
||||||
range(all_actions_data$log1p_count) # 0.000000 6.745236
|
mean(all_actions_data$count) # 8.440981
|
||||||
mean(all_actions_data$log1p_count) # 1.200043
|
var(all_actions_data$count) # 542.9546
|
||||||
sd(all_actions_data$log1p_count)
|
|
||||||
median(all_actions_data$log1p_count) # 0.6931472
|
|
||||||
# now for merge
|
# now for merge
|
||||||
mrg_actions_data$logged_count <- log(mrg_actions_data$count)
|
mrg_actions_data$logged_count <- log(mrg_actions_data$count)
|
||||||
mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
|
mrg_actions_data$log1p_count <- log1p(mrg_actions_data$count)
|
||||||
|
@ -68,14 +68,17 @@ doctypeColors <-
|
|||||||
, c("CONTRIBUTING", "README"))
|
, c("CONTRIBUTING", "README"))
|
||||||
|
|
||||||
time_plot <- all_actions_data |>
|
time_plot <- all_actions_data |>
|
||||||
ggplot(aes(x=week_offset, y=log1p_count, color=factor(document_type))) +
|
ggplot(aes(x=week_offset, y=count, color=factor(document_type))) +
|
||||||
labs(x="Weekly Offset", y="Commit Count", color="Document Type") +
|
scale_y_continuous(trans = 'log1p', labels = scales::comma) +
|
||||||
|
labs(x="Weekly Offset", y="Commit Count", color="Document Type: ") +
|
||||||
scale_color_manual(values = doctypeColors) +
|
scale_color_manual(values = doctypeColors) +
|
||||||
geom_smooth() +
|
geom_smooth() +
|
||||||
geom_vline(xintercept = 0)+
|
geom_vline(xintercept = 0)+
|
||||||
theme_bw() +
|
theme_bw() +
|
||||||
theme(legend.position = "top")
|
theme(legend.position = "top")
|
||||||
time_plot
|
time_plot
|
||||||
|
|
||||||
|
ggsave(filename = "012825_gam_introduction.png", plot = time_plot, width = 8, height = 6, dpi = 500)
|
||||||
#code to change the axes
|
#code to change the axes
|
||||||
|
|
||||||
#scale_y_continuous(breaks = c(0, 0.5, 1.0, 1.5),
|
#scale_y_continuous(breaks = c(0, 0.5, 1.0, 1.5),
|
||||||
|
@ -59,8 +59,8 @@ library(lme4)
|
|||||||
library(optimx)
|
library(optimx)
|
||||||
library(lattice)
|
library(lattice)
|
||||||
#some more EDA to go between Poisson and neg binomial
|
#some more EDA to go between Poisson and neg binomial
|
||||||
var(all_actions_data$log1p_count) # 1.125429
|
var(all_actions_data$count) # 268.4449
|
||||||
mean (all_actions_data$log1p_count) # 0.6426873
|
mean (all_actions_data$count) # 3.757298
|
||||||
sd(all_actions_data$log1p_count)
|
sd(all_actions_data$log1p_count)
|
||||||
median(all_actions_data$log1p_count) #0
|
median(all_actions_data$log1p_count) #0
|
||||||
var(all_actions_data$count) # 268.4449
|
var(all_actions_data$count) # 268.4449
|
||||||
|
Loading…
Reference in New Issue
Block a user