updating data collection
This commit is contained in:
parent
eefc940a7c
commit
ba42ca4ca7
@ -10,16 +10,17 @@ import argparse
|
||||
|
||||
'''
|
||||
RUNNING INSTRUCTIONS:
|
||||
[1] set up tmux environment, most likely also using venv within it
|
||||
[1] set up tmux environment, install requirements.txt
|
||||
[2] edit this file where marked "FIX BELOW"
|
||||
[3] install pip packages
|
||||
[4] in your tmux environment, run the following three commands to handle password prompts
|
||||
[3] in your tmux environment, run the following commands to handle password prompts
|
||||
- eval "$(ssh-agent -s)"
|
||||
- ssh-add ~/.ssh/id_ed25519
|
||||
- export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=no'
|
||||
- export GIT_ASKPASS='false'
|
||||
- export GIT_TERMINAL_PROMPT='0'
|
||||
[5] in tmux, run the script from the terminal as follows with your START and STOP values
|
||||
[4] in tmux, run the script from the terminal as follows with your START and STOP values
|
||||
- python3 intermediary_script.py --start_index START --stop_index STOP
|
||||
[6] the password handling is imperfect, so I would appreciate it if you could check on the script every so often in case anything hangs
|
||||
[5] the password handling is imperfect, so I would appreciate it if you could check on the script every so often in case anything hangs
|
||||
|
||||
THANK YOU VERY MUCH - matt
|
||||
'''
|
||||
@ -209,7 +210,7 @@ def for_all_files(start_index, stop_index):
|
||||
index=False,
|
||||
)
|
||||
except Exception as e:
|
||||
clone_error.append([row[5], e])
|
||||
clone_error.append([row[5], str(e)])
|
||||
print(f"outside cloning error: {e}")
|
||||
finally:
|
||||
und_repo_id = ""
|
||||
@ -224,7 +225,7 @@ def for_all_files(start_index, stop_index):
|
||||
print(clone_error)
|
||||
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
|
||||
for error in clone_error:
|
||||
txt_file.write(error + "\n")
|
||||
txt_file.write(', '.join(error) + "\n")
|
||||
with open(f"{stop_index}-success-output.txt", "w") as txt_file:
|
||||
txt_file.write(f"Number of Empty Rows: {empty_row} \n")
|
||||
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")
|
||||
|
3
12825_revision/for_batching/requirements.txt
Normal file
3
12825_revision/for_batching/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
GitPython==3.1.40
|
||||
pandas==2.1.2
|
||||
tqdm==4.66.1
|
@ -132,7 +132,7 @@ def diff_analysis(diffs):
|
||||
|
||||
def for_all_files(start_index, stop_index):
|
||||
cwd = os.getcwd()
|
||||
csv_path = "../final_data/deb_full_data.csv"
|
||||
csv_path = "for_batching/deb_full_data.csv"
|
||||
index = -1
|
||||
saved = []
|
||||
empty_row = 0
|
||||
@ -209,7 +209,7 @@ def for_all_files(start_index, stop_index):
|
||||
index=False,
|
||||
)
|
||||
except Exception as e:
|
||||
clone_error.append([row[5], e])
|
||||
clone_error.append([row[5], str(e)])
|
||||
print(f"outside cloning error: {e}")
|
||||
finally:
|
||||
und_repo_id = ""
|
||||
@ -224,7 +224,7 @@ def for_all_files(start_index, stop_index):
|
||||
print(clone_error)
|
||||
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
|
||||
for error in clone_error:
|
||||
txt_file.write(error + "\n")
|
||||
txt_file.write(', '.join(error) + "\n")
|
||||
with open(f"{stop_index}-success-output.txt", "w") as txt_file:
|
||||
txt_file.write(f"Number of Empty Rows: {empty_row} \n")
|
||||
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")
|
||||
|
Loading…
Reference in New Issue
Block a user