updating data collection

This commit is contained in:
Matthew Gaughan 2025-01-29 15:51:09 -06:00
parent eefc940a7c
commit ba42ca4ca7
3 changed files with 15 additions and 11 deletions

View File

@@ -10,16 +10,17 @@ import argparse
''' '''
RUNNING INSTRUCTIONS: RUNNING INSTRUCTIONS:
[1] set up tmux environment, most likely also using venv within it [1] set up tmux environment, install requirements.txt
[2] edit this file where marked "FIX BELOW" [2] edit this file where marked "FIX BELOW"
[3] install pip packages [3] in your tmux environment, run the following commands to handle password prompts
[4] in your tmux environment, run the following three commands to handle password prompts - eval "$(ssh-agent -s)"
- ssh-add ~/.ssh/id_ed25519
- export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=no' - export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=no'
- export GIT_ASKPASS='false' - export GIT_ASKPASS='false'
- export GIT_TERMINAL_PROMPT = '0' - export GIT_TERMINAL_PROMPT='0'
[5] in tmux, run the script from the terminal as follows with your START and STOP values [4] in tmux, run the script from the terminal as follows with your START and STOP values
- python3 intermediary_script.py --start_index START --stop_index STOP - python3 intermediary_script.py --start_index START --stop_index STOP
[6] the password handling is imperfect, so I would appreciate it if you could check on the script every so often in case anything hangs [5] the password handling is imperfect, so I would appreciate it if you could check on the script every so often in case anything hangs
THANK YOU VERY MUCH - matt THANK YOU VERY MUCH - matt
''' '''
@@ -209,7 +210,7 @@ def for_all_files(start_index, stop_index):
index=False, index=False,
) )
except Exception as e: except Exception as e:
clone_error.append([row[5], e]) clone_error.append([row[5], str(e)])
print(f"outside cloning error: {e}") print(f"outside cloning error: {e}")
finally: finally:
und_repo_id = "" und_repo_id = ""
@@ -224,7 +225,7 @@ def for_all_files(start_index, stop_index):
print(clone_error) print(clone_error)
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file: with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
for error in clone_error: for error in clone_error:
txt_file.write(error + "\n") txt_file.write(', '.join(error) + "\n")
with open(f"{stop_index}-success-output.txt", "w") as txt_file: with open(f"{stop_index}-success-output.txt", "w") as txt_file:
txt_file.write(f"Number of Empty Rows: {empty_row} \n") txt_file.write(f"Number of Empty Rows: {empty_row} \n")
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n") txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")

View File

@@ -0,0 +1,3 @@
GitPython==3.1.40
pandas==2.1.2
tqdm==4.66.1

View File

@@ -132,7 +132,7 @@ def diff_analysis(diffs):
def for_all_files(start_index, stop_index): def for_all_files(start_index, stop_index):
cwd = os.getcwd() cwd = os.getcwd()
csv_path = "../final_data/deb_full_data.csv" csv_path = "for_batching/deb_full_data.csv"
index = -1 index = -1
saved = [] saved = []
empty_row = 0 empty_row = 0
@@ -209,7 +209,7 @@ def for_all_files(start_index, stop_index):
index=False, index=False,
) )
except Exception as e: except Exception as e:
clone_error.append([row[5], e]) clone_error.append([row[5], str(e)])
print(f"outside cloning error: {e}") print(f"outside cloning error: {e}")
finally: finally:
und_repo_id = "" und_repo_id = ""
@@ -224,7 +224,7 @@ def for_all_files(start_index, stop_index):
print(clone_error) print(clone_error)
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file: with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
for error in clone_error: for error in clone_error:
txt_file.write(error + "\n") txt_file.write(', '.join(error) + "\n")
with open(f"{stop_index}-success-output.txt", "w") as txt_file: with open(f"{stop_index}-success-output.txt", "w") as txt_file:
txt_file.write(f"Number of Empty Rows: {empty_row} \n") txt_file.write(f"Number of Empty Rows: {empty_row} \n")
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n") txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")