updating data collection
This commit is contained in:
parent
eefc940a7c
commit
ba42ca4ca7
@ -10,16 +10,17 @@ import argparse
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
RUNNING INSTRUCTIONS:
|
RUNNING INSTRUCTIONS:
|
||||||
[1] set up tmux environment, most likely also using venv within it
|
[1] set up a tmux environment and install the packages listed in requirements.txt
|
||||||
[2] edit this file where marked "FIX BELOW"
|
[2] edit this file where marked "FIX BELOW"
|
||||||
[3] install pip packages
|
[3] in your tmux environment, run the following commands to handle password prompts
|
||||||
[4] in your tmux environment, run the following three commands to handle password prompts
|
- eval "$(ssh-agent -s)"
|
||||||
|
- ssh-add ~/.ssh/id_ed25519
|
||||||
- export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=no'
|
- export GIT_SSH_COMMAND='ssh -o StrictHostKeyChecking=no'
|
||||||
- export GIT_ASKPASS='false'
|
- export GIT_ASKPASS='false'
|
||||||
- export GIT_TERMINAL_PROMPT = '0'
|
- export GIT_TERMINAL_PROMPT='0'
|
||||||
[5] in tmux, run the script from the terminal as follows with your START and STOP values
|
[4] in tmux, run the script from the terminal as follows with your START and STOP values
|
||||||
- python3 intermediary_script.py --start_index START --stop_index STOP
|
- python3 intermediary_script.py --start_index START --stop_index STOP
|
||||||
[6] the password handling is imperfect, so I would appreciate if you could check on the script every so often in case anything hangs
|
[5] the password handling is imperfect, so I would appreciate it if you could check on the script every so often in case anything hangs
|
||||||
|
|
||||||
THANK YOU VERY MUCH - matt
|
THANK YOU VERY MUCH - matt
|
||||||
'''
|
'''
|
||||||
@ -209,7 +210,7 @@ def for_all_files(start_index, stop_index):
|
|||||||
index=False,
|
index=False,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
clone_error.append([row[5], e])
|
clone_error.append([row[5], str(e)])
|
||||||
print(f"outside cloning error: {e}")
|
print(f"outside cloning error: {e}")
|
||||||
finally:
|
finally:
|
||||||
und_repo_id = ""
|
und_repo_id = ""
|
||||||
@ -224,7 +225,7 @@ def for_all_files(start_index, stop_index):
|
|||||||
print(clone_error)
|
print(clone_error)
|
||||||
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
|
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
|
||||||
for error in clone_error:
|
for error in clone_error:
|
||||||
txt_file.write(error + "\n")
|
txt_file.write(', '.join(error) + "\n")
|
||||||
with open(f"{stop_index}-success-output.txt", "w") as txt_file:
|
with open(f"{stop_index}-success-output.txt", "w") as txt_file:
|
||||||
txt_file.write(f"Number of Empty Rows: {empty_row} \n")
|
txt_file.write(f"Number of Empty Rows: {empty_row} \n")
|
||||||
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")
|
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")
|
||||||
|
3
12825_revision/for_batching/requirements.txt
Normal file
3
12825_revision/for_batching/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
GitPython==3.1.40
|
||||||
|
pandas==2.1.2
|
||||||
|
tqdm==4.66.1
|
@ -132,7 +132,7 @@ def diff_analysis(diffs):
|
|||||||
|
|
||||||
def for_all_files(start_index, stop_index):
|
def for_all_files(start_index, stop_index):
|
||||||
cwd = os.getcwd()
|
cwd = os.getcwd()
|
||||||
csv_path = "../final_data/deb_full_data.csv"
|
csv_path = "for_batching/deb_full_data.csv"
|
||||||
index = -1
|
index = -1
|
||||||
saved = []
|
saved = []
|
||||||
empty_row = 0
|
empty_row = 0
|
||||||
@ -209,7 +209,7 @@ def for_all_files(start_index, stop_index):
|
|||||||
index=False,
|
index=False,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
clone_error.append([row[5], e])
|
clone_error.append([row[5], str(e)])
|
||||||
print(f"outside cloning error: {e}")
|
print(f"outside cloning error: {e}")
|
||||||
finally:
|
finally:
|
||||||
und_repo_id = ""
|
und_repo_id = ""
|
||||||
@ -224,7 +224,7 @@ def for_all_files(start_index, stop_index):
|
|||||||
print(clone_error)
|
print(clone_error)
|
||||||
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
|
with open(f"{stop_index}-clone-error-output.txt", "w") as txt_file:
|
||||||
for error in clone_error:
|
for error in clone_error:
|
||||||
txt_file.write(error + "\n")
|
txt_file.write(', '.join(error) + "\n")
|
||||||
with open(f"{stop_index}-success-output.txt", "w") as txt_file:
|
with open(f"{stop_index}-success-output.txt", "w") as txt_file:
|
||||||
txt_file.write(f"Number of Empty Rows: {empty_row} \n")
|
txt_file.write(f"Number of Empty Rows: {empty_row} \n")
|
||||||
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")
|
txt_file.write(f"Number of Cloning Errors: {len(clone_error)} \n")
|
||||||
|
Loading…
Reference in New Issue
Block a user