updated scripts
This commit is contained in:
		
							parent
							
								
									a342c2a67f
								
							
						
					
					
						commit
						c57b760d5c
					
				| @ -26,16 +26,23 @@ def get_file(vcs_link, commit_hash, is_readme): | ||||
|     repo = Repo.clone_from(vcs_link, full_temp_path) | ||||
|     commit = repo.commit(commit_hash) | ||||
|     #getting the name of the file from the root directory | ||||
|     target_filename = "" | ||||
|     for filename in os.listdir(full_temp_path): | ||||
|         if is_readme: | ||||
|             target_filename = "README.md" | ||||
|             #target_filename = "README.md" | ||||
|             if "README" in filename: | ||||
|                 target_filename = filename | ||||
|         else: | ||||
|             target_filename = "CONTRIBUTING.md" | ||||
|             #target_filename = "CONTRIBUTING.md" | ||||
|             if "CONTRIBUTING" in filename: | ||||
|                 target_filename = filename | ||||
|     targetfile = commit.tree / target_filename | ||||
|     if target_filename == "": | ||||
|         return "NoFile" | ||||
|     try: | ||||
|         targetfile = commit.tree / target_filename | ||||
|     except KeyError: | ||||
|         # why would a file not be in the commit tree? but would be in the directory? | ||||
|         return "KeyError" | ||||
|     if is_readme: | ||||
|         last_path = "readme" | ||||
|     else: | ||||
| @ -45,6 +52,7 @@ def get_file(vcs_link, commit_hash, is_readme): | ||||
|             file.write(f.read().decode('utf-8')) | ||||
|         file.close() | ||||
|     shutil.rmtree(full_temp_path, ignore_errors=True) | ||||
|     return "NoError" | ||||
| 
 | ||||
| def for_all_files(): | ||||
|     #toggle this based on readme or contributing files | ||||
| @ -52,17 +60,20 @@ def for_all_files(): | ||||
|     csv_path = "kk_031624_pr_did.csv" | ||||
|     index = -1 | ||||
|     with open(csv_path, 'r') as file: | ||||
|         #csv_reader = csv.DictReader(file) | ||||
|         lines = [line for line in file] | ||||
|         for row in tqdm(csv.reader(lines), total=len(lines)): | ||||
|             index += 1 | ||||
|             if index == 0: | ||||
|                 continue | ||||
|             if row[0] == "": | ||||
|                 continue | ||||
|             #print(row[0]) | ||||
|             get_file(row[0], row[2], readme_is) | ||||
|             #get_file('https://github.com/tqdm/tqdm', 'fbe7952cce11e8073378b063bdae7ab277a96eb8', True) | ||||
|         with open('031824_spec_errors.csv', "w") as writing_file: | ||||
|             #csv_reader = csv.DictReader(file) | ||||
|             lines = [line for line in file] | ||||
|             for row in tqdm(csv.reader(lines), total=len(lines)): | ||||
|                 index += 1 | ||||
|                 if index == 0: | ||||
|                     continue | ||||
|                 if row[0] == "": | ||||
|                     continue | ||||
|                 #print(row[0]) | ||||
|                 return_value = get_file(row[0], row[2], readme_is) | ||||
|                 if return_value != "NoError": | ||||
|                     writing_file.write(row[0], row[2], readme_is, return_value) | ||||
|                 #get_file('https://github.com/tqdm/tqdm', 'fbe7952cce11e8073378b063bdae7ab277a96eb8', True) | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     for_all_files() | ||||
|  | ||||
| @ -34,7 +34,7 @@ def file_get_pr(upstream_vcs_link, me_read): | ||||
|     else: | ||||
|         full_temp_path = temp_dir + upstream_vcs_link.split('/')[- 1] + ".git" | ||||
|     print(upstream_vcs_link) | ||||
|     if upstream_vcs_link == "https://gitlab.com/ubports/core": | ||||
|     if upstream_vcs_link == "https://gitlab.com/ubports/core" or upstream_vcs_link == "https://gitlab.freedesktop.org/xorg/lib": | ||||
|         shutil.rmtree(full_temp_path, ignore_errors=True) | ||||
|         return {} | ||||
|     repo = Git(uri=upstream_vcs_link, gitpath=full_temp_path) | ||||
| @ -145,11 +145,12 @@ def pr_count(start, end, commits, author_roster, commit_roster): | ||||
|             return [by_week, by_week_merge, new_authors, new_committers, author_roster, commit_roster] | ||||
| 
 | ||||
| def for_files(): | ||||
|     #csv_path = "final_data/deb_contribfile_roster.csv" | ||||
|     csv_path = "final_data/deb_readme_roster.csv" | ||||
|     count = 0  | ||||
|     with open(csv_path, 'r') as file: | ||||
|         csv_reader = csv.DictReader(file) | ||||
|         with open('kk_test_031624_pr_did.csv', "w") as writing_file: | ||||
|         with open('kk_031624_pr_did.csv', "w") as writing_file: | ||||
|             # this would also have to get switched for the cont dataset | ||||
|             keys = ['upstream_vcs_link', "first_readme", "readme_commit_hash", "before_allcom_read", "before_mrg_read", "after_allcom_read", "after_mrg_read", 'before_auth_new', 'after_commit_new', 'after_auth_new', 'before_commit_new'] | ||||
|             dict_writer = csv.DictWriter(writing_file, keys) | ||||
| @ -158,7 +159,10 @@ def for_files(): | ||||
|                 count += 1 | ||||
|                 print(row['upstream_vcs_link']) | ||||
|                 # this would have to get switched to false for the cont dataset | ||||
|                 dict_row = file_get_pr(row['upstream_vcs_link'].strip(), True) | ||||
|                 try: | ||||
|                     dict_row = file_get_pr(row['upstream_vcs_link'].strip(), True) | ||||
|                 except: | ||||
|                     dict_row = {} | ||||
|                 dict_writer.writerow(dict_row) | ||||
|      | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user