"""Decompress every ``.bz2`` archive below a directory, deleting the archives.

Usage::

    python <this script> <directory>

Each ``*.bz2`` file found under ``<directory>`` is decompressed next to itself
(same name without the ``.bz2`` suffix) and the archive is then removed.
Archives that cannot be decompressed are counted, reported and removed too.
"""
import bz2
import os
import shutil
import sys

try:
    # Nothing in this file uses ``requests``; the import is kept for backward
    # compatibility but must not stop the tool from running on hosts where the
    # package is not installed.
    import requests
except ImportError:
    requests = None

# FILE_LOC_PREFIX = "/data_ext/users/nws8519/mw-repo-lifecycles/wiki_activity_data/single_activity_files/"

# Length of the ".bz2" suffix stripped to build the output file name.
_BZ2_SUFFIX = ".bz2"

# Failures ``bz2`` reports for unusable archives: ``OSError`` for an invalid
# data stream (and ordinary I/O problems), ``EOFError`` for a truncated file.
_DECOMPRESSION_ERRORS = (OSError, EOFError)


def _remove_quietly(path):
    """Delete ``path``; return True on success, False if it could not be removed."""
    try:
        os.remove(path)
    except OSError:
        return False
    return True


def _iter_bz2_files(directory_name):
    """Yield the full path of every ``*.bz2`` file found below ``directory_name``."""
    for root, _dirs, names in os.walk(directory_name):
        for name in names:
            if name.endswith(_BZ2_SUFFIX):
                yield os.path.join(root, name)


def decompress(filepath: str) -> None:
    """Decompress one bzip2 archive next to itself and delete the archive.

    The output path is ``filepath`` with its last four characters removed, so
    ``filepath`` is expected to end in ``.bz2``.

    Raises:
        OSError: the archive is not valid bzip2 data, or reading, writing or
            deleting a file failed.
        EOFError: the archive is truncated.

    If the copy itself fails, the partially written output file is removed
    before the exception propagates, so no half-decompressed file is left
    behind looking like real data. The archive is left in place in that case.
    """
    decompressed_filepath = filepath[:-len(_BZ2_SUFFIX)]
    with bz2.BZ2File(filepath) as fr, open(decompressed_filepath, "wb") as fw:
        try:
            shutil.copyfileobj(fr, fw)
        except _DECOMPRESSION_ERRORS:
            # Close first: an open file cannot be unlinked on every platform.
            fw.close()
            _remove_quietly(decompressed_filepath)
            raise
    print(f"Decompressed {decompressed_filepath}")
    os.remove(filepath)
    print(f"Deleted {filepath}")


def decompress_directory(directory_name: str) -> int:
    """Decompress every ``*.bz2`` file below ``directory_name``.

    Archives that fail to decompress are reported and deleted so that a rerun
    does not trip over them again; the traversal always continues with the
    next file.

    Returns:
        The number of archives that could not be processed. This counts both
        ``OSError`` (invalid data, I/O failure) and ``EOFError`` (truncated
        archive).
    """
    failures = 0
    for filepath in _iter_bz2_files(directory_name):
        print(filepath)
        try:
            decompress(filepath)
        except _DECOMPRESSION_ERRORS as err:
            failures += 1
            label = "EOFError" if isinstance(err, EOFError) else "OSError"
            print(f"{label} @ {filepath}")
            # The archive may already be gone or be undeletable; neither case
            # should abort the rest of the walk.
            if _remove_quietly(filepath):
                print(f"Deleted {filepath}")
            else:
                print(f"Could not delete {filepath}")
    return failures


def cleanup(directory_name: str) -> None:
    """Delete every ``*.bz2`` file below ``directory_name`` without decompressing."""
    for filepath in _iter_bz2_files(directory_name):
        os.remove(filepath)
        print(f"Deleted {filepath}")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <directory>")
    if not os.path.isdir(sys.argv[1]):
        sys.exit(f"Not a directory: {sys.argv[1]}")
    # batch_parallel_for_single()
    decompression_errors = decompress_directory(sys.argv[1])
    # The count also includes truncated archives (EOFError); the wording is
    # kept unchanged so existing log searches still match.
    print(f"We had {decompression_errors} OSErrors during decompression.")
    # cleanup(FILE_LOC_PREFIX)