1
0
mw-convo-collections/src/helper_scripts/decompression_script.py

39 lines
1.2 KiB
Python

import requests
import bz2
import shutil
import os
# Directory tree that is walked for .bz2 archives when this file is run as a
# script (passed to decompress_directory in the __main__ block).
FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/yearly_activity_files/"
def decompress(filepath):
    """Decompress a single ``.bz2`` file next to itself and delete the archive.

    The output path is ``filepath`` with its trailing ``.bz2`` extension
    removed. Data is first written to a temporary ``<output>.part`` file and
    only moved to the final name once the whole archive has been decompressed,
    so a corrupt archive or an interrupted run never leaves a truncated file
    under the final name.

    Args:
        filepath: Path (str) to the bz2-compressed file; must end in ``.bz2``
            (case-insensitive).

    Raises:
        ValueError: If ``filepath`` does not end in ``.bz2``.
        OSError: If the archive cannot be read or decompressed, or the output
            cannot be written. The archive is left in place in that case.
    """
    # Validate the suffix instead of blindly slicing off four characters,
    # which would silently mangle the name of any non-.bz2 path.
    decompressed_filepath, extension = os.path.splitext(filepath)
    if extension.lower() != ".bz2":
        raise ValueError(f"Expected a .bz2 file, got {filepath!r}")

    partial_filepath = decompressed_filepath + ".part"
    try:
        with bz2.BZ2File(filepath) as fr, open(partial_filepath, "wb") as fw:
            shutil.copyfileobj(fr, fw)
    except BaseException:
        # Remove the incomplete output (also on KeyboardInterrupt), then let
        # the original error propagate unchanged.
        try:
            os.remove(partial_filepath)
        except FileNotFoundError:
            pass
        raise

    # Publish the complete output under its final name (overwrites any
    # existing file, as opening the target with "wb" did before).
    os.replace(partial_filepath, decompressed_filepath)
    print(f"Decompressed {decompressed_filepath}")

    # The archive is only deleted after a fully successful decompression.
    os.remove(filepath)
    print(f"Deleted {filepath}")
def decompress_directory(directory_name):
    """Decompress every ``.bz2`` file found anywhere under ``directory_name``.

    The tree is walked with ``os.walk``; for each file whose name ends in
    ``.bz2`` the full path is printed and then handed to ``decompress``.

    Args:
        directory_name: Root directory of the tree to process.
    """
    for current_dir, _subdirs, filenames in os.walk(directory_name):
        # Only archives are of interest; everything else is skipped.
        archive_names = (name for name in filenames if name.endswith('.bz2'))
        for archive_name in archive_names:
            archive_path = os.path.join(current_dir, archive_name)
            print(archive_path)
            decompress(archive_path)
def cleanup(directory_name):
    """Delete every ``.bz2`` file found anywhere under ``directory_name``.

    Each removed path is reported on stdout. Files with any other name are
    left untouched.

    Args:
        directory_name: Root directory of the tree to clean.
    """
    for current_dir, _subdirs, filenames in os.walk(directory_name):
        for name in filenames:
            if not name.endswith('.bz2'):
                continue
            target = os.path.join(current_dir, name)
            os.remove(target)
            print(f"Deleted {target}")
# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    # Disabled alternative; batch_parallel_for_single is not defined in the
    # visible part of this file — NOTE(review): confirm where it lives.
    #batch_parallel_for_single()
    # Decompress every .bz2 archive found under FILE_LOC_PREFIX.
    decompress_directory(FILE_LOC_PREFIX)
    # Disabled: would delete every .bz2 file still present under FILE_LOC_PREFIX.
    #cleanup(FILE_LOC_PREFIX)