"""Decompress the .bz2 files under FILE_LOC_PREFIX in place, deleting each archive afterwards."""
import requests
|
|
import bz2
|
|
import shutil
|
|
import os
|
|
|
|
# Root directory that is walked for .bz2 files when this script is run directly.
FILE_LOC_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/wiki_activity_data/yearly_activity_files/"
|
|
|
|
def decompress(filepath):
    """Decompress a single ``.bz2`` file next to itself and delete the archive.

    The output path is *filepath* with the trailing ``.bz2`` removed. Data is
    first written to ``<output>.part`` and only renamed onto the final name
    once the whole stream has been decompressed, so a corrupt or truncated
    archive never leaves a half-written file that looks complete. The archive
    is deleted only after the output is in place.

    Args:
        filepath: Path (str) of the archive; must end in ``.bz2``.

    Raises:
        ValueError: If *filepath* does not end in ``.bz2``.
        OSError, EOFError: Propagated from reading/writing when the archive is
            missing, unreadable, corrupt, or truncated. The archive is kept.
    """
    suffix = ".bz2"
    if not filepath.endswith(suffix):
        # Blindly slicing off four characters would produce a bogus output
        # name (and could clobber an unrelated file).
        raise ValueError(f"Expected a path ending in {suffix!r}, got {filepath!r}")

    decompressed_filepath = filepath[: -len(suffix)]
    partial_filepath = decompressed_filepath + ".part"

    try:
        with bz2.BZ2File(filepath) as fr, open(partial_filepath, "wb") as fw:
            shutil.copyfileobj(fr, fw)
    except BaseException:
        # Covers interrupts as well: drop the truncated output, keep the
        # archive so the file can be retried, and let the error propagate.
        if os.path.exists(partial_filepath):
            os.remove(partial_filepath)
        raise

    # Promote the finished file to its final name (overwrites an existing one,
    # matching the original open(..., "wb") behaviour).
    os.replace(partial_filepath, decompressed_filepath)
    print(f"Decompressed {decompressed_filepath}")

    os.remove(filepath)
    print(f"Deleted {filepath}")
|
|
|
|
def decompress_directory(directory_name):
    """Decompress every ``.bz2`` file found anywhere under *directory_name*.

    Walks the tree with ``os.walk``; for each file whose name ends in
    ``.bz2`` the full path is printed and then handed to ``decompress``.
    """
    for root, _dirs, files in os.walk(directory_name):
        # Only names carrying the .bz2 suffix are archives to process.
        archives = (name for name in files if name.endswith('.bz2'))
        for name in archives:
            filepath = os.path.join(root, name)
            print(filepath)
            decompress(filepath)
|
|
|
|
def cleanup(directory_name):
    """Delete every ``.bz2`` file beneath *directory_name*.

    The tree is traversed with ``os.walk``; each file whose name ends in
    ``.bz2`` is removed and its path is printed. Other files are untouched.
    """
    for root, _dirs, files in os.walk(directory_name):
        for name in files:
            if not name.endswith('.bz2'):
                continue
            filepath = os.path.join(root, name)
            os.remove(filepath)
            print(f"Deleted {filepath}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run as a script: decompress every .bz2 file under FILE_LOC_PREFIX.
    # To only delete .bz2 files without decompressing them, call
    # cleanup(FILE_LOC_PREFIX) here instead.
    decompress_directory(FILE_LOC_PREFIX)