Eu achei que retornar o arquivo mais compactado fazia parte do escopo rsrs.
Demorei mais, mas valeu a pena.
Segue o código.
import os
def GetBestCompression(
    output_dir="/FileStore/tables/arquivos_curso/json_zip_types/",
    fs=None,
):
    """Find, print and return the smallest recognized file under ``output_dir``.

    Every entry directly under ``output_dir`` is listed, then each of those
    entries is listed in turn. Files whose last extension is one of the
    recognized formats become candidates, and the candidate with the
    smallest ``size`` wins. On a tie the first one encountered is kept.

    Args:
        output_dir: Root location whose immediate children are scanned.
        fs: Object exposing ``ls(path)``. The entries it returns must
            provide ``path``, ``name``, ``size`` and ``isFile()``. When
            omitted, the notebook-level ``dbutils.fs`` is used.

    Returns:
        A ``(file_name, info)`` tuple. ``file_name`` is the file name
        without its last extension and ``info`` is a dict with the keys
        ``'extension'`` and ``'size'``.

    Raises:
        ValueError: If no file with a recognized extension is found.
    """
    if fs is None:
        # ``dbutils`` is not defined in this file; it is expected to exist
        # as a global in the environment running this code.
        fs = dbutils.fs

    # NOTE(review): '.json' is presumably the uncompressed baseline kept for
    # comparison -- confirm that it is meant to compete with the others.
    possible_extensions = {'.bz2', '.deflate', '.gz', '.lz4', '.json', '.snappy'}

    # A list rather than a dict keyed by file name: equally named files in
    # different subdirectories must not overwrite each other.
    candidates = []
    for entry in fs.ls(output_dir):
        for file in fs.ls(entry.path):
            if not file.isFile():
                continue
            file_name, file_extension = os.path.splitext(file.name)
            if file_extension in possible_extensions:
                candidates.append(
                    (file_name, {'extension': file_extension, 'size': file.size})
                )

    if not candidates:
        # Same exception type min() would raise on an empty sequence, but
        # with a message that says what is actually missing.
        raise ValueError(
            'no file with a recognized extension found under {}'.format(output_dir)
        )

    smallest_file = min(candidates, key=lambda item: item[1]['size'])
    print('menor arquivo: {}'.format(smallest_file))
    return smallest_file