Advertisement
Guest User

File extension finder with human-readable sizes

a guest
Oct 17th, 2019
184
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.79 KB | None | 0 0
  1. #!/usr/bin/python2.7
  2.  
  3. import os, sys
  4. from stat import *
  5.  
  6. # Quick utility: recursively walk a directory tree, tallying the files found by extension, and display the per-extension totals at the end.
  7.  
# Extensions longer than this many characters are skipped when counting.
MAX_EXT_LENGTH = 5
  9.  
  10. class file_extension(object):
  11.     count = 0
  12.     size = 0 # total size
  13.  
  14.  
  15. class file_sorter(object):
  16.     def __init__(self, top_location):
  17.         self.top_location = top_location
  18.         self.file_ext = {}
  19.         self.failed_count = 0
  20.  
  21.     def count_extensions(self):
  22.         """
  23.        This function recursively walks the directory looking for files. When it finds one
  24.        it checks to see if we already have seen this extension, if not it adds an entry to the dictionary
  25.        then it adds one to the count and gets the size
  26.        """
  27.         for root, dirs, files in os.walk(self.top_location):
  28.             for name in files:
  29.                 full_path = os.path.join(root, name)
  30.                 #print full_path
  31.                 extension = name.split('.')[-1].lower() # we just want the last bit and let's lower case it so .MP3 and .mp3 are the same time.
  32.                 if len(extension) > MAX_EXT_LENGTH: # bail on weird super long extensions
  33.                     continue
  34.                 #print extension
  35.                 if not extension: # what if you have a file without an extension?
  36.                     continue
  37.                 if not self.file_ext.has_key(extension):
  38.                     self.file_ext[extension] = file_extension()
  39.                     self.file_ext[extension].extension = extension
  40.                
  41.                 try:
  42.                     size = os.stat(full_path).st_size
  43.                 except:
  44.                     #print "Failed on file:", full_path
  45.                     self.failed_count += 1
  46.                     continue
  47.                 self.file_ext
  48.                 self.file_ext[extension].count += 1
  49.                 self.file_ext[extension].size += size # this is in bytes
  50.    
  51.     def print_results(self):
  52.         # This just prints sorting by key..
  53.         #for key in sorted(self.file_ext.keys()):
  54.         #    print "%-10s\t%d\t%d" % (key, self.file_ext[key].count, self.file_ext[key].size)
  55.         """
  56.        This function reverse sorts the dictionary using the size as a lamda key
  57.        change size to count to get the counts
  58.        """
  59.         print "Extension\tCount\tSize"
  60.         for key in sorted(self.file_ext, key=lambda i: int(self.file_ext[i].size), reverse = True):
  61.             print "%-10s\t%d\t%s" % (key, self.file_ext[key].count, self.human_size(self.file_ext[key].size))
  62.         print "Faile to scan %d files" % self.failed_count
  63.      
  64.     def human_size(self, size_bytes):
  65.         """
  66.        format a size in bytes into a 'human' file size, e.g. bytes, KB, MB, GB, TB, PB
  67.        Note that bytes/KB will be reported in whole numbers but MB and above will have greater precision
  68.        e.g. 1 byte, 43 bytes, 443 KB, 4.3 MB, 4.43 GB, etc
  69.        """
  70.         if size_bytes == 1:
  71.             # because I really hate unnecessary plurals
  72.             return "1 byte"
  73.  
  74.         suffixes_table = [('bytes',0),('KB',0),('MB',1),('GB',2),('TB',2), ('PB',2)]
  75.  
  76.         num = float(size_bytes)
  77.         for suffix, precision in suffixes_table:
  78.             if num < 1024.0:
  79.                 break
  80.             num /= 1024.0
  81.  
  82.         if precision == 0:
  83.             formatted_size = "%d" % num
  84.         else:
  85.             formatted_size = str(round(num, ndigits=precision))
  86.  
  87.         return "%s %s" % (formatted_size, suffix)  
  88.        
  89.        
  90.                    
  91.  
  92. def main(argv):
  93.     if len(argv) == 0:
  94.         print("Doh! You must enter a path to search!")
  95.         exit(-1)
  96.     x = file_sorter(argv[0])
  97.     x.count_extensions()
  98.     x.print_results()
  99.  
  100. if __name__ == '__main__':
  101.     main(sys.argv[1:])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement