Advertisement
indraginanjar

defrag.py - don't remember the original name/source

Sep 11th, 2011
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 18.10 KB | None | 0 0
  1. #!/usr/bin/env python
  2. from __future__ import division
  3. import sys, os, re
  4. import getopt
  5. import sys
  6. import time
  7. import os
  8. import subprocess
  9. from collections import deque
  10.  
  11. try:
  12.     import psyco
  13.     psyco.full()
  14. except: pass
  15. ext_pattern=re.compile(r'.*:\s+(\d+) extents? found')
  16. def _supports_progress(f):
  17.     if not hasattr(f, 'isatty'):
  18.         return False
  19.     if not f.isatty():
  20.         return False
  21.     if os.environ.get('TERM') == 'dumb':
  22.         # e.g. emacs compile window
  23.         return False
  24.     return True
  25.  
  26. def ProgressBar(to_file=sys.stderr, **kwargs):
  27.     """Abstract factory"""
  28.     if _supports_progress(to_file):
  29.         return TTYProgressBar(to_file=to_file, **kwargs)
  30.     else:
  31.         return DotsProgressBar(to_file=to_file, **kwargs)
  32.  
  33. class _BaseProgressBar(object):
  34.     def __init__(self,
  35.                  to_file=sys.stderr,
  36.                  show_pct=False,
  37.                  show_spinner=False,
  38.                  show_bar=True,
  39.                  show_count=True,
  40.                  to_messages_file=sys.stdout,
  41.                  _stack=None):
  42.         object.__init__(self)
  43.         self.to_file = to_file
  44.         self.to_messages_file = to_messages_file
  45.         self.last_msg = None
  46.         self.last_cnt = None
  47.         self.last_total = None
  48.         self.show_pct = show_pct
  49.         self.show_spinner = show_spinner
  50.         self.show_bar = show_bar
  51.         self.show_count = show_count
  52.         self._stack = _stack
  53.         # seed throttler
  54.         self.MIN_PAUSE = 0.5 # seconds
  55.         now = time.time()
  56.         # starting now
  57.         self.start_time = now
  58.         # next update should not throttle
  59.         self.last_update = now - self.MIN_PAUSE - 1
  60.  
  61.     def finished(self):
  62.         """Return this bar to its progress stack."""
  63.         self.clear()
  64.         assert self._stack is not None
  65.         self._stack.return_pb(self)
  66.  
  67.     def note(self, fmt_string, *args, **kwargs):
  68.         """Record a note without disrupting the progress bar."""
  69.         self.clear()
  70.         self.to_messages_file.write(fmt_string % args)
  71.         self.to_messages_file.write('\n')
  72.  
  73.     def child_progress(self, **kwargs):
  74.         return ChildProgress(**kwargs)
  75.  
  76. class DotsProgressBar(_BaseProgressBar):
  77.  
  78.     def __init__(self, **kwargs):
  79.         _BaseProgressBar.__init__(self, **kwargs)
  80.         self.last_msg = None
  81.         self.need_nl = False
  82.        
  83.     def tick(self):
  84.         self.update()
  85.        
  86.     def update(self, msg=None, current_cnt=None, total_cnt=None):
  87.         if msg and msg != self.last_msg:
  88.             if self.need_nl:
  89.                 self.to_file.write('\n')
  90.            
  91.             self.to_file.write(msg + ': ')
  92.             self.last_msg = msg
  93.         self.need_nl = True
  94.         self.to_file.write('.')
  95.        
  96.     def clear(self):
  97.         if self.need_nl:
  98.             self.to_file.write('\n')
  99.        
  100.     def child_update(self, message, current, total):
  101.         self.tick()
  102.    
  103. class TTYProgressBar(_BaseProgressBar):
  104.     """Progress bar display object.
  105.  
  106.    Several options are available to control the display.  These can
  107.    be passed as parameters to the constructor or assigned at any time:
  108.  
  109.    show_pct
  110.        Show percentage complete.
  111.    show_spinner
  112.        Show rotating baton.  This ticks over on every update even
  113.        if the values don't change.
  114.    show_eta
  115.        Show predicted time-to-completion.
  116.    show_bar
  117.        Show bar graph.
  118.    show_count
  119.        Show numerical counts.
  120.  
  121.    The output file should be in line-buffered or unbuffered mode.
  122.    """
  123.     SPIN_CHARS = r'/-\|'
  124.  
  125.  
  126.     def __init__(self, **kwargs):
  127.         #from bzrlib.osutils import terminal_width
  128.         #TODO: Determine terminal width
  129.         _BaseProgressBar.__init__(self, **kwargs)
  130.         self.spin_pos = 0
  131.         self.width = 80
  132.         self.start_time = time.time()
  133.         self.last_updates = deque()
  134.         self.child_fraction = 0
  135.    
  136.  
  137.     def throttle(self):
  138.         """Return True if the bar was updated too recently"""
  139.         # time.time consistently takes 40/4000 ms = 0.01 ms.
  140.         # but every single update to the pb invokes it.
  141.         # so we use time.time which takes 20/4000 ms = 0.005ms
  142.         # on the downside, time.time() appears to have approximately
  143.         # 10ms granularity, so we treat a zero-time change as 'throttled.'
  144.        
  145.         now = time.time()
  146.         interval = now - self.last_update
  147.         # if interval > 0
  148.         if interval < self.MIN_PAUSE:
  149.             return True
  150.  
  151.         self.last_updates.append(now - self.last_update)
  152.         self.last_update = now
  153.         return False
  154.        
  155.  
  156.     def tick(self):
  157.         self.update(self.last_msg, self.last_cnt, self.last_total,
  158.                     self.child_fraction)
  159.  
  160.     def child_update(self, message, current, total):
  161.         if current is not None and total != 0:
  162.             child_fraction = float(current) / total
  163.             if self.last_cnt is None:
  164.                 pass
  165.             elif self.last_cnt + child_fraction <= self.last_total:
  166.                 self.child_fraction = child_fraction
  167.         if self.last_msg is None:
  168.             self.last_msg = ''
  169.         self.tick()
  170.  
  171.  
  172.     def update(self, msg, current_cnt=None, total_cnt=None,
  173.                child_fraction=0):
  174.         """Update and redraw progress bar."""
  175.  
  176.         if current_cnt < 0:
  177.             current_cnt = 0
  178.            
  179.         if current_cnt > total_cnt:
  180.             total_cnt = current_cnt
  181.        
  182.  
  183.         old_msg = self.last_msg
  184.         # save these for the tick() function
  185.         self.last_msg = msg
  186.         self.last_cnt = current_cnt
  187.         self.last_total = total_cnt
  188.         self.child_fraction = child_fraction
  189.  
  190.         # each function call takes 20ms/4000 = 0.005 ms,
  191.         # but multiple that by 4000 calls -> starts to cost.
  192.         # so anything to make this function call faster
  193.         # will improve base 'diff' time by up to 0.1 seconds.
  194.         if old_msg == self.last_msg and self.throttle():
  195.             return
  196.  
  197.         if self.show_spinner:
  198.             spin_str = self.SPIN_CHARS[self.spin_pos % 4] + ' '            
  199.         else:
  200.             spin_str = ''
  201.  
  202.         # always update this; it's also used for the bar
  203.         self.spin_pos += 1
  204.  
  205.         if self.show_pct and self.last_total and self.last_cnt:
  206.             pct = 100.0 * ((self.last_cnt + self.child_fraction) / self.last_total)
  207.             pct_str = ' (%5.1f%%)' % pct
  208.         else:
  209.             pct_str = ''
  210.  
  211.         if not self.show_count:
  212.             count_str = ''
  213.         elif self.last_cnt is None:
  214.             count_str = ''
  215.         elif self.last_total is None:
  216.             count_str = ' %i' % (self.last_cnt)
  217.         else:
  218.             # make both fields the same size
  219.             t = '%i' % (self.last_total)
  220.             c = '%*i' % (len(t), self.last_cnt)
  221.             count_str = ' ' + c + '/' + t
  222.  
  223.         if self.show_bar:
  224.             # progress bar, if present, soaks up all remaining space
  225.             cols = self.width - 1 - len(self.last_msg) - len(spin_str) - len(pct_str) \
  226.                    - len(count_str) - 3
  227.  
  228.             if self.last_total:
  229.                 # number of markers highlighted in bar
  230.                 markers = int(round(float(cols) *
  231.                               (self.last_cnt + self.child_fraction) / self.last_total))
  232.                 bar_str = '[' + ('=' * markers).ljust(cols) + '] '
  233.             elif False:
  234.                 # don't know total, so can't show completion.
  235.                 # so just show an expanded spinning thingy
  236.                 m = self.spin_pos % cols
  237.                 ms = (' ' * m + '*').ljust(cols)
  238.                
  239.                 bar_str = '[' + ms + '] '
  240.             else:
  241.                 bar_str = ''
  242.         else:
  243.             bar_str = ''
  244.  
  245.         m = spin_str + self.last_msg + bar_str + count_str + pct_str
  246.  
  247.         assert len(m) < self.width
  248.         self.to_file.write('\r' + m.ljust(self.width - 1))
  249.         #self.to_file.flush()
  250.            
  251.     def clear(self):        
  252.         self.to_file.write('\r%s\r' % (' ' * (self.width - 1)))
  253.         #self.to_file.flush()        
  254.  
  255.  
  256. class ChildProgress(_BaseProgressBar):
  257.     """A progress indicator that pushes its data to the parent"""
  258.     def __init__(self, _stack, **kwargs):
  259.         _BaseProgressBar.__init__(self, _stack=_stack, **kwargs)
  260.         self.parent = _stack.top()
  261.         self.current = None
  262.         self.total = None
  263.         self.child_fraction = 0
  264.         self.message = None
  265.  
  266.     def update(self, msg, current_cnt=None, total_cnt=None):
  267.         self.current = current_cnt
  268.         self.total = total_cnt
  269.         self.message = msg
  270.         self.child_fraction = 0
  271.         self.tick()
  272.  
  273.     def child_update(self, message, current, total):
  274.         if current is None or total == 0:
  275.             self.child_fraction = 0
  276.         else:
  277.             self.child_fraction = float(current) / total
  278.         self.tick()
  279.  
  280.     def tick(self):
  281.         if self.current is None:
  282.             count = None
  283.         else:
  284.             count = self.current+self.child_fraction
  285.             if count > self.total:
  286.                 count = self.total
  287.         self.parent.child_update(self.message, count, self.total)
  288.  
  289.     def clear(self):
  290.         pass
  291.  
  292.     def note(self, *args, **kwargs):
  293.         self.parent.note(*args, **kwargs)
  294.  
  295.  
  296. def str_tdelta(delt):
  297.     if delt is None:
  298.         return "-:--:--"
  299.     delt = int(round(delt))
  300.     return '%d:%02d:%02d' % (delt/3600,
  301.                              (delt/60) % 60,
  302.                              delt % 60)
  303.  
  304. def sort_by_value(d):
  305.     """ Returns the keys of dictionary d sorted by their values """
  306.     items=d.items()
  307.     backitems=[ [v[1],v[0]] for v in items]
  308.     backitems.sort()
  309.     backitems.reverse()
  310.     return [ backitems[i][1] for i in xrange(0,len(backitems))]
  311.    
  312. def copy(args):
  313.     progress=subprocess.Popen(args,stdout=subprocess.PIPE,stderr=subprocess.PIPE).stdout
  314.     c=progress.readline()
  315.     c=progress.readline()
  316.     c=progress.readline()
  317.     while c != '':
  318.         c=progress.read(1)
  319.         if c =='\n':
  320.             progress.read()
  321.             sys.stdout.write(' '*80+'\r')
  322.             return
  323.         sys.stdout.write(c)
  324.         if c == '\r':
  325.             sys.stdout.write("     ")
  326.             sys.stdout.flush()
  327.    
  328. def defrag(file, fs, fragments, threshold):
  329.     #Performs a defrag
  330.     if fragments<= threshold:
  331.         print "     Already defragmented."
  332.         return fragments
  333.     status=subprocess.Popen(['lsof','-f','--',file], stdout=subprocess.PIPE).communicate()[0]
  334.     if len(status)>1 and not status.startswith("COMMAND"):
  335.         print "     Unable to determine if file is open, skipping..."
  336.         return fragments
  337.        
  338.     if len(status)>1 and status.split('\n')[1].split()[3].find('w')>=0:
  339.         print "     File is open for write, skipping..."
  340.         return fragments
  341.     import shutil
  342.     try:
  343.         os.mkdir(os.path.dirname(fs)+"/.defrag")
  344.     except OSError: pass
  345.     os.chmod(os.path.dirname(fs)+"/.defrag",0)
  346.     old_mtime=os.path.getmtime(file)
  347.     copy(["rsync",'-a','--progress', file,os.path.dirname(fs)+"/.defrag/"])
  348.     new_numfrags=numfrags(os.path.dirname(fs)+"/.defrag/"+os.path.basename(file))
  349.     if new_numfrags >= fragments:
  350.         print "     No improvement (%.1f --> %.1f)" % (fragments,new_numfrags)
  351.         os.unlink(os.path.dirname(fs)+"/.defrag/"+os.path.basename(file))
  352.         return fragments
  353.     else:
  354.         if new_numfrags <= threshold:
  355.             print "     Fully defragmented!"
  356.         else:
  357.             print "     Improved: (%.1f --> %.1f)" % (fragments,new_numfrags)
  358.  
  359.         if os.path.getmtime(file) == old_mtime:
  360.             shutil.move(os.path.dirname(fs)+"/.defrag/"+os.path.basename(file), file)
  361.             return new_numfrags
  362.         else:
  363.             print "     Aborted: file changed during defrag."
  364.             return numfrags(file)
  365.  
  366. def build_filelist(fs):
  367.     #Uses find to get list of files on desired filesystem
  368.     list=subprocess.Popen(["/usr/bin/find",fs,'-xdev','-type','f', '-print0'],stdout=subprocess.PIPE).communicate()[0].split('\0')
  369.     return list[:-1]
  370. def numfrags(file):
  371.     #Uses filefrag to determine # of fragments.
  372.     fragresult=subprocess.Popen(["filefrag",file],stdout=subprocess.PIPE).communicate()[0]
  373.     match=ext_pattern.search(fragresult)
  374.     size = os.path.getsize(file)
  375.     if not match:
  376.         print "Error analyzing",file
  377.         return 0
  378.     else:
  379.         frags=int(match.expand(r'\1'))
  380.         if frags <= 1 or size == 0:
  381.             return 0
  382.     return (frags/(size/unit))
  383.  
  384.    
  385. def run(path,threshold=0.5,passes=-1,list=None, fragmatrix=None):
  386.     if not fragmatrix:
  387.         print "Building list of files to analyze...",
  388.         sys.stdout.flush()
  389.         list=build_filelist(path)
  390.         print "done!"
  391.         fragmatrix={}
  392.         n=0
  393.         pb = ProgressBar(show_pct=True, show_bar=True, show_spinner=True)
  394.         for file in list:
  395.             pb.update((file+" "*25)[:25]+" ",float(n),len(list))
  396.             n+=1
  397.             f=numfrags(file)
  398.             if f> threshold: fragmatrix[file]=f
  399.         pb.clear()
  400.         print "\nAnalyze finished.",
  401.         if len(fragmatrix) == 0:
  402.             print "\n%s is not fragmented. Go home." % path
  403.             sys.exit(0)
  404.     most_fragmented_files=sort_by_value(fragmatrix)
  405.     frags=sum(fragmatrix.values())
  406.     print "%.1f" % (len(most_fragmented_files)*100.0/len(list)),"%% fragmentation (%d files)," % len(most_fragmented_files),  "%.1f average frags" % (float(frags)/len(fragmatrix))
  407.     print "Fragmented files:"
  408.     try:
  409.         for i in xrange(0,10):
  410.             print "%.2f\t%s" % (fragmatrix[most_fragmented_files[i]], most_fragmented_files[i])
  411.         print "..."
  412.     except IndexError: pass
  413.     if passes <0:
  414.         while True:
  415.             try:
  416.                 tmp=raw_input("How many passes to run? [10] ")
  417.                 if len(tmp)==0:
  418.                     passes=-10
  419.                     break
  420.                 passes=int(tmp)*abs(passes)/passes
  421.                 break
  422.             except ValueError: pass
  423.     try:    
  424.         for k in xrange(0,abs(passes)):
  425.             n=0
  426.             rfrags=0
  427.             t=0
  428.             for t in most_fragmented_files:
  429.                 rfrags+=fragmatrix[t]
  430.             print "\n",(str(k+1))*60
  431.             print "===> Pass", k+1,"of", abs(passes),"<===    Remaining Fragmentation %d/%d (%d%%)" % (rfrags,frags,(rfrags/frags)*100)
  432.             print (str(k+1))*60+"\n"
  433.             for i in most_fragmented_files:
  434.                 n+=1
  435.                 print "\n  Pass %d of %d, %d/%d (%d%%):" % (k+1,abs(passes),n,len(most_fragmented_files),100*n/len(most_fragmented_files)), "%.1f frags" % fragmatrix[i], i
  436.                 try:
  437.                     fragmatrix[i]=defrag(i,path,fragmatrix[i],threshold)
  438.                 except OSError:
  439.                     print "     Error defragmenting. Did the file disappear?"
  440.             tmp=most_fragmented_files[:]
  441.             for i in tmp:
  442.             if fragmatrix[i] <= threshold:
  443.                     most_fragmented_files.remove(i)
  444.  
  445.     finally:
  446.        
  447.         print "========= COMPLETE ==========="
  448.         print "Remaining Fragmented Files:"
  449.         try:
  450.             most_fragmented_files.reverse()
  451.             for i in most_fragmented_files:
  452.                 print "%.2f\t%s" % (fragmatrix[i], i)
  453.         except IndexError: pass
  454.         print "Frags/unit Before:\t %.2f" % frags
  455.         nowfrags=0
  456.         for i in most_fragmented_files:
  457.             nowfrags+=fragmatrix[i]
  458.         print "Frags/unit After: \t %.2f" % nowfrags
  459.         print "Improvement:    \t %.1f %%" % abs((frags-nowfrags)/frags * 100)
  460.         print "==============================="
  461.         if nowfrags and passes < 0:
  462.             response='Fish'
  463.             while not response in ('Y','y','N','n',''):
  464.                 response=raw_input("\nThere is still fragmentation. Run another set of passes? [Y/n]")
  465.             if response in ('Y','y',''):
  466.                 run(path,threshold,passes,list,fragmatrix)
  467.  
  468. def fillunits():
  469.     udict={}
  470.     n = 1
  471.     ni = 1
  472.     for key in ('B','K','M','G','T','P','E','Z','Y'):
  473.     udict[key]=n
  474.     udict[key+'B']=n
  475.     udict[key+'I']=ni
  476.     udict[key+'IB']=ni
  477.     n *= 1000
  478.     ni *= 1024
  479.     return udict    
  480.  
  481. try:
  482.     opts=None
  483.     try:
  484.         opts=getopt.gnu_getopt(sys.argv[1:],'hn:t:a:u',('help', 'passes=', 'threshold=' ,'analyze','unit='))
  485.         passes = -1
  486.         threshold = .5
  487.         unit = 1024*1024
  488.         unitstr='MiB'
  489.         unitdict=fillunits()
  490.         for i in opts[0]:
  491.             if   i[0] in ('-n','--passes'):
  492.                 passes=int(i[1])
  493.             elif i[0] in ('-h', '--help'):
  494.                 raise getopt.GetoptError("")
  495.             elif i[0] in ('-t','--threshold'):
  496.         try:
  497.             threshold=float(i[1])
  498.         except:
  499.             raise getopt.GetoptError("Threshold must be a float")
  500.             elif i[0] in ('-a','--analyze'):
  501.                 passes=0
  502.                 threshold=0
  503.             elif i[0] in ('-u','--unit'):
  504.         if i[1].isdigit(): unit = int(i[1])
  505.         elif i[1].upper() in unitdict.keys():
  506.             unit = unitdict[i[1].upper()]
  507.             unitstr=i[1]
  508.         else: raise getopt.GetoptError("Unit may be a positive integer or a unit (B KB MB GB TB PB EB ZB YB):\n with K,KB=1000, KI=KIB=1024 ,etc")
  509.     del unitdict    
  510.     if os.getuid():
  511.         if passes == 0:
  512.         print "This usually requires root access, but will try anyway"
  513.         else:
  514.         raise getopt.GetoptError("Everything bu analysis always requires root/sudo access.")
  515.         try:
  516.             if os.path.isdir(opts[1][0]) and opts[1][0][-1] != '/':
  517.                 opts[1][0]+="/"
  518.             run(opts[1][0], threshold, passes)
  519.         except IndexError:
  520.             raise getopt.GetoptError("No path specified")  
  521.     except getopt.GetoptError, info:
  522.         print info
  523.         print "%s [-h] [-n passes] [--passes n] [-t threshold] [--threshold n] [-a] [--analyze] [-u unit] [--unit u] [--help] path"% sys.argv[0]
  524. finally:
  525.     if opts and len(opts[1]) and os.path.isdir(os.path.dirname(opts[1][0])+"/.defrag"):
  526.         subprocess.Popen(['rm','-f','-r',os.path.dirname(opts[1][0])+"/.defrag"], stdout=subprocess.PIPE).communicate()[0]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement