Advertisement
homer512

proper tail py

Jun 17th, 2014
193
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.54 KB | None | 0 0
  1. #!/usr/bin/python2
  2.  
  3.  
  4. """Mimics UNIX tail, especially its behavior when dealing with large files
  5.  
  6. The I/O operations have nearly the same pattern as tail
  7. """
  8.  
  9. import collections
  10. import sys
  11. import errno
  12.  
  13.  
  14. def tail_unseekable(filein, count):
  15.     """Fallback for special files like pipes or terminals
  16.  
  17.    Arguments:
  18.    filein -- file-like object opened for reading
  19.    count -- number of lines to be written to stdout
  20.    """
  21.     sys.stdout.writelines(collections.deque(filein, count))
  22.  
  23.  
  24. def read_blocks(filein, positions, endpos):
  25.     """Yields strings of 8192 bytes length from filein
  26.  
  27.    Arguments:
  28.    filein -- seekable file opened for reading
  29.    positions -- iterable of seek positions from which to read
  30.    endpos -- a position beyond which shall not be read
  31.    """
  32.     blocks = ((blockpos, min(8192, endpos - blockpos)) for blockpos
  33.               in positions)
  34.     for blockpos, blocklen in blocks:
  35.         filein.seek(blockpos)
  36.         yield filein.read(blocklen)
  37.  
  38.  
  39. def count_lines(filein, count, endpos):
  40.     """Reads the given file backwards, stopping after counting count lnes
  41.  
  42.    Arguments:
  43.    filein -- seekable file opened for reading. After returning, the file
  44.              position will be right after the returned data block
  45.    count -- number of lines to find
  46.    endpos -- a position beyond which shall not be read
  47.  
  48.    Returns:
  49.    (linecount, blockdata)
  50.    linecount -- the number of lines actually counted. This can be higher than
  51.                 count or lower (when the file is too short)
  52.    blockdata -- the last block read from file, included in linecount
  53.    """
  54.     linecount = 0
  55.     blockdata = None
  56.     blockpositions = reversed(xrange(0, endpos, 8192))
  57.     for blockdata in read_blocks(filein, blockpositions, endpos):
  58.         blocklines = blockdata.count('\n')
  59.         linecount += blocklines
  60.         if linecount >= count:
  61.             break
  62.     return (linecount, blockdata)
  63.  
  64.  
  65. def print_partial(blockdata, actual_count, wanted_count):
  66.     """Prints the tail of blockdata to stdout, skipping superfluous lines
  67.  
  68.    Arguments:
  69.    blockdata -- a string containing lines
  70.    actual_count -- number of lines in blockdata (and beyond)
  71.    wanted_count -- number of lines that shall be written
  72.    """
  73.     toomuch = actual_count - wanted_count
  74.     startpos = 0
  75.     for _ in xrange(toomuch):
  76.         startpos = blockdata.index('\n', startpos) + 1
  77.     sys.stdout.write(buffer(blockdata, startpos))
  78.  
  79.  
  80. def print_to_pos(filein, endpos):
  81.     """Prints filein to stdout from the current position to endpos"""
  82.     blockpos = filein.tell()
  83.     wholeblocks, remainder = divmod(endpos - blockpos, 8192)
  84.     blocklens = [8192] * wholeblocks
  85.     blocklens.append(remainder)
  86.     blocks = (filein.read(blocklen) for blocklen in blocklens)
  87.     for block in blocks:
  88.         sys.stdout.write(block)
  89.     # sys.stdout.writelines(blocks) # works, too, but buffers output
  90.  
  91.  
  92. def tail(filein, count):
  93.     """Writes the last count lines from filein to stdout"""
  94.     try:
  95.         filein.seek(0, 2)
  96.     except IOError as err:
  97.         if err.errno == errno.ESPIPE:
  98.             tail_unseekable(filein, count)
  99.             return
  100.         else:
  101.             raise
  102.     endpos = filein.tell()
  103.     linecount, blockdata = count_lines(filein, count, endpos)
  104.     print_partial(blockdata, linecount, count)
  105.     print_to_pos(filein, endpos)
  106.  
  107.  
  108. def main():
  109.     filename = sys.argv[1]
  110.     count = 100
  111.     with open(filename, 'r', 8192) as fd:
  112.         tail(fd, count)
  113.     return
  114.  
  115.  
# Run main() only when executed as a script, not when imported as a module
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement