Advertisement
Guest User

Untitled

a guest
Feb 27th, 2015
804
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.24 KB | None | 0 0
  1. #!/usr/bin/env python
  2. #
  3. # Building a tar file chunk-by-chunk.
  4. #
  5. # taken from https://gist.github.com/chipx86/9598b1e4a9a1a7831054
  6. # This is a quick bit of sample code for streaming data to a tar file,
  7. # building it piece-by-piece. The tarfile is built on-the-fly and streamed
  8. # back out. This is useful for web applications that need to dynamically
  9. # build a tar file without swamping the server.
  10. import os
  11. import tarfile
  12. try:
  13.     from cStringIO import StringIO
  14. except ImportError:
  15.     from StringIO import StringIO
  16.  
  17.  
  18. out_filename = '~/file.tar'
  19. exclude_dirs = ['exclude_dir']
  20. top_dir = '~/tar_dir'
  21. exclude_files = ('.tar',)
  22.  
  23. BLOCK_SIZE = 4096
  24.  
  25.  
  26. class FileStream(object):
  27.     def __init__(self):
  28.         self.buffer = StringIO()
  29.         self.offset = 0
  30.  
  31.     def write(self, s):
  32.         self.buffer.write(s)
  33.         self.offset += len(s)
  34.  
  35.     def close(self):
  36.         self.buffer.close()
  37.  
  38.     def tell(self):
  39.         return self.offset
  40.  
  41.     def pop(self):
  42.         s = self.buffer.getvalue()
  43.         self.buffer.close()
  44.         self.buffer = StringIO()
  45.         return s
  46.  
  47.  
  48. streaming_fp = FileStream()
  49.  
  50. # works
  51. tar = tarfile.TarFile.open(out_filename, 'w:', streaming_fp)
  52. # fails with stream and/or compression enabled
  53. # tar = tarfile.TarFile.open(out_filename, 'w|gz', streaming_fp)
  54.  
  55.  
  56. def stream_build_tar(in_filename):
  57.     try:
  58.         stat = os.stat(in_filename)
  59.         tar_info = tarfile.TarInfo(in_filename)
  60.         # Note that you can get this information from the storage backend,
  61.         # but it's valid for either to raise a NotImplementedError, so it's
  62.         # important to check.
  63.         #
  64.         # Things like the mode or ownership won't be available.
  65.         tar_info.mtime = stat.st_mtime
  66.         tar_info.size = stat.st_size
  67.         tar.addfile(tar_info)
  68.  
  69.         yield
  70.  
  71.         with open(in_filename, 'r') as in_fp:
  72.  
  73.             while True:
  74.                 s = in_fp.read(BLOCK_SIZE)
  75.  
  76.                 if len(s) > 0:
  77.                     tar.fileobj.write(s)
  78.  
  79.                     yield
  80.  
  81.                 if len(s) < BLOCK_SIZE:
  82.                     blocks, remainder = divmod(tar_info.size, tarfile.BLOCKSIZE)
  83.  
  84.                     if remainder > 0:
  85.                         tar.fileobj.write(tarfile.NUL *
  86.                                           (tarfile.BLOCKSIZE - remainder))
  87.  
  88.                         yield
  89.  
  90.                         blocks += 1
  91.  
  92.                     tar.offset += blocks * tarfile.BLOCKSIZE
  93.                     break
  94.  
  95.         yield
  96.     except (OSError, IOError):
  97.         # if there is a broken link in the path the process fails
  98.         print 'error processing: ', in_filename
  99.         pass
  100.  
  101. with open(out_filename, 'w') as out_fp:
  102.     for root, dirs, files in os.walk(top_dir, topdown=True):
  103.         dirs[:] = [d for d in dirs if d not in exclude_dirs]
  104.         files[:] = [f for f in files if not f.endswith(exclude_files)]
  105.  
  106.         for file_ in files:
  107.             file_ = os.path.join(root, file_)
  108.  
  109.             for i in stream_build_tar(file_):
  110.                 block = streaming_fp.pop()
  111.  
  112.                 if len(block) > 0:
  113.                     out_fp.write(block)
  114.                     out_fp.flush()
  115.  
  116.         tar.close()
  117.  
  118. print 'Wrote tar file to %s' % out_filename
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement