Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- #
- # Building a tar file chunk-by-chunk.
- #
- # taken from https://gist.github.com/chipx86/9598b1e4a9a1a7831054
- # This is a quick bit of sample code for streaming data to a tar file,
- # building it piece-by-piece. The tarfile is built on-the-fly and streamed
- # back out. This is useful for web applications that need to dynamically
- # build a tar file without swamping the server.
- import os
- import tarfile
- try:
- from cStringIO import StringIO
- except ImportError:
- from StringIO import StringIO
# Archive to create. '~' is expanded explicitly because neither open() nor
# os.walk() performs shell-style home-directory expansion.
out_filename = os.path.expanduser('~/file.tar')
# Directory names (bare names, not paths) that are pruned from the walk.
exclude_dirs = ['exclude_dir']
# Root of the directory tree that gets archived.
top_dir = os.path.expanduser('~/tar_dir')
# File-name suffixes to skip; kept as a tuple because str.endswith() takes one.
exclude_files = ('.tar',)
# Number of bytes read from an input file per streaming step.
BLOCK_SIZE = 4096
class FileStream(object):
    """Write-only in-memory sink that a TarFile writes into and a caller drains.

    Bytes handed to ``write()`` accumulate in ``buffer`` until ``pop()``
    returns them and starts a fresh buffer.  ``offset`` counts every byte
    ever written and is what ``tell()`` reports; ``pop()`` does not reset it.
    """

    def __init__(self):
        # Tar data is binary, so the buffer must hold raw bytes.  io.BytesIO
        # is available on Python 2.6+ and 3.x; it is imported locally so the
        # class does not depend on the module-level StringIO import.
        import io
        self._new_buffer = io.BytesIO
        self.buffer = self._new_buffer()
        self.offset = 0

    def write(self, s):
        """Append the byte string *s* to the buffer and advance ``offset``."""
        self.buffer.write(s)
        self.offset += len(s)

    def close(self):
        """Close the current buffer."""
        self.buffer.close()

    def tell(self):
        """Return the total number of bytes written so far."""
        return self.offset

    def pop(self):
        """Return everything buffered since the last ``pop()`` and reset the buffer."""
        data = self.buffer.getvalue()
        self.buffer.close()
        self.buffer = self._new_buffer()
        return data
# Shared sink: the TarFile below writes into it and the output loop drains it.
streaming_fp = FileStream()
# Plain, uncompressed tar ('w:') is the mode that works with this setup.
# NOTE: the stream/compressed mode 'w|gz' was tried with the same arguments
# and fails, so it is not used here.
tar = tarfile.open(name=out_filename, mode='w:', fileobj=streaming_fp)
def stream_build_tar(in_filename, tar_file=None, block_size=None):
    """Append one file to a tar archive in small steps.

    This is a generator.  After each ``yield`` the archive's underlying
    file object may hold newly written bytes that the caller can drain, so
    a whole input file never has to sit in memory at once.

    Args:
        in_filename: Path of the file to add; it is also used as the member
            name in the archive.
        tar_file: Open, writable ``tarfile.TarFile`` to append to.  Defaults
            to the module-level ``tar``.
        block_size: Number of bytes read per step.  Defaults to the
            module-level ``BLOCK_SIZE``.

    Yields:
        None, once after the header and once after every data or padding
        write.

    ``OSError``/``IOError`` (for example a broken symlink or an unreadable
    file) are reported on stdout and end the generator instead of
    propagating.  An error raised while the data is being read, i.e. after
    the header has been written, leaves that member truncated.
    """
    if tar_file is None:
        tar_file = tar
    if block_size is None:
        block_size = BLOCK_SIZE

    try:
        # Open first: a file that cannot be opened must not leave an orphan
        # header in the archive.  Binary mode keeps the bytes untouched.
        with open(in_filename, 'rb') as in_fp:
            # Stat the open handle so size and content refer to the same file.
            stat = os.fstat(in_fp.fileno())
            tar_info = tarfile.TarInfo(in_filename)
            # Only mtime and size are copied; mode and ownership keep the
            # TarInfo defaults.
            tar_info.mtime = stat.st_mtime
            tar_info.size = stat.st_size

            # Write the header by hand (this mirrors what TarFile.addfile()
            # does): addfile() without a fileobj is rejected for non-empty
            # regular files on Python 3.13+.
            header = tar_info.tobuf(tar_file.format, tar_file.encoding,
                                    tar_file.errors)
            tar_file.fileobj.write(header)
            tar_file.offset += len(header)
            tar_file.members.append(tar_info)
            yield

            # Emit exactly tar_info.size bytes so the data always matches
            # the size promised in the header.
            remaining = tar_info.size
            while remaining > 0:
                chunk = in_fp.read(min(block_size, remaining))
                if not chunk:
                    # The file shrank after it was stat'ed; pad with NULs to
                    # keep the archive structurally valid.
                    chunk = tarfile.NUL * min(block_size, remaining)
                tar_file.fileobj.write(chunk)
                remaining -= len(chunk)
                yield

            # Member data is padded to a multiple of the tar block size.
            blocks, remainder = divmod(tar_info.size, tarfile.BLOCKSIZE)
            if remainder > 0:
                tar_file.fileobj.write(
                    tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
                blocks += 1
                yield
            # Data was written behind TarFile's back, so its offset
            # bookkeeping has to be advanced manually.
            tar_file.offset += blocks * tarfile.BLOCKSIZE
    except (OSError, IOError):
        # e.g. a broken link somewhere in the path
        print('error processing:  %s' % in_filename)
# Drive the streaming build: after every generator step, move whatever the
# TarFile buffered in streaming_fp into the real output file.
# The output is opened in binary mode because tar data is raw bytes.
with open(out_filename, 'wb') as out_fp:
    for root, dirs, files in os.walk(top_dir, topdown=True):
        # Slice assignment edits the list os.walk() itself iterates over, so
        # excluded directories are never descended into (needs topdown=True).
        dirs[:] = [d for d in dirs if d not in exclude_dirs]
        for name in files:
            if name.endswith(exclude_files):
                continue
            for _ in stream_build_tar(os.path.join(root, name)):
                block = streaming_fp.pop()
                if block:
                    out_fp.write(block)
                    out_fp.flush()
    # tar.close() writes the end-of-archive blocks into streaming_fp; they
    # must be copied out as well or the resulting file has no trailer.
    tar.close()
    out_fp.write(streaming_fp.pop())
print('Wrote tar file to %s' % out_filename)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement