Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import logging
import os
import zipfile
class ZipWalker:
    """Iterate over the lines of every file stored in a directory's zip archives.

    The directory is scanned once, when the walker is constructed; archives
    added to the directory afterwards are not picked up. Iterating the walker
    visits each indexed archive, then each member of that archive, and yields
    the member's content one line at a time.
    """

    def __init__(self, path):
        """Remember *path* and index the zip archives found directly in it.

        Args:
            path: Directory holding the zip archives. A trailing path
                separator is accepted but not required.
        """
        self._path = path
        self._archives = self._populate()

    def __iter__(self):
        """Return a fresh line generator; see :meth:`nextline`."""
        return self.nextline()

    def nextline(self):
        """Yield every line of every member of every indexed archive.

        Lines are yielded exactly as read from the member stream opened by
        ``ZipFile.open`` (line terminators are kept). Archives and members
        are closed as soon as they are exhausted, or when the generator is
        closed early.
        """
        for archive_name in self._archives:
            archive_path = os.path.join(self._path, archive_name)
            with zipfile.ZipFile(archive_path) as archive:
                for member_name in archive.namelist():
                    with archive.open(member_name) as member:
                        # Iterate the stream lazily instead of readlines(),
                        # so a large log is never held in memory at once.
                        for line in member:
                            yield line

    def archives(self):
        """Return the file names of the zip archives found in the directory."""
        return self._archives.keys()

    def files(self):
        """Return a flat list of the member names of all indexed archives."""
        return [
            member_name
            for member_names in self._archives.values()
            for member_name in member_names
        ]

    def _populate(self):
        """Build the ``{archive file name: [member names]}`` index.

        Only direct entries of the directory that ``zipfile.is_zipfile``
        recognises are included; everything else is ignored.
        """
        archives = {}
        for entry in os.listdir(self._path):
            # os.path.join works whether or not the path ends with a separator.
            candidate = os.path.join(self._path, entry)
            if zipfile.is_zipfile(candidate):
                with zipfile.ZipFile(candidate) as archive:
                    archives[entry] = archive.namelist()
        return archives
if __name__ == '__main__':
    # Script entry point: count every line in every zip archive under `path`
    # and print the total. Each log record is timestamped, so the
    # 'starting run' / 'ended run' messages bracket how long the walk took.
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
    path = "/home/oxseyn/Documents/hp_data/2011.07.03-2011.07.11/archives/"
    walker = ZipWalker(path)
    logging.debug('starting run')
    # Count by consuming the iterator; no line is kept once it is counted.
    x = sum(1 for _ in walker)
    # Parenthesised form prints the same on Python 2 and is valid on Python 3.
    print(x)
    logging.debug('ended run')
Add Comment
Please, Sign In to add comment