Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import codecs
import cStringIO
import csv
import mmap
import os
class CsvIter(object):
    """Index-based access to the rows of a CSV file through a memory map.

    The file is memory-mapped once and scanned line by line to record the
    end offset of every physical line.  ``obj[i]`` then slices the map with
    those offsets and parses just that one line with ``csv.reader``.

    Because the index is built from physical lines, a quoted field that
    contains an embedded newline is split across several index entries.

    The instance holds an open file and an open mmap; call ``close()`` or
    use the instance as a context manager to release them.
    """

    def __init__(self, csvFileName):
        """Open *csvFileName*, memory-map it and build the row index."""
        self.csvFileName = csvFileName
        # NOTE(review): `_get_encoding` is not defined in this class, and the
        # value stored here is not read by any method below.  Confirm that a
        # subclass/mixin supplies it, otherwise this line raises
        # AttributeError.
        self.fileEncoding_ = self._get_encoding(self.csvFileName)
        # Opened for update ("r+"); presumably because mmap.mmap() creates a
        # writable mapping by default, which needs a read/write descriptor.
        self.csvfile = codecs.open(self.csvFileName, "r+", encoding="utf-8")
        try:
            self.data = self.mapfile(self.csvfile)
        except Exception:
            # Do not leak the file handle when the mapping cannot be created.
            self.csvfile.close()
            raise
        self.lookup = self._get_row_lookup(self.data)

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the map and the file when leaving a ``with`` block."""
        self.close()

    def __len__(self):
        """Return the number of indexed lines."""
        return len(self.lookup)

    def __getitem__(self, key):
        """Return row *key* (0-based) of the memory-mapped csv file as a list.

        Raises:
            IndexError: if *key* is not a line number present in the index.
        """
        try:
            end = self.lookup[key]
            # lookup[n] is the END offset of line n, so a line starts where
            # the previous one ended; the first line starts at offset 0.
            start = 0 if key == 0 else self.lookup[key - 1]
        except KeyError:
            raise IndexError("index out of range")
        record = self.data[start:end]
        if not isinstance(record, str):
            # On Python 3 an mmap slice is bytes, but csv.reader needs text.
            # UTF-8 matches the encoding the file is opened with in __init__.
            record = record.decode("utf-8")
        # csv.reader accepts any iterable of lines; a one-element list is
        # enough for the single physical line held in `record`.
        return next(csv.reader([record]))

    def close(self):
        """Release the memory map and the underlying file handle."""
        self.data.close()
        self.csvfile.close()

    def mapfile(self, fileObj):
        """Return an mmap covering the whole of the already-open *fileObj*.

        *fileObj* must expose ``name`` and ``fileno()``.
        NOTE(review): an empty file presumably cannot be mapped (mmap raises
        ValueError) -- confirm whether callers need that case handled.
        """
        size = os.path.getsize(fileObj.name)
        return mmap.mmap(fileObj.fileno(), size)

    def _get_row_lookup(self, data):
        """Map each 0-based line number to the offset just past that line.

        *data* only needs a ``readline()`` method that returns an empty
        value at end of input.  Offsets are measured in whatever units
        ``len(line)`` yields (bytes for an mmap).
        """
        lookup = {}
        offset = 0
        lino = 0
        while True:
            line = data.readline()
            if not line:
                # Stop BEFORE recording anything for EOF; otherwise the index
                # gains a phantom zero-length entry after the last line.
                break
            offset += len(line)
            lookup[lino] = offset
            lino += 1
        return lookup

    # source: https://docs.python.org/2/library/csv.html (bottom of page)
    def unicode_csv_reader(self, unicode_csv_data, dialect, encoding, **kwargs):
        """Yield rows of unicode cells parsed from an iterable of unicode lines.

        Each line is encoded with *encoding* before being handed to
        ``csv.reader`` and every resulting cell is decoded back with the same
        *encoding*.  Extra keyword arguments are forwarded to ``csv.reader``.
        Relies on the Python 2 ``unicode`` builtin.
        """
        csv_reader = csv.reader(self.utf_8_encoder(unicode_csv_data, encoding),
                                dialect=dialect, **kwargs)
        for row in csv_reader:
            yield [unicode(cell, encoding) for cell in row]

    def utf_8_encoder(self, unicode_csv_data, encoding):
        """Yield every line of *unicode_csv_data* encoded with *encoding*.

        Despite the name, the target encoding is whatever the caller passes.
        """
        for line in unicode_csv_data:
            yield line.encode(encoding)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement