Guest User

CSV parser with DOM support

a guest
May 20th, 2010
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.49 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3.    MoinMoin - Parser for CSV data
  4.  
  5.    This parser converts CSV to DOM tree format.
  6.  
  7.    It supports the following parser arguments:
  8.  
  9.     * delimiter/separator: the delimiter to use instead of ;
  10.     * quotechar: quoting character, default off, must be ascii!
  11.     * show: comma-separated list of columns to show only
  12.     * hide: comma-separated list of columns to hide
  13.     * autofilter: comma-separated list of columns to equip with
  14.                   auto-filter drop down
  15.     * name: name of the dataset
  16.     * link: comma separated list of columns that take links, separate
  17.             the link and the description with a space
  18.     * static_cols: comma-separated list of columns that are static
  19.                    and present in each row
  20.     * static_vals: comma-separated list of values for those static
  21.                    columns
  22.  
  23.    The static column feature is only really useful if the dataset
  24.    is postprocessed by some other plugin collecting data from multiple
  25.    wiki pages.
  26.  
  27.    @copyright: 2007, 2008 Johannes Berg <[email protected]>
  28.                2010, DmitryAndreev
  29.    @license: GNU GPL, see COPYING for details.
  30. """
  31.  
  32. from csv import reader, QUOTE_NONE, QUOTE_MINIMAL, Sniffer
  33. from _csv import Error
  34.  
  35. from MoinMoin.wikiutil import escape
  36. from MoinMoin.util.tree import moin_page
  37.  
  38. Dependencies = ['time']
  39.  
  40. class Parser:
  41.     extensions = ['.csv']
  42.     Dependencies = []
  43.  
  44.     def _read_rows(self, r):
  45.         if self._first_row is not None:
  46.             yield self._first_row
  47.         for row in r:
  48.             yield row
  49.  
  50.     def __init__(self, raw, request, **kw):
  51.         self.request = request
  52.         self._first_row = None
  53.         formatter = request.formatter
  54.  
  55.         # workaround csv.reader deficiency by encoding to utf-8
  56.         # removes empty lines in front of the csv table
  57.         data = raw.encode('utf-8').lstrip('\n').split('\n')
  58.  
  59.         delimiter = ';'
  60.         # Previous versions of this parser have used only the delimiter ";" (by default).
  61.         # This version now tries to sniff the delimiter from the list preferred_delimiters
  62.         # Although the Python csv sniffer had quite some changes from py 2.3 to 2.5.1, we try
  63.         # to avoid problems for the case it does not find a delimiter in some given data.
  64.         # Newer versions of the sniffer do raise an _csv.Error while older versions do
  65.         # return a whitespace as delimiter.
  66.         if data[0]:
  67.             try:
  68.                 preferred_delimiters = [',', '\t', ';', ' ', ':']
  69.                 delimiter = Sniffer().sniff(data[0], preferred_delimiters).delimiter or ';'
  70.             except Error:
  71.                 pass
  72.  
  73.         visible = None
  74.         hiddenindexes = []
  75.         hiddencols = []
  76.         autofiltercols = []
  77.         staticcols = []
  78.         staticvals = []
  79.         linkcols = []
  80.         quotechar = '\x00' # can't be entered
  81.         quoting = QUOTE_NONE
  82.         name = None
  83.         hdr = reader([kw.get('format_args', '').strip().encode('utf-8')], delimiter=" ")
  84.         args = hdr.next()
  85.  
  86.         for arg in args:
  87.             arg = arg.decode('utf-8')
  88.             try:
  89.                 key, val = arg.split('=', 1)
  90.             except:
  91.                 # handle compatibility with original 'csv' parser
  92.                 if arg.startswith('-'):
  93.                     try:
  94.                         hiddenindexes.append(int(arg[1:]) - 1)
  95.                     except ValueError:
  96.                         pass
  97.                 else:
  98.                     delimiter = arg.encode('utf-8')
  99.                 continue
  100.             if key == 'separator' or key == 'delimiter':
  101.                 delimiter = val.encode('utf-8')
  102.             if key == 'quotechar':
  103.                 if val == val.encode('utf-8'):
  104.                     quotechar = val.encode('utf-8')
  105.                     quoting = QUOTE_MINIMAL
  106.             elif key == 'show':
  107.                 visible = val.split(',')
  108.             elif key == 'hide':
  109.                 hiddencols = val.split(',')
  110.             elif key == 'autofilter':
  111.                 autofiltercols = val.split(',')
  112.             elif key == 'name':
  113.                 name = val
  114.             elif key == 'static_cols':
  115.                 staticcols = val.split(',')
  116.             elif key == 'static_vals':
  117.                 staticvals = val.split(',')
  118.             elif key == 'link':
  119.                 linkcols = val.split(',')
  120.  
  121.         if len(staticcols) > len(staticvals):
  122.             staticvals.extend([''] * (len(staticcols)-len(staticvals)))
  123.         elif len(staticcols) < len(staticvals):
  124.             staticvals = staticvals[:len(staticcols)]
  125.  
  126.         r = reader(data, delimiter=delimiter, quotechar=quotechar, quoting=quoting)
  127.         cols = map(lambda x: x.decode('utf-8'), r.next()) + staticcols
  128.         self._show_header = True
  129.         if cols == staticcols:
  130.             try:
  131.                 self._first_row = map(lambda x: x.decode('utf-8'), r.next())
  132.                 cols = [None] * len(self._first_row) + staticcols
  133.                 self._show_header = False
  134.             except StopIteration:
  135.                 pass
  136.            
  137.         num_entry_cols = len(cols) - len(staticcols)
  138.  
  139.         if not visible is None:
  140.             for col in cols:
  141.                 if not col in visible:
  142.                     hiddencols.append(col)
  143.  
  144.         linkparse = [False] * len(cols)
  145.  
  146.         columns_attrib = []
  147.  
  148.         for colidx in range(len(cols)):
  149.             col = cols[colidx]
  150.             print col,colidx
  151.             autofilter = col in autofiltercols
  152.             hidden = col in hiddencols or colidx in hiddenindexes
  153.             columns_attrib = {'autofilter':autofilter, 'hidden':hidden}
  154.             linkparse[colidx] = col in linkcols
  155.         rows = []
  156.         for row in self._read_rows(r):
  157.             row = map(lambda x: x.decode('utf-8'), row)
  158.             if len(row) > num_entry_cols:
  159.                 row = row[:num_entry_cols]
  160.             elif len(row) < num_entry_cols:
  161.                 row.extend([''] * (num_entry_cols-len(row)))
  162.             row += staticvals
  163.             cells = []
  164.  
  165.             for colidx in range(len(row)):
  166.                 item = row[colidx]
  167.                 if linkparse[colidx]:
  168.                     try:
  169.                         url, item = item.split(' ', 1)
  170.                         if url == '':
  171.                             display = escape(item)
  172.                         else: pass
  173.                             display = ''.join([
  174.                                 formatter.url(1, url=url),
  175.                                 formatter.text(item),
  176.                                 formatter.url(0)])
  177.                     except ValueError:
  178.                         display = escape(item)
  179.                 else:
  180.                     display = escape(item)
  181.                 if(not (col in hiddencols or colidx in hiddenindexes)):
  182.                     cell = moin_page.table_cell(children=(display, ))
  183.                     cells.append(cell)                        
  184.             rows.append(moin_page.table_row(children=cells))
  185.         self.table = ''
  186.         table_body = ''
  187.         if(len(rows) > 0):
  188.             print self._first_row
  189.             table_body = moin_page.table_body(children=rows[1:])
  190.             table_header = moin_page.table_header(children=(\
  191.                                     moin_page.table_row(children=(row[0], ),\
  192.                                     table_body))
  193.             self.table = moin_page.table(children=(table_header, ))
  194.  
  195.     def convert(self):
  196.         return self.table
Advertisement
Add Comment
Please, Sign In to add comment