Advertisement
Guest User

parser

a guest
Jun 8th, 2012
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.47 KB | None | 0 0
  #!/usr/bin/env python
# coding: utf-8
"""Parse the ``mini_db`` listing and print the number of dogs per pedigree.

Three kinds of line are recognised; anything else is ignored:

* class header:  ``(<id>) <class_est> / <class_eng>``
* pedigree line: ``<pedigree_est> / <pedigree_eng>``
* dog line:      ``<dog_id> <dog_name> om: <owner>,<place>,<country>``

A pedigree line belongs to the most recent class header, and a dog line
belongs to the most recent pedigree line of that class.
"""
import io
import re

# re.U makes \w accept non-ASCII letters on Python 2 as well; re.I keeps the
# literal "om:" case-insensitive; re.M keeps the original "$" semantics.
_FLAGS = re.M | re.I | re.U

# Cheap "what kind of line is this?" patterns.
r_id = re.compile(r'^\(\d+\)', _FLAGS)  # id string
r_pedigree_match = re.compile(r'[\w\s]+/[\w\s]+', _FLAGS)
r_data_match = re.compile(r'^\d+ [\w\s\-\']+ om: [\w\s]+', _FLAGS)

# Strict patterns that extract the named fields.
r_ident = re.compile(
    r'^\((?P<id>\d+)\) (?P<class_est>[\w\s]+) / (?P<class_eng>[\w\s]+)$',
    _FLAGS)
r_pedigree = re.compile(
    r'(?P<pedigree_est>[\w\s]+) / (?P<pedigree_eng>[\w\s]+)$', _FLAGS)
# The country group now wraps its character class; previously the group was
# closed immediately and therefore always captured an empty string.
r_data = re.compile(
    r'^(?P<dog_id>\d+) (?P<dog_name>[\w\s\-\']+) om: '
    r'(?P<owner>[\w\s&/]+),(?P<place>[\w\s]+),(?P<country>[\w\s]+)$',
    _FLAGS)


def _extract(pattern, line, lineno, what):
    """Return the stripped named groups of *pattern* matched against *line*.

    Raises ValueError (naming the line number) when the line was classified
    as *what* by its loose pattern but does not fit the strict one.
    """
    match = pattern.match(line)
    if match is None:
        raise ValueError("line %d: malformed %s line: %r" % (lineno, what, line))
    # The character classes include \s, so captures may carry the trailing
    # newline or the blank that follows a comma; strip them here.
    return {key: value.strip() for key, value in match.groupdict().items()}


def parse_lines(lines):
    """Build the class -> pedigree -> dogs structure from an iterable of lines.

    Returns a list of class dicts (``id``, ``class_est``, ``class_eng``,
    ``pedigree``). Each entry of ``pedigree`` is a dict with
    ``pedigree_est``, ``pedigree_eng`` and ``dogs``; each dog is a dict with
    ``dog_id``, ``dog_name``, ``owner``, ``place`` and ``country``.

    Raises ValueError for a malformed line, for a pedigree line that appears
    before any class header, and for a dog line that appears before any
    pedigree line of the current class.
    """
    db = []
    current_class = None
    current_pedigree = None
    for lineno, line in enumerate(lines, 1):
        if r_id.match(line):
            current_class = _extract(r_ident, line, lineno, 'class')
            current_class['pedigree'] = []
            db.append(current_class)
            # A new class must not inherit the previous class's pedigree.
            current_pedigree = None
        elif r_pedigree_match.match(line):
            if current_class is None:
                raise ValueError(
                    "line %d: pedigree line before any class header" % lineno)
            current_pedigree = _extract(r_pedigree, line, lineno, 'pedigree')
            current_pedigree['dogs'] = []
            current_class['pedigree'].append(current_pedigree)
        elif r_data_match.match(line):
            if current_pedigree is None:
                raise ValueError(
                    "line %d: dog line before any pedigree line" % lineno)
            current_pedigree['dogs'].append(
                _extract(r_data, line, lineno, 'dog'))
    return db


def format_report(db):
    """Return one ``"<Class>: <Pedigree>(<n>), ..."`` string per class in *db*."""
    report = []
    for item in db:
        counts = ['%s(%s)' % (p['pedigree_eng'], len(p['dogs']))
                  for p in item['pedigree']]
        report.append("%s: %s" % (
            item['class_eng'].replace('\n', '').title(),
            ", ".join(counts).replace('\n', '').title()))
    return report


def main(path='mini_db'):
    """Parse the file at *path* (read as UTF-8) and print the report."""
    # io.open accepts an encoding on both Python 2 and 3, and the with-block
    # closes the handle once parsing is done.
    with io.open(path, 'r', encoding='utf-8') as handle:
        db = parse_lines(handle)
    for row in format_report(db):
        print(row)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement