parser

#!/usr/bin/env python
# coding: utf-8
import re
import sys
# Read the whole input file into memory as a list of lines. The context
# manager closes the file handle as soon as the lines have been read instead
# of leaving it open for the lifetime of the script.
with open('mini_db', 'r') as db_file:
    f = db_file.readlines()
# Parsed records; one dict per class header line is appended by the parsing loop.
db = list()
# Patterns used to classify and parse input lines. Each record kind has a
# loose pre-check pattern (used only to decide which kind a line is) and a
# strict pattern with named groups (used to extract the fields).
# NOTE(review): the *_est / *_eng group names suggest Estonian / English
# variants of the same label -- confirm against the data file.

# Class header pre-check: the line starts with a parenthesised number.
r_id = re.compile(r'^\(\d+\)', re.U|re.I) #id string
# Class header fields: "(<id>) <class_est> / <class_eng>".
r_ident = re.compile(r'^\((?P<id>\d+)\) (?P<class_est>[\w\s]+) / (?P<class_eng>[\w\s]+)$', re.M|re.I|re.S)
# Pedigree pre-check: two word/space runs separated by a slash.
r_pedigree_match = re.compile(r'[\w\s]+/[\w\s]+', re.M|re.I|re.S)
# Pedigree fields: "<pedigree_est> / <pedigree_eng>".
r_pedigree = re.compile(r'(?P<pedigree_est>[\w\s]+) / (?P<pedigree_eng>[\w\s]+)$', re.M|re.I|re.S)
# Dog entry pre-check: "<number> <name> om: <owner...>".
r_data_match = re.compile(r'^\d+ [\w\s\-\']+ om: [\w\s]+', re.M|re.I|re.S)
# Dog entry fields: "<dog_id> <dog_name> om: <owner>,<place>,<country>".
# The country character class sits inside its named group so the group
# actually captures the text (an empty group always yields '').
# Because \s is part of the classes and re.S is set, captured fields keep any
# surrounding whitespace, including the line's trailing newline.
r_data = re.compile(r'^(?P<dog_id>\d+) (?P<dog_name>[\w\s\-\']+) om: (?P<owner>[\w\s&/]+),(?P<place>[\w\s]+),(?P<country>[\w\s]+)$', re.M|re.S|re.I)

# Build the nested structure in a single pass over the input lines:
#   db -> list of class dicts, each with a 'pedigree' list
#      -> list of pedigree dicts, each with a 'dogs' list
#         -> list of dog dicts.
# A line is first classified with a loose pre-check pattern and then parsed
# with the strict one. A line that passes the pre-check but fails the strict
# pattern, or that appears before its parent record, is reported on stderr
# and skipped rather than crashing the script.
db_item = None        # class record currently being filled
pedigree_item = None  # pedigree record currently being filled
for line_no, line in enumerate(f, 1):
    if r_id.match(line):
        ident = r_ident.match(line)
        if ident is None:
            sys.stderr.write('line %d: malformed class header, skipped\n' % line_no)
            continue
        db_item = ident.groupdict()
        db_item['pedigree'] = list()
        db.append(db_item)
        # A new class starts with no pedigree; dog lines must not leak into
        # the previous class's last pedigree.
        pedigree_item = None
    elif r_pedigree_match.match(line):
        pedigree = r_pedigree.match(line)
        if pedigree is None or db_item is None:
            sys.stderr.write('line %d: malformed or orphan pedigree line, skipped\n' % line_no)
            continue
        pedigree_item = pedigree.groupdict()
        pedigree_item['dogs'] = list()
        db_item['pedigree'].append(pedigree_item)
    elif r_data_match.match(line):
        data = r_data.match(line)
        if data is None or pedigree_item is None:
            sys.stderr.write('line %d: malformed or orphan dog line, skipped\n' % line_no)
            continue
        pedigree_item['dogs'].append(data.groupdict())

# Print one summary line per class:
#   "<Class Eng>: <Pedigree Eng>(<number of dogs>), ..."
for item in db:
    klass = item['class_eng']
    fmt = ['%s(%s)' % (p['pedigree_eng'], len(p['dogs'])) for p in item['pedigree']]
    # Newlines are removed from both parts before title-casing so each class
    # prints on a single line. The single-argument parenthesised print works
    # identically as a Python 2 statement and a Python 3 function call.
    print("%s: %s" % (klass.replace('\n', '').title(),
                      ", ".join(fmt).replace('\n', '').title()))