Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- encoding:utf-8 -*-
- from cpy.table import *
- from cpy.parser import Parser
- import datetime
class MyIdentNormalizer(object):
    """Callable that trims an identifier at its underscore suffix."""

    def __call__(self, ident):
        """Return ``ident`` with the ``_...`` tail removed.

        For a single-line identifier, the first underscore and everything
        after it is dropped; an identifier without an underscore is
        returned unchanged.
        """
        trimmed = re.sub(r'_.*$', "", ident)
        return trimmed
# Module-level Parser configured with a MyIdentNormalizer instance as its
# ident_normalizer; its `source` attribute is used as a class decorator below.
parser = Parser(
    ident_normalizer=MyIdentNormalizer(),
)
def my_post_processor(tp, _):
    """Rescale the value of selected series and wrap the record in a list.

    Args:
        tp: Mapping with at least ``'series_id'`` and ``'value'`` keys. It is
            modified in place when its series id is one of the rescaled ones.
        _: Unused second argument.

    Returns:
        A one-element list containing ``tp`` itself.
    """
    rescaled_series = ['353701202']
    if tp['series_id'] not in rescaled_series:
        return [tp]

    # NOTE(review): the 0.6 divisor is not explained in this file --
    # presumably a unit conversion; confirm with the data source.
    raw_value = float(tp['value'])
    tp['value'] = int(round(raw_value / 0.6))
    return [tp]
@parser.source('file')
class Table(Columns, Subtables, HtmlTable):
    """Table definition registered on ``parser`` for the ``'file'`` source.

    The class-level attributes configure helpers from the ``cpy`` package;
    their exact semantics are defined there and are not visible in this file.
    """

    # Declarations are kept in their original order in case the framework
    # depends on it (not verifiable from this file).
    normalizer = CleanHtml() & CleanAllNonAlphaNumUnicode()
    # Captures the contents of a <table> that follows the text "Kottayam".
    table_regexes = [r'Kottayam[^<]*?<table.*?table[^>]*>(.*?)</table>.*?<table']
    post_extract = RememberRowByRegex(MONTH_ROW, r'{0}'.format(FULL_MONTH_REGEX), 1)
    my_date = MonthFromRow(MONTH_ROW, FULL_MONTH_REGEX, FULL_MONTH_NAMES)

    @prdata
    def date(self):
        """Build the date string from a year in the body and the month row.

        Returns:
            The four-digit year (19xx/20xx) found after ``per 100Kg From`` in
            ``self.prdata['body']`` concatenated with the value returned by
            ``self.my_date()``; ``None`` (after printing a diagnostic) when
            either part is missing.
        """
        month_part = self.my_date()
        if month_part:
            year_match = re.search(
                r'per\s*100Kg\s*From.*?((?:19|20)\d{2}).*?to',
                self.prdata['body'],
            )
            if year_match:
                return year_match.group(1) + month_part
            print('DID NOT MATCH YEAR')
        else:
            print('NO DATE')
        return None

    value = ValueFromCell(post_processor=my_post_processor)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement