# Untitled


# -*- encoding:utf-8 -*-
from cpy.table import *
from cpy.parser import Parser
import datetime


class MyIdentNormalizer(object):
    """Callable that truncates an identifier at an underscore.

    Instances are handed to ``Parser`` as its ``ident_normalizer``.
    """

    def __call__(self, ident):
        """Return ``ident`` without its ``_...`` tail.

        The underscore and everything after it, up to the end of the
        string, is removed (e.g. ``'abc_def'`` -> ``'abc'``). Identifiers
        without an underscore come back unchanged.
        """
        tail = re.compile(r'_.*$')
        return tail.sub("", ident)


# Module-level Parser instance, constructed with a MyIdentNormalizer as its
# ident_normalizer. Its `source` decorator is applied to the Table class below.
parser = Parser(ident_normalizer=MyIdentNormalizer())


def my_post_processor(tp, _):
    """Rescale the value of selected series and wrap the record in a list.

    Args:
        tp: Mutable mapping with at least ``'series_id'`` and ``'value'``
            keys. It is modified in place when its series id is listed
            below.
        _: Ignored second argument.

    Returns:
        A one-element list holding the same ``tp`` object.
    """
    rescaled_series = ('353701202',)
    if tp['series_id'] not in rescaled_series:
        return [tp]

    # NOTE(review): 0.6 is presumably a unit conversion factor -- confirm.
    raw_value = float(tp['value'])
    tp['value'] = int(round(raw_value / 0.6))
    return [tp]


@parser.source('file')
class Table(Columns, Subtables, HtmlTable):
    # HTML table definition for the 'file' source: picks the table that
    # follows the text "Kottayam" and derives each record's date and value.

    # Normalizers combined with `&`.
    normalizer = CleanHtml() & CleanAllNonAlphaNumUnicode()
    # Captures the inner HTML of the table selected after "Kottayam".
    table_regexes = [r'Kottayam[^<]*?<table.*?table[^>]*>(.*?)</table>.*?<table']
    # Both helpers below are keyed on MONTH_ROW and the full-month regex.
    post_extract = RememberRowByRegex(MONTH_ROW, r'{0}'.format(FULL_MONTH_REGEX), 1)
    my_date = MonthFromRow(MONTH_ROW, FULL_MONTH_REGEX, FULL_MONTH_NAMES)

    @prdata
    def date(self):
        """Build the date string as ``<year>`` followed by ``self.my_date()``.

        The four-digit year (19xx/20xx) is searched for in
        ``self.prdata['body']`` after the text "per 100Kg From". Returns
        ``None``, after printing a diagnostic, when either the month part
        or the year cannot be determined.
        """
        month_part = self.my_date()
        if not month_part:
            print('NO DATE')
            return None

        year_match = re.search(
            r'per\s*100Kg\s*From.*?((?:19|20)\d{2}).*?to',
            self.prdata['body'],
        )
        if not year_match:
            print('DID NOT MATCH YEAR')
            return None

        return year_match.group(1) + month_part

    # Cell values are run through my_post_processor.
    value = ValueFromCell(post_processor=my_post_processor)