Advertisement
Guest User

Untitled

a guest
Jan 24th, 2019
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.24 KB | None | 0 0
  1.  
  2. # -*- encoding:utf-8 -*-
  3. from cpy.table import *
  4. from cpy.parser import Parser
  5. import datetime
  6.  
  7.  
  8. class MyIdentNormalizer(object):
  9.  
  10. def __call__(self, ident):
  11. return re.sub(r'_.*$', "", ident)
  12.  
  13.  
# Module-level parser instance, constructed with a MyIdentNormalizer as its
# ident_normalizer; it provides the @parser.source decorator applied to Table.
parser = Parser(ident_normalizer=MyIdentNormalizer())
  15.  
  16.  
  17. def my_post_processor(tp, _):
  18.  
  19. if tp['series_id'] in ['353701202']:
  20. tp['value'] = int(round(float(tp['value']) / 0.6))
  21.  
  22. return [tp]
  23.  
  24.  
  25. @parser.source('file')
  26. class Table(Columns, Subtables, HtmlTable):
  27. normalizer = CleanHtml() & CleanAllNonAlphaNumUnicode()
  28. table_regexes = [r'Kottayam[^<]*?<table.*?table[^>]*>(.*?)</table>.*?<table']
  29. post_extract = RememberRowByRegex(MONTH_ROW, r'{0}'.format(FULL_MONTH_REGEX), 1)
  30. my_date = MonthFromRow(MONTH_ROW, FULL_MONTH_REGEX, FULL_MONTH_NAMES)
  31.  
  32. @prdata
  33. def date(self):
  34. my_date = self.my_date()
  35. if not my_date:
  36. print('NO DATE')
  37. return None
  38. match = re.search(r'per\s*100Kg\s*From.*?((?:19|20)\d{2}).*?to', self.prdata['body'])
  39. if match:
  40. year = match.group(1)
  41. return year + my_date
  42. print('DID NOT MATCH YEAR')
  43. return None
  44.  
  45.  
  46. value = ValueFromCell(post_processor=my_post_processor)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement