SHARE
TWEET

Untitled

a guest Apr 18th, 2019 58 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python3
  2. # NB Depends on dict preserving the insert order (CPython >= 3.6, PyPy all)
  3.  
  4. from csv           import DictReader
  5. from datetime      import datetime as DateTime
  6. from os            import scandir
  7. from simplejsonseq import dump
  8. from sys           import argv, stderr, stdout
  9.  
  10. import ietfcsv  # ietf-tab CSV dialect
  11.  
  12. NAME = '%Y%m%dT%H%MZ.tsv'
  13. HEAD = ('region №', 'region', 'ТВО №', 'Центр ТВО', '№ ВД')
  14.  
  15. def without(dict, *keys):
  16.     keys = set(keys)
  17.     return {k: v for k, v in dict.items() if k not in keys}
  18.  
  19. rows = {}  # aggregated data
  20. keys = []  # final row order
  21.  
  22. if len(argv) > 1 and argv[1] == '-v':
  23.     def trace(*args, **named):
  24.         print(*args, **named, file=stderr)
  25. else:
  26.     def trace(*args, **named):
  27.         pass
  28.  
  29. for entry in sorted(scandir(), key=lambda e: e.name):
  30.     if not entry.name.endswith('.tsv'): continue
  31.     trace(entry.name, end=': ')
  32.     with open(entry.name, newline='\r\n') as tsv:
  33.         keys = []
  34.         time = DateTime.strptime(entry.name, NAME)
  35.         prevlen, updated = len(rows), 0
  36.  
  37.         for line in DictReader(tsv, dialect='ietf-tab'):
  38.             del line['Кількість виборчих дільниць в окрузі']
  39.             del line['Кількість виборчих дільниць щодо яких '
  40.                      'надійшли відомості']
  41.             assert 'timestamp' not in line
  42.             line['timestamp'] = time.strftime('%Y-%m-%dT%H:%MZ')
  43.  
  44.             row = {k: line.pop(k) for k in HEAD}
  45.             row['history'] = [line]
  46.             key = (row['ТВО №'], int(row['№ ВД']))
  47.             keys.append(key)
  48.  
  49.             row = rows.setdefault(key, row)
  50.             if (without(row['history'][-1], 'timestamp') !=
  51.                 without(line, 'timestamp')):
  52.                 row['history'].append(line)
  53.                 updated += 1
  54.  
  55.         trace('{} lines, {} inserted, {} updated'
  56.               .format(len(keys), len(rows)-prevlen, updated))
  57.         if len(rows)-prevlen == 0 and updated == 0:
  58.             print('warning: {}: no changes'.format(entry.name),
  59.                   file=stderr)
  60.         assert set(keys) <= set(rows)
  61.         if len(keys) < len(rows):
  62.             for k, row in rows.items():
  63.                 if k in keys: continue
  64.                 print('warning: {}: ТВО {}, ВД {} missing'
  65.                       .format(entry.name,
  66.                               row['ТВО №'],
  67.                               row['№ ВД']),
  68.                       file=stderr)
  69.  
  70. for k, row in rows.items():
  71.     # Not {r.pop('timestamp'): r for r ...} because CPython evaluates
  72.     # dictionary comprehensions in the wrong order (#29652)
  73.     row['history'] = {r['timestamp']: without(r, 'timestamp')
  74.                       for r in row['history']}
  75. stdout.reconfigure(newline='\r\n')
  76. dump((rows[k] for k in keys), stdout, ensure_ascii=False, indent='\t')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top