Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # NB Depends on dict preserving the insert order (CPython >= 3.6, PyPy all)
- from csv import DictReader
- from datetime import datetime as DateTime
- from os import scandir
- from simplejsonseq import dump
- from sys import argv, stderr, stdout
- import ietfcsv # ietf-tab CSV dialect
# strptime() pattern that the snapshot file names are parsed with.
NAME = '%Y%m%dT%H%MZ.tsv'

# Columns moved out of every input line into the aggregated row itself;
# whatever remains of the line becomes an entry in the row's history.
HEAD = (
    'region №',
    'region',
    'ТВО №',
    'Центр ТВО',
    '№ ВД',
)
def without(dict, *keys):
    """Return a shallow copy of *dict* that lacks every key in *keys*.

    The input mapping is left untouched, the surviving items keep their
    original order, and keys that are not present are ignored.
    """
    dropped = frozenset(keys)
    kept = {}
    for name, value in dict.items():
        if name not in dropped:
            kept[name] = value
    return kept
# Aggregated data.
rows = {}
# Final row order.
keys = []

# Progress messages are written to stderr only when the first command-line
# argument is exactly '-v'; otherwise trace() accepts the same arguments
# and does nothing.
if argv[1:2] == ['-v']:
    def trace(*args, **named):
        """Print a progress message to stderr, print()-style."""
        print(*args, file=stderr, **named)
else:
    def trace(*args, **named):
        """Discard the progress message."""
        return None
# Fold every *.tsv snapshot in the current directory into `rows`.  The
# snapshots are visited in file-name order, which for the fixed-width NAME
# pattern is also chronological order.
for entry in sorted(scandir(), key=lambda e: e.name):
    if not entry.name.endswith('.tsv'):
        continue
    trace(entry.name, end=': ')
    # NOTE(review): no explicit encoding is passed, so the Cyrillic column
    # names are decoded with the locale default -- confirm that this
    # matches how the snapshots were written.
    with open(entry.name, newline='\r\n') as tsv:
        # Start the order list afresh so that, after the loop, it reflects
        # the last snapshot processed.
        keys = []
        taken = DateTime.strptime(entry.name, NAME)
        # Identical for every line of this snapshot: format it once.
        timestamp = taken.strftime('%Y-%m-%dT%H:%MZ')
        prevlen, updated = len(rows), 0
        for line in DictReader(tsv, dialect='ietf-tab'):
            # These two columns are not carried into the output.
            del line['Кількість виборчих дільниць в окрузі']
            del line['Кількість виборчих дільниць щодо яких '
                     'надійшли відомості']
            # Checked with an explicit raise rather than an assert so the
            # column cannot be silently overwritten under `python -O`.
            if 'timestamp' in line:
                raise ValueError('{}: input already has a "timestamp" column'
                                 .format(entry.name))
            line['timestamp'] = timestamp
            # Split the line: HEAD columns identify the row, the remainder
            # (plus the timestamp) is one history entry.
            row = {k: line.pop(k) for k in HEAD}
            row['history'] = [line]
            key = (row['ТВО №'], int(row['№ ВД']))
            keys.append(key)
            # First sighting stores `row` as is; otherwise the stored row
            # is returned and only its history may grow.
            row = rows.setdefault(key, row)
            # Record the line only if it differs from the latest history
            # entry in something other than the timestamp.  For a freshly
            # inserted row the latest entry is `line` itself, so nothing
            # is appended.
            if (without(row['history'][-1], 'timestamp') !=
                    without(line, 'timestamp')):
                row['history'].append(line)
                updated += 1
    trace('{} lines, {} inserted, {} updated'
          .format(len(keys), len(rows)-prevlen, updated))
    if len(rows)-prevlen == 0 and updated == 0:
        print('warning: {}: no changes'.format(entry.name),
              file=stderr)
    # Build the set once: membership tests against the `keys` list inside
    # the loop below would make the check quadratic.
    seen = set(keys)
    assert seen.issubset(rows)
    # Compare distinct keys, not the line count, so that duplicated lines
    # in a snapshot cannot hide rows that are absent from it.
    if len(seen) < len(rows):
        for k, row in rows.items():
            if k in seen:
                continue
            print('warning: {}: ТВО {}, ВД {} missing'
                  .format(entry.name,
                          row['ТВО №'],
                          row['№ ВД']),
                  file=stderr)
# Turn each row's history from a list of entries into a mapping from the
# entry's timestamp to the rest of the entry.
for row in rows.values():
    by_time = {}
    for r in row['history']:
        # The timestamp is read rather than popped: the original comment
        # notes that popping it in the key position of a dict
        # comprehension is unsafe because of the order in which CPython
        # evaluates dictionary comprehensions (#29652).
        by_time[r['timestamp']] = without(r, 'timestamp')
    row['history'] = by_time

# Write the rows to stdout in the order recorded in `keys`, with CRLF line
# endings.
stdout.reconfigure(newline='\r\n')
dump((rows[key] for key in keys), stdout, ensure_ascii=False, indent='\t')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement