Advertisement
Guest User

Untitled

a guest
Apr 18th, 2019
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.51 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # NB Depends on dict preserving the insert order (CPython >= 3.6, PyPy all)
  3.  
  4. from csv import DictReader
  5. from datetime import datetime as DateTime
  6. from os import scandir
  7. from simplejsonseq import dump
  8. from sys import argv, stderr, stdout
  9.  
  10. import ietfcsv # ietf-tab CSV dialect
  11.  
  12. NAME = '%Y%m%dT%H%MZ.tsv'
  13. HEAD = ('region №', 'region', 'ТВО №', 'Центр ТВО', '№ ВД')
  14.  
  15. def without(dict, *keys):
  16. keys = set(keys)
  17. return {k: v for k, v in dict.items() if k not in keys}
  18.  
  19. rows = {} # aggregated data
  20. keys = [] # final row order
  21.  
  22. if len(argv) > 1 and argv[1] == '-v':
  23. def trace(*args, **named):
  24. print(*args, **named, file=stderr)
  25. else:
  26. def trace(*args, **named):
  27. pass
  28.  
  29. for entry in sorted(scandir(), key=lambda e: e.name):
  30. if not entry.name.endswith('.tsv'): continue
  31. trace(entry.name, end=': ')
  32. with open(entry.name, newline='\r\n') as tsv:
  33. keys = []
  34. time = DateTime.strptime(entry.name, NAME)
  35. prevlen, updated = len(rows), 0
  36.  
  37. for line in DictReader(tsv, dialect='ietf-tab'):
  38. del line['Кількість виборчих дільниць в окрузі']
  39. del line['Кількість виборчих дільниць щодо яких '
  40. 'надійшли відомості']
  41. assert 'timestamp' not in line
  42. line['timestamp'] = time.strftime('%Y-%m-%dT%H:%MZ')
  43.  
  44. row = {k: line.pop(k) for k in HEAD}
  45. row['history'] = [line]
  46. key = (row['ТВО №'], int(row['№ ВД']))
  47. keys.append(key)
  48.  
  49. row = rows.setdefault(key, row)
  50. if (without(row['history'][-1], 'timestamp') !=
  51. without(line, 'timestamp')):
  52. row['history'].append(line)
  53. updated += 1
  54.  
  55. trace('{} lines, {} inserted, {} updated'
  56. .format(len(keys), len(rows)-prevlen, updated))
  57. if len(rows)-prevlen == 0 and updated == 0:
  58. print('warning: {}: no changes'.format(entry.name),
  59. file=stderr)
  60. assert set(keys) <= set(rows)
  61. if len(keys) < len(rows):
  62. for k, row in rows.items():
  63. if k in keys: continue
  64. print('warning: {}: ТВО {}, ВД {} missing'
  65. .format(entry.name,
  66. row['ТВО №'],
  67. row['№ ВД']),
  68. file=stderr)
  69.  
  70. for k, row in rows.items():
  71. # Not {r.pop('timestamp'): r for r ...} because CPython evaluates
  72. # dictionary comprehensions in the wrong order (#29652)
  73. row['history'] = {r['timestamp']: without(r, 'timestamp')
  74. for r in row['history']}
  75. stdout.reconfigure(newline='\r\n')
  76. dump((rows[k] for k in keys), stdout, ensure_ascii=False, indent='\t')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement