Advertisement
Guest User

Untitled

a guest
Aug 1st, 2015
205
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.00 KB | None | 0 0
  1. from collections import defaultdict
  2. def get_overlap(pair, v2_extractor=None, v4_extractor=None):
  3. v2_blobs = pair['v2'].get('data', {}).get('days', {}) # {'YYYY-MM-DD': dict}
  4. v4_blobs = pair['v4'] # [{'creationDate': 'YYYY-MM-DD:...', 'k': val, ...}, ...]
  5. # One blob per date in v2, multiple per date in v4
  6. results = {'v2': {}, 'v4': defaultdict(list)}
  7. if not (v2_blobs and v4_blobs):
  8. return results
  9.  
  10. v2_dates = v2_blobs.keys()
  11. v2_dates.sort()
  12. v4_blobs.sort(key=lambda d: d['creationDate'])
  13. v2_start, v2_end = v2_dates[0], v2_dates[-1] # possibly same
  14. start = end = None
  15.  
  16. # Find overlap and walk v4 at same time.
  17. for v4 in v4_blobs:
  18. v4_date = v4['creationDate'][:10]
  19. # Walk start up as far as we must.
  20. if v4_date < v2_start:
  21. # If v2 is entirely after v4, we never get past here.
  22. continue
  23. elif not start:
  24. start = v4_date
  25. # We have at least one overlapping date.
  26. if v4_date <= v2_end:
  27. # Walk end up as far as we can.
  28. end = v4_date
  29. else:
  30. break
  31. value = v4_extractor(v4) if v4_extractor else v4
  32. results['v4'][v4_date].append(value)
  33. if end is None: # We never reached last line of the loop.
  34. return results
  35.  
  36. for v2_date in v2_dates:
  37. if v2_date < start:
  38. continue
  39. if v2_date > end:
  40. break
  41. value = v2_extractor(v2_blobs[v2_date]) if v2_extractor else v2_blobs[v2_date]
  42. results['v2'][v2_date] = value
  43. return results
  44.  
  45. def test_get_overlap():
  46. d1 = '1999-12-31'
  47. dt1 = d1 + 'T23:59:59.999Z'
  48. d2 = '2000-01-01'
  49. dt2 = d2 + 'T00:00:00.001Z'
  50. simple = {'v2': {'data': {'days': {d1: None, d2: test_get_overlap}}},
  51. 'v4': [{'creationDate': dt1},
  52. {'creationDate': dt2}]}
  53. assert get_overlap(simple) == {'v2': simple['v2']['data']['days'],
  54. 'v4': {d1: [{'creationDate': dt1}],
  55. d2: [{'creationDate': dt2}]}}
  56. # If either or both schema versions are empty, so is the result.
  57. empty_in = {'v2': {}, 'v4': []}
  58. empty = {'v2': {}, 'v4': {}}
  59. assert get_overlap(empty_in) == empty
  60. assert get_overlap({'v2': simple['v2'], 'v4': []}) == empty
  61. assert get_overlap({'v2': {'data': {'neighs': None}}, 'v4': simple['v4']}) == empty
  62.  
  63. d3 = '2001-02-03'
  64. dt3 = d3 + 'T03:59:59.001Z'
  65. dt35 = d3 + 'T03:59:59.999Z'
  66. d4 = '2002-03-04'
  67. dt4 = d4 + 'T13:07:57.222Z'
  68. dt45 = d4 + 'T13:09:59.333Z'
  69. def v2_extractor(d):
  70. return d['value']
  71. all_v2 = dict([(v, {'value': i}) for i, v in enumerate([d1, d2, d3, d4])])
  72. all_v2_out = dict([(k, d['value']) for k, d in all_v2.items()])
  73. all_v4 = [{'creationDate': dt, 'last': dt[-4:]} for dt in [dt1, dt2, dt3, dt35, dt4, dt45]]
  74. def v4_extractor(d):
  75. return d['last']
  76. all_v4_out = {d1: ['999Z'], d2: ['001Z'], d3: ['001Z', '999Z'], d4: ['222Z', '333Z']}
  77.  
  78. # Perfect overlap, multiple blobs per day in v4, extractor functions
  79. input = {'v2': {'data': {'days': all_v2}},
  80. 'v4': all_v4}
  81. output = {'v2': all_v2_out,
  82. 'v4': all_v4_out}
  83. result = get_overlap(input, v2_extractor, v4_extractor)
  84. assert result == output
  85.  
  86. # Remove v2's head and v4's tail
  87. from copy import deepcopy
  88. input2 = deepcopy(input)
  89. del input2['v2']['data']['days'][d1]
  90. input2['v4'].pop()
  91. input2['v4'].pop()
  92. output2 = deepcopy(output)
  93. del output2['v2'][d1]
  94. del output2['v2'][d4]
  95. del output2['v4'][d1]
  96. del output2['v4'][d4]
  97. assert get_overlap(input2, v2_extractor, v4_extractor) == output2
  98. # And again
  99. del input2['v2']['data']['days'][d2]
  100. del output2['v2'][d2]
  101. del output2['v4'][d2]
  102. assert get_overlap(input2, v2_extractor, v4_extractor) == output2
  103.  
  104. # And again; now we're empty
  105. input2['v4'].pop()
  106. input2['v4'].pop()
  107. del output2['v2'][d3]
  108. del output2['v4'][d3]
  109. assert output2 == empty
  110. assert get_overlap(input2, v2_extractor, v4_extractor) == output2
  111.  
  112. # Since the algorithm treats v2 and v4 asymmetrically, also test opposite
  113. input3 = deepcopy(input)
  114. output3 = deepcopy(output)
  115. del input3['v2']['data']['days'][d4]
  116. del output3['v2'][d4]
  117. del output3['v4'][d4]
  118. assert get_overlap(input3, v2_extractor, v4_extractor) == output3
  119. input3['v4'].pop(0)
  120. del output3['v2'][d1]
  121. del output3['v4'][d1]
  122. assert get_overlap(input3, v2_extractor, v4_extractor) == output3
  123. del input3['v2']['data']['days'][d3]
  124. del output3['v2'][d3]
  125. del output3['v4'][d3]
  126. assert get_overlap(input3, v2_extractor, v4_extractor) == output3
  127. v4_head = input3['v4'].pop(0)
  128. del output3['v2'][d2]
  129. del output3['v4'][d2]
  130. assert empty == output3
  131. assert get_overlap(input3, v2_extractor, v4_extractor) == output3
  132. input3['v4'] = [v4_head] + input3['v4']
  133. del input3['v2']['data']['days'][d2]
  134. assert get_overlap(input3, v2_extractor, v4_extractor) == output3
  135.  
  136. print "all passed"
# Run the self-test whenever this file is executed or imported.
test_get_overlap()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement