Guest User

Untitled

a guest
Nov 21st, 2017
540
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.89 KB | None | 0 0
  1. [("MLGuy42", "andrew@gmail.com", "123-4567"),
  2. ("CS229DungeonMaster", "123-4567", "ml@stanford.edu"),
  3. ("Doomguy", "john@oculus.com", "carmack@gmail.com"),
  4. ("andrew26", "andrew@gmail.com", "mlguy@gmail.com")]
  5.  
  6. [[0, 1, 3], [2]]
  7.  
  8. def find_duplicates(user_info):
  9. results = list()
  10. seen = dict()
  11. for i, user in enumerate(user_info):
  12. first_seen = True
  13. key_info = None
  14. for info in user:
  15. if info in seen:
  16. first_seen = False
  17. key_info = info
  18. break
  19. if first_seen:
  20. results.append([i])
  21. pos = len(results) - 1
  22. else:
  23. index = seen[key_info]
  24. results[index].append(i)
  25. pos = index
  26. for info in user:
  27. seen[info] = pos
  28. return results
  29.  
  30. def find_duplicates(user_info):
  31. results = list()
  32. seen = dict()
  33. for i, user in enumerate(user_info):
  34. for info in user:
  35. if info in seen:
  36. index = seen[info]
  37. results[index].append(i)
  38. pos = index
  39. break
  40. else:
  41. results.append([i])
  42. pos = len(results) - 1
  43. for info in user:
  44. seen[info] = pos
  45. return results
  46.  
  47. from random import randrange
  48. from timeit import timeit
  49. MAXVALUE = 1000
  50.  
  51. for a in range(5): #I wanted to make sure I checked 5 to make sure I don't get a outlier data set that effects my ability to use timeit reasonably.
  52. user_info = [[randrange(MAXVALUE) for i in range(3)] for _ in range(1000)]
  53. print(timeit(lambda: find_duplicates(user_info), number=10000))
  54.  
  55. import unittest
  56. #import or paste your function here
  57.  
  58. a = ('a', 'a@a', '1')
  59. b = ('b', 'b@b', '2')
  60. c = ('c', 'c@c', '3')
  61. ab = ('a', 'b@b', '12')
  62.  
  63. tests = [
  64. ([a, b], [[0], [1]]),
  65. ([a, b, c], [[0], [1], [2]]),
  66. ([a, b], [[0], [1]]),
  67. ([a, ab, b], [[0, 1, 2]]),
  68. ([a, ab, b, c], [[0, 1, 2],[3]]),
  69. ([a, ab, c, b], [[0, 1, 3],[2]]),
  70. ([c, a, ab, b], [[0],[1, 2, 3]]),
  71. ([a, b, ab], [[0, 1, 2]]),
  72. ]
  73.  
  74. class Test(unittest.TestCase):
  75.  
  76. def test_some(self):
  77. for n, t in enumerate(tests):
  78. ud = t[0]
  79. ref = t[1]
  80. res = find_duplicates(ud)
  81. assert ref==res, "n:{}, ud:{}, ref:{}, res:{}".format(n, ud, ref, res)
  82.  
  83. if __name__ == "__main__":
  84. unittest.main()
  85.  
  86. ======================================================================
  87. FAIL: test_some (user_info.Test)
  88. ----------------------------------------------------------------------
  89. Traceback (most recent call last):
  90. File "C:\Users\Verena\workspace\user_info\src\user_info.py", line xx, in test_some
  91. assert ref==res, "n:{}, ud:{}, ref:{}, res:{}".format(n, ud, ref, res)
  92. AssertionError: n:7, ud:[('a', 'a@a', '1'), ('b', 'b@b', '2'), ('a', 'b@b', '12')], ref:[[0, 1, 2]], res:[[0, 2], [1]]
  93.  
  94. ----------------------------------------------------------------------
  95. Ran 1 test in 0.001s
Add Comment
Please, Sign In to add comment