Guest User

Untitled

a guest
Nov 21st, 2017
484
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.60 KB | None | 0 0
  1. [("MLGuy42", "andrew@gmail.com", "123-4567"),
  2. ("CS229DungeonMaster", "123-4567", "ml@stanford.edu"),
  3. ("Doomguy", "john@oculus.com", "carmack@gmail.com"),
  4. ("andrew26", "andrew@gmail.com", "mlguy@gmail.com")]
  5.  
  6. [[0, 1, 3], [2]]
  7.  
  8. def find_duplicates(user_info):
  9. results = list()
  10. seen = dict()
  11. for i, user in enumerate(user_info):
  12. first_seen = True
  13. key_info = None
  14. for info in user:
  15. if info in seen:
  16. first_seen = False
  17. key_info = info
  18. break
  19. if first_seen:
  20. results.append([i])
  21. pos = len(results) - 1
  22. else:
  23. index = seen[key_info]
  24. results[index].append(i)
  25. pos = index
  26. for info in user:
  27. seen[info] = pos
  28. return results
  29.  
  30. def find_duplicates(user_info):
  31. results = list()
  32. seen = dict()
  33. for i, user in enumerate(user_info):
  34. for info in user:
  35. if info in seen:
  36. index = seen[info]
  37. results[index].append(i)
  38. pos = index
  39. break
  40. else:
  41. results.append([i])
  42. pos = len(results) - 1
  43. for info in user:
  44. seen[info] = pos
  45. return results
  46.  
  47. from random import randrange
  48. from timeit import timeit
  49. MAXVALUE = 1000
  50.  
  51. for a in range(5): #I wanted to make sure I checked 5 to make sure I don't get a outlier data set that effects my ability to use timeit reasonably.
  52. user_info = [[randrange(MAXVALUE) for i in range(3)] for _ in range(1000)]
  53. print(timeit(lambda: find_duplicates(user_info), number=10000))
Add Comment
Please, Sign In to add comment