Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- [("MLGuy42", "andrew@gmail.com", "123-4567"),
- ("CS229DungeonMaster", "123-4567", "ml@stanford.edu"),
- ("Doomguy", "john@oculus.com", "carmack@gmail.com"),
- ("andrew26", "andrew@gmail.com", "mlguy@gmail.com")]
- [[0, 1, 3], [2]]
- def find_duplicates(user_info):
- results = list()
- seen = dict()
- for i, user in enumerate(user_info):
- first_seen = True
- key_info = None
- for info in user:
- if info in seen:
- first_seen = False
- key_info = info
- break
- if first_seen:
- results.append([i])
- pos = len(results) - 1
- else:
- index = seen[key_info]
- results[index].append(i)
- pos = index
- for info in user:
- seen[info] = pos
- return results
- def find_duplicates(user_info):
- results = list()
- seen = dict()
- for i, user in enumerate(user_info):
- for info in user:
- if info in seen:
- index = seen[info]
- results[index].append(i)
- pos = index
- break
- else:
- results.append([i])
- pos = len(results) - 1
- for info in user:
- seen[info] = pos
- return results
- from random import randrange
- from timeit import timeit
- MAXVALUE = 1000
- for a in range(5): #I wanted to make sure I checked 5 to make sure I don't get a outlier data set that effects my ability to use timeit reasonably.
- user_info = [[randrange(MAXVALUE) for i in range(3)] for _ in range(1000)]
- print(timeit(lambda: find_duplicates(user_info), number=10000))
- import unittest
- #import or paste your function here
- a = ('a', 'a@a', '1')
- b = ('b', 'b@b', '2')
- c = ('c', 'c@c', '3')
- ab = ('a', 'b@b', '12')
- tests = [
- ([a, b], [[0], [1]]),
- ([a, b, c], [[0], [1], [2]]),
- ([a, b], [[0], [1]]),
- ([a, ab, b], [[0, 1, 2]]),
- ([a, ab, b, c], [[0, 1, 2],[3]]),
- ([a, ab, c, b], [[0, 1, 3],[2]]),
- ([c, a, ab, b], [[0],[1, 2, 3]]),
- ([a, b, ab], [[0, 1, 2]]),
- ]
- class Test(unittest.TestCase):
- def test_some(self):
- for n, t in enumerate(tests):
- ud = t[0]
- ref = t[1]
- res = find_duplicates(ud)
- assert ref==res, "n:{}, ud:{}, ref:{}, res:{}".format(n, ud, ref, res)
- if __name__ == "__main__":
- unittest.main()
- ======================================================================
- FAIL: test_some (user_info.Test)
- ----------------------------------------------------------------------
- Traceback (most recent call last):
- File "C:UsersVerenaworkspaceuser_infosrcuser_info.py", line xx, in test_some
- assert ref==res, "n:{}, ud:{}, ref:{}, res:{}".format(n, ud, ref, res)
- AssertionError: n:7, ud:[('a', 'a@a', '1'), ('b', 'b@b', '2'), ('a', 'b@b', '12')], ref:[[0, 1, 2]], res:[[0, 2], [1]]
- ----------------------------------------------------------------------
- Ran 1 test in 0.001s
Add Comment
Please, Sign In to add comment