Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd

# Sample data: nine records of four integers each. Several rows share three
# of their four values, which is what del_similar_records is run against.
df = pd.DataFrame(
    [
        [0, 6, 4, 2],
        [1, 2, 3, 4],
        [1, 2, 3, 5],
        [1, 2, 4, 5],
        [2, 7, 8, 9],
        [1, 2, 5, 7],
        [1, 3, 7, 9],
        [3, 6, 9, 12],
        [5, 10, 15, 20],
    ]
)
def del_similar_records(df, similar=3):
    """Build a keep-mask that filters out rows too similar to an earlier row.

    Rows are visited in order. Each row is reduced to the set of its cell
    values, and it is kept only if it shares fewer than ``similar`` distinct
    values with every row kept before it. Rows that were rejected are not
    used for later comparisons. Despite the name, nothing is deleted here:
    the caller applies the returned mask, e.g. ``df[mask]``.

    Args:
        df: DataFrame whose rows are compared by their cell values.
        similar: Number of shared distinct values at which two rows are
            considered similar. Defaults to 3.

    Returns:
        A list of bools, one per row of ``df`` in row order; ``True`` marks
        a row to keep.
    """
    kept_sets = []
    keep_mask = []
    # index=False: the row label is not part of the record, so it must not
    # count towards the overlap between two rows.
    for row in df.itertuples(index=False, name=None):
        values = set(row)
        is_distinct = all(len(kept & values) < similar for kept in kept_sets)
        keep_mask.append(is_distinct)
        if is_distinct:
            kept_sets.append(values)
    return keep_mask
# Compute the keep-mask for the sample frame and print only the rows that
# were kept.
b_list = del_similar_records(df)
print(df[b_list])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement