Advertisement
Guest User

Untitled

a guest
Jul 4th, 2019
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.55 KB | None | 0 0
  1. df = pd.DataFrame([[0,6,4,2], [1,2,3,4], [1,2,3,5,], [1,2,4,5], [2,7,8,9], [1,2,5,7], [1,3,7,9], [3,6,9,12], [5,10,15,20]])
  2.  
  3. def del_similar_records(df):
  4.     r_set_list = list()
  5.     bool_list = list()
  6.     similar = 3
  7.  
  8.     for r in df.itertuples():
  9.         r_set = set(r)
  10.         if all(list(map(lambda x: len(x & r_set) < similar, r_set_list))):
  11.             bool_list.append(True)
  12.             r_set_list.append(r_set)
  13.         else:
  14.             bool_list.append(False)
  15.  
  16.     return bool_list
  17.  
  18. b_list = del_similar_records(df)
  19.  
  20. print(df[b_list])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement