Advertisement
yerseg

Untitled

Jul 18th, 2019
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.94 KB | None | 0 0
  1. import pandas as pd
  2. import re
  3. import datrie
  4. import string
  5. from matplotlib import pylab
  6. import datetime
  7. import numpy as np
  8.  
  9. data = pd.read_csv("raw_search_data.csv", sep=';', error_bad_lines=False)
  10. data = data.dropna()
  11. data['query'] = data['query'].apply(lambda x: str(x).lower())
  12. data = data[list(map(lambda x: len(x) < 50, data[:]['query']))]
  13. data['datetime'] = data['datetime'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
  14. data = data.reset_index()
  15. data = data.drop(columns=['index'])
  16. uid_set = set(data['uid'])
  17.  
  18. idx_for_delete = []
  19. for uid, i in zip(uid_set, range(len(uid_set))):
  20.     cur_data = data[data.uid == uid]
  21.     idxs = data[data.uid == uid].index
  22.     if (max(cur_data['datetime']) - min(cur_data['datetime'])) <= datetime.timedelta(seconds=10):
  23.         idxs = idxs.delete(np.argmax(list(map(lambda x: len(x), cur_data['query']))))
  24.         idx_for_delete += list(idxs)
  25.         print (idxs)
  26.         print (i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement