Advertisement
Guest User

Untitled

a guest
May 24th, 2019
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.59 KB | None | 0 0
  1.  
  2. def load_pool(file, stop_size=None):
  3.     if file == "pool":
  4.         filename = "\sport_pool_20190305_20190307"
  5.     elif file == "common":
  6.         filename = "\out_common"
  7.     elif file == "allowed":
  8.         filename = "\out_allowed"
  9.     elif file == "shuffled":
  10.         filename = "\shuf"
  11.     else:
  12.         print("filename error")
  13.  
  14.     df = pd.read_csv(r'C:\Users\Anastasiya\Desktopиплом' + filename, delimiter='\t',
  15.                      encoding='utf-8', nrows=300000, low_memory=False,
  16.                      names=['query', 'factors', 'urls', 'target', 'clicks'])
  17.  
  18.     cnt = 0
  19.     data = []
  20.     target = []
  21.     cnt0 = 0
  22.     cnt1 = 0
  23.     len_of_facts = len(list(str(df.values[0][1])[8:].split()))
  24.  
  25.     for ex in df.values:
  26.         query = str(ex[0])[6:]
  27.  
  28.         if query == "":
  29.             continue
  30.  
  31.         facts = list(str(ex[1])[8:].split())
  32.         urls = list(str(ex[2])[5:].split())
  33.         targ = int(str(ex[3])[7:])
  34.  
  35.         if len(facts) != len_of_facts:
  36.             continue
  37.  
  38.         cur_list = [query]
  39.         query_facts = facts[:]
  40.         cur_list += list(map(float, query_facts))
  41.  
  42.         data.append(cur_list)
  43.         if targ == 0:
  44.             target.append(0)
  45.             cnt0 += 1
  46.         else:
  47.             target.append(1)
  48.             cnt1 += 1
  49.  
  50.         cnt += 1
  51.         if cnt % 5000 == 0:
  52.             print("total: {}, 0: {}, 1: {}".format(cnt, cnt0, cnt1))
  53.  
  54.         if stop_size is not None:
  55.             if cnt >= stop_size:
  56.                 break
  57.  
  58.     print("data is loaded, cnt0 = {}, cnt1 = {}".format(cnt0, cnt1))
  59.     return data, target
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement