caparol6991

get_popularity

Dec 9th, 2019
184
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. def get_popularity(df):
  2.  
  3. start_time = time.time()
  4.  
  5. #rozdziel wyswietlone hotele i ceny
  6. impressions_explode = explode(df, "impressions")
  7. prices_explode = explode(df, "prices")
  8.  
  9. action_type = df['action_type'] == "clickout item"
  10. clicks = df.loc[action_type]
  11.  
  12. action_type = impressions_explode['action_type'] == "clickout item"
  13. views = impressions_explode.loc[action_type]
  14.  
  15. #zlicz klikniecia w kazdy hotel
  16. clicks_count = clicks['reference'].value_counts()
  17. clicks_count.index = clicks_count.index.astype(int)
  18.  
  19. #zlicz ile razy hotel byl wyswietlony
  20. views_count = views['impressions'].value_counts()
  21. ctr = clicks_count.divide(views_count)
  22.  
  23. #polacz hotele z ich cenami
  24. prices_temp = {'reference': impressions_explode.impressions, 'Price': prices_explode.prices}
  25. prices = pd.DataFrame(prices_temp)
  26.  
  27. #prices.set_index('reference')
  28.  
  29. #wyrzuc wszystkie duplikujace sie wiersze z cenami hotelow
  30. prices = prices.drop_duplicates('reference')
  31.  
  32. #polacz klikniecia, wyswietlenia i CTR hotelu w jedno
  33. df_combined = {'Clicks': clicks_count, 'Views': views_count, "CTR": ctr}
  34. result = pd.DataFrame(df_combined)
  35. result = result.rename_axis('reference').reset_index()
  36. result = result.drop_duplicates('reference')
  37. result = result.dropna()
  38.  
  39. #polacz poprzedni dataframe z cenami
  40. result2 = pd.merge(result, prices, on='reference')
  41. result2 = result2.sort_values('Clicks', ascending=False)
  42.  
  43. print(str(time.time() - start_time))
  44.  
  45. return result2
RAW Paste Data