Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def get_popularity(df):
    """Build a per-item popularity table (clicks, views, CTR, price).

    Reads the ``action_type``, ``reference``, ``impressions`` and ``prices``
    columns of *df*. Relies on the ``explode`` helper defined elsewhere in
    this file (presumably it yields one row per listed impression / price
    -- confirm against its definition).

    Returns a DataFrame with the columns ``reference``, ``Clicks``,
    ``Views``, ``CTR`` and ``Price``, ordered by ``Clicks`` descending.
    Items lacking either a click count or a view count are dropped, and
    ``Price`` is taken from the first exploded row seen for each item.

    Side effect: prints the elapsed wall-clock time in seconds.
    """
    started = time.time()

    # Split the displayed items and their prices into separate rows.
    exploded_impressions = explode(df, "impressions")
    exploded_prices = explode(df, "prices")

    # Count clicks per item; the clicked item id is cast to int.
    is_clickout = df['action_type'] == "clickout item"
    click_totals = df.loc[is_clickout, 'reference'].value_counts()
    click_totals.index = click_totals.index.astype(int)

    # Count how many times each item was shown in a clickout row.
    shown_in_clickout = exploded_impressions['action_type'] == "clickout item"
    view_totals = exploded_impressions.loc[shown_in_clickout, 'impressions'].value_counts()

    click_through = click_totals / view_totals

    # Pair every displayed item with its price, keeping one row per item.
    price_table = pd.DataFrame({
        'reference': exploded_impressions['impressions'],
        'Price': exploded_prices['prices'],
    }).drop_duplicates('reference')

    # Combine clicks, views and CTR into one table keyed by item.
    stats = (
        pd.DataFrame({
            'Clicks': click_totals,
            'Views': view_totals,
            "CTR": click_through,
        })
        .rename_axis('reference')
        .reset_index()
        .drop_duplicates('reference')
        .dropna()
    )

    # Attach prices and order by the most clicked items first.
    popularity = pd.merge(stats, price_table, on='reference').sort_values(
        'Clicks', ascending=False
    )

    elapsed = time.time() - started
    print(str(elapsed))
    return popularity
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement