# Advertisement
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
from collections import Counter

import numpy as np
import pandas as pd
# Session files hold one session per line in the form
# "view1,view2,...;buy1,buy2,..."; the part after ';' is empty when the
# session ended without a purchase.
TRAIN_PATH = 'coursera_sessions_train.txt'
TEST_PATH = 'coursera_sessions_test.txt'


def parse_session(line):
    """Split one raw session line into ``(views, buys)`` lists of item ids.

    Ids are kept as strings, in the order they occur on the line. Empty
    fields are dropped, so a session without purchases yields ``buys == []``
    (rather than ``['']``).
    """
    views_part, _, buys_part = line.strip().partition(';')
    views = [item for item in views_part.split(',') if item]
    buys = [item for item in buys_part.split(',') if item]
    return views, buys


def load_sessions(path):
    """Read a session file and return a list of ``(views, buys)`` pairs.

    Blank lines are skipped. The file is closed before returning.
    """
    with open(path, 'r') as handle:
        return [parse_session(line) for line in handle if line.strip()]


def count_frequencies(sessions):
    """Count how often every item id was viewed and bought.

    Every occurrence counts, including repeats inside a single session.

    Returns:
        ``(views_freq, buys_freq)`` -- two ``Counter`` objects keyed by id.
    """
    views_freq = Counter()
    buys_freq = Counter()
    for views, buys in sessions:
        views_freq.update(views)
        buys_freq.update(buys)
    return views_freq, buys_freq


def recommend(views, freq, k):
    """Return up to ``k`` distinct viewed ids, most frequent first.

    Args:
        views: ids viewed in the session, in viewing order (may repeat).
        freq: mapping id -> popularity count; ids missing from it rank as 0.
        k: maximum number of recommendations.

    Ties are resolved by the position of the first view in the session.
    """
    seen = set()
    distinct = []
    for item in views:
        if item not in seen:
            seen.add(item)
            distinct.append(item)
    # sorted() is stable, so equally popular items keep their viewing order.
    ranked = sorted(distinct, key=lambda item: -freq.get(item, 0))
    return ranked[:k]


def evaluate(sessions, freq, ks=(1, 5)):
    """Average recall@k and precision@k over all sessions with a purchase.

    Args:
        sessions: iterable of ``(views, buys)`` pairs.
        freq: mapping id -> popularity count used to rank viewed items.
        ks: cut-offs to evaluate.

    Returns:
        A flat list ``[AR@k1, AP@k1, AR@k2, AP@k2, ...]``; with the default
        ``ks`` this is ``[AR@1, AP@1, AR@5, AP@5]``. All values are ``nan``
        when no session contains a purchase.
    """
    totals = [0.0] * (2 * len(ks))
    scored = 0
    for views, buys in sessions:
        # Purchases are de-duplicated so that recall can actually reach 1.0.
        bought = set(buys)
        if not bought:
            continue  # sessions without a purchase are not evaluated
        scored += 1
        for pos, k in enumerate(ks):
            hits = sum(1 for item in recommend(views, freq, k)
                       if item in bought)
            totals[2 * pos] += float(hits) / len(bought)
            # Precision is always divided by k, even when the session has
            # fewer than k distinct viewed items to recommend.
            totals[2 * pos + 1] += float(hits) / k
    if not scored:
        return [float('nan')] * len(totals)
    return [total / scored for total in totals]


def main():
    """Print [AR@1, AP@1, AR@5, AP@5] for the four ranking/dataset pairs."""
    data_train = load_sessions(TRAIN_PATH)
    data_test = load_sessions(TEST_PATH)
    # Popularity is always learned from the training sessions only.
    views_freq, buys_freq = count_frequencies(data_train)

    # 1. Ranking by view count, on the training data
    print(evaluate(data_train, views_freq))
    # 2. Ranking by view count, on the test data
    print(evaluate(data_test, views_freq))
    # 3. Ranking by purchase count, on the training data
    print(evaluate(data_train, buys_freq))
    # 4. Ranking by purchase count, on the test data
    print(evaluate(data_test, buys_freq))


if __name__ == '__main__':
    main()
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement