Guest User

Untitled

a guest
Jan 20th, 2018
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.18 KB | None | 0 0
  1. from keras_tqdm import TQDMNotebookCallback
  2.  
  3. from keras.models import model_from_json
  4.  
  5. from keras.preprocessing.image import ImageDataGenerator
  6.  
  7. from keras.preprocessing import image, sequence
  8.  
  9. import numpy as np
  10.  
  11. import pandas as pd
  12.  
  13. import keras
  14.  
  15. from keras import backend as K
  16.  
  17. from keras.utils.data_utils import get_file
  18.  
  19. from keras.utils import np_utils
  20.  
  21. from keras.utils.np_utils import to_categorical
  22.  
  23. from keras.models import Sequential, Model
  24.  
  25. from keras.layers import Input, Embedding, dot, Reshape, merge, LSTM, Bidirectional
  26.  
  27. from keras.layers import TimeDistributed, Activation, SimpleRNN, GRU,PReLU
  28.  
  29. from keras.layers.core import Flatten, Dense, Dropout, Lambda
  30.  
  31. from keras.regularizers import l2, l1
  32.  
  33. from keras.layers.normalization import BatchNormalization
  34.  
  35. from keras.optimizers import SGD, RMSprop, Adam
  36.  
  37. from keras.metrics import categorical_crossentropy, categorical_accuracy
  38.  
  39. from keras.layers.convolutional import *
  40.  
  41. from keras.preprocessing import image, sequence
  42.  
  43. from keras.preprocessing.text import Tokenizer
  44.  
  45. from keras.layers import Conv2D, Dense, MaxPool2D, Flatten, Input
  46.  
  47. from keras.applications import ResNet50, VGG16, VGG19
  48.  
  49. from sklearn.metrics import log_loss
  50.  
  51. import time
  52.  
  53. from sklearn.cross_validation import train_test_split
  54.  
  55. from sklearn.preprocessing import MinMaxScaler
  56.  
  57. from keras import regularizers
  58.  
  59. ## #load data
  60.  
  61. path = "data/"
  62.  
  63. ratings = pd.read_csv(path+"ratings.csv") #change this to load ratings.csv file as is appropriate
  64.  
  65. users = ratings.userId.unique()
  66.  
  67. movies = ratings.movieId.unique()
  68.  
  69. userid2idx = {usernum:i for i,usernum in enumerate(users)}
  70.  
  71. movieid2idx = {movienum:i for i,movienum in enumerate(movies)}
  72.  
  73.  
  74. # #convert the dataFrame "ratings" from excel into an array
  75. # #
  76. # # can also use dataFrame method .as_matrix()
  77. # #
  78.  
  79. ratingAra = np.zeros(shape=(len(users),len(movies)))
  80. userIdAra = np.zeros(shape=(len(users),1))
  81. movieIdAra = np.zeros(shape=(len(movies),1))
  82. for i in range(0,len(ratings)):
  83. userid = ratings[i:i+1].userId.get_values()[0]
  84. row = userid2idx[userid]
  85. movie = ratings[i:i+1].movieId.get_values()[0]
  86. col = movieid2idx[movie]
  87. userIdAra[row]=userid
  88. movieIdAra[col]=movie
  89. ratingAra[row,col]=ratings[i:i+1].rating.get_values()[0]
  90.  
  91. numUsers = len(users)
  92. numMovies = len(movies)
  93. numUsers, numMovies
  94.  
  95. ### #inner dimension of matrices in product decomposition
  96. numFactors = 50
  97.  
  98. #######################################
  99. # #model
  100. #
  101.  
  102. inp_user = Input(shape=(1,), dtype='int64',name='inp_user')
  103. inp_movie = Input(shape=(1,), dtype='int64',name='inp_movie')
  104.  
  105. u = Embedding(numUsers, numFactors, embeddings_initializer='uniform', embeddings_regularizer=regularizers.l2(1e-4), input_length=1)(inp_user)
  106.  
  107. m = Embedding(numMovies, numFactors, embeddings_initializer='uniform', embeddings_regularizer=regularizers.l2(1e-4), input_length=1)(inp_movie)
  108.  
  109. x = dot([u,m], axes=2) # = dot([u,m], axes=2) #or = merge([u,m], mode='dot') #dot product along the second axis of 50 elements
  110.  
  111. f = Flatten()(x)
  112. model = Model([inp_user, inp_movie],f)
  113.  
  114. #######################################
  115.  
  116. model.compile(Adam(0.001), loss='mse')
  117.  
  118.  
  119. #
  120. ### #set up training & validation sets
  121. #
  122.  
  123. np.random.seed = 1
  124. mask = np.random.rand(len(ratings)) < 0.8
  125. trainSet= ratings[mask]
  126. valSet = ratings[~mask]
  127.  
  128.  
  129. ### #model requires numpy arrays
  130. #
  131.  
  132. userVecT = trainSet.userId.as_matrix()
  133. movieVecT = trainSet.movieId.as_matrix()
  134. ratingVecT = trainSet.rating.as_matrix()
  135. userVecV = valSet.userId.as_matrix()
  136. movieVecV = valSet.movieId.as_matrix()
  137. ratingVecV = valSet.rating.as_matrix()
  138. type(userVecT), userVecT.shape
  139.  
  140.  
  141. ### #fit model to training set
  142. #
  143.  
  144. model.fit([userVec, movieVec], ratingVec, batch_size=64, epochs=1,
  145. validation_data=([userVecV, movieVecV], ratingVecV))
  146.  
  147. # # in original code keras 1, this was done:
  148. # #
  149. # # model.fit([trainSet.userId, trainSet.movieId], trainSet.rating, batch_size=64, epochs=1,
  150. # # [userIdAra, movieIdAra], ratingAra, batch_size=64, epochs=1,
  151. # # validation_data=([valSet.userId, valSet.movieId], valSet.rating))
  152.  
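########################################################################
###
### # Passing raw userId / movieId values to the Embedding layers gives the error
### #     InvalidArgumentError: indices[5,0] = 39183 is not in [0, 9066)
### # because an Embedding layer only accepts indices in [0, input_dim).
### # The ids must first be mapped to contiguous indices
### # (userid2idx / movieid2idx) before they are passed to model.fit.
###
########################################################################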
Add Comment
Please, Sign In to add comment