Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from keras_tqdm import TQDMNotebookCallback
- from keras.models import model_from_json
- from keras.preprocessing.image import ImageDataGenerator
- from keras.preprocessing import image, sequence
- import numpy as np
- import pandas as pd
- import keras
- from keras import backend as K
- from keras.utils.data_utils import get_file
- from keras.utils import np_utils
- from keras.utils.np_utils import to_categorical
- from keras.models import Sequential, Model
- from keras.layers import Input, Embedding, dot, Reshape, merge, LSTM, Bidirectional
- from keras.layers import TimeDistributed, Activation, SimpleRNN, GRU,PReLU
- from keras.layers.core import Flatten, Dense, Dropout, Lambda
- from keras.regularizers import l2, l1
- from keras.layers.normalization import BatchNormalization
- from keras.optimizers import SGD, RMSprop, Adam
- from keras.metrics import categorical_crossentropy, categorical_accuracy
- from keras.layers.convolutional import *
- from keras.preprocessing import image, sequence
- from keras.preprocessing.text import Tokenizer
- from keras.layers import Conv2D, Dense, MaxPool2D, Flatten, Input
- from keras.applications import ResNet50, VGG16, VGG19
- from sklearn.metrics import log_loss
- import time
- from sklearn.cross_validation import train_test_split
- from sklearn.preprocessing import MinMaxScaler
- from keras import regularizers
# --- Load data ---------------------------------------------------------
# Adjust `path` so that it points at the directory holding ratings.csv.
path = "data/"
ratings = pd.read_csv(path + "ratings.csv")

# Distinct raw IDs, in order of first appearance in the ratings table.
users = ratings.userId.unique()
movies = ratings.movieId.unique()

# Map every raw ID to a contiguous 0-based index (its position in
# `users` / `movies`); these indices address rows/columns of `ratingAra`.
userid2idx = {usernum: i for i, usernum in enumerate(users)}
movieid2idx = {movienum: i for i, movienum in enumerate(movies)}

numUsers = len(users)
numMovies = len(movies)

# Row / column position of each individual rating.
rowIdx = ratings.userId.map(userid2idx).values
colIdx = ratings.movieId.map(movieid2idx).values

# Dense (numUsers x numMovies) rating matrix; cells without a rating stay 0.
# A single vectorised assignment replaces the former per-row Python loop.
# If a (user, movie) pair occurs more than once the last rating wins, which
# is what the loop did as well.
ratingAra = np.zeros(shape=(numUsers, numMovies))
ratingAra[rowIdx, colIdx] = ratings.rating.values

# Column vectors with the raw ID belonging to each matrix row / column
# (float dtype, matching the np.zeros buffers used previously).
userIdAra = users.astype(float).reshape(-1, 1)
movieIdAra = movies.astype(float).reshape(-1, 1)

numUsers, numMovies  # bare expression: shows the sizes when run as a notebook cell
# --- Matrix-factorisation model ----------------------------------------
# Inner dimension of the matrices in the product decomposition.
numFactors = 50


def _latent_factors(num_entries, index_input):
    """Look up a `numFactors`-wide, L2-regularised embedding for `index_input`.

    `num_entries` is the number of rows in the embedding table and
    `index_input` is the length-1 integer input tensor to embed.
    """
    return Embedding(
        num_entries,
        numFactors,
        embeddings_initializer='uniform',
        embeddings_regularizer=regularizers.l2(1e-4),
        input_length=1,
    )(index_input)


# One integer index per sample for the user and for the movie.
inp_user = Input(shape=(1,), dtype='int64', name='inp_user')
inp_movie = Input(shape=(1,), dtype='int64', name='inp_movie')

u = _latent_factors(numUsers, inp_user)
m = _latent_factors(numMovies, inp_movie)

# Dot product along the factor axis (axis 2, numFactors elements), then
# flatten the result to one value per sample.
x = dot([u, m], axes=2)
f = Flatten()(x)

model = Model([inp_user, inp_movie], f)
model.compile(Adam(0.001), loss='mse')
# --- Training / validation split ---------------------------------------
# Seed the generator so the split is reproducible.  (Assigning
# `np.random.seed = 1` would replace the function instead of seeding.)
np.random.seed(1)

# Roughly 80 % of the ratings go to training, the remainder to validation.
mask = np.random.rand(len(ratings)) < 0.8
trainSet = ratings[mask]
valSet = ratings[~mask]

# The model requires numpy arrays.  The Embedding layers are sized
# numUsers / numMovies, so they must be fed the contiguous indices from
# userid2idx / movieid2idx -- raw userId / movieId values fall outside
# [0, numUsers) / [0, numMovies) and make the embedding lookup fail.
userVecT = trainSet.userId.map(userid2idx).values
movieVecT = trainSet.movieId.map(movieid2idx).values
ratingVecT = trainSet.rating.values

userVecV = valSet.userId.map(userid2idx).values
movieVecV = valSet.movieId.map(movieid2idx).values
ratingVecV = valSet.rating.values

type(userVecT), userVecT.shape  # bare expression: notebook sanity check
# --- Fit the model to the training set ---------------------------------
# Train on the *T (training) arrays and validate on the *V arrays.
model.fit([userVecT, movieVecT], ratingVecT, batch_size=64, epochs=1,
          validation_data=([userVecV, movieVecV], ratingVecV))

# NOTE: the original Keras 1 version passed the DataFrame columns directly:
#
#   model.fit([trainSet.userId, trainSet.movieId], trainSet.rating,
#             batch_size=64, epochs=1,
#             validation_data=([valSet.userId, valSet.movieId], valSet.rating))
#
# NOTE: both model inputs must hold indices inside the embedding tables,
# i.e. values in [0, numUsers) and [0, numMovies).  Feeding raw userId /
# movieId values instead fails with, for example:
#
#   InvalidArgumentError: indices[5,0] = 39183 is not in [0, 9066)
#
# Translate IDs with userid2idx / movieid2idx before calling fit().
Add Comment
Please, Sign In to add comment