SHARE
TWEET

Untitled

a guest Oct 23rd, 2019 69 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  import random
  import re

  import gensim
  import matplotlib.pyplot as plt
  import nltk
  import pandas as pd
  import pyLDAvis
  import pyLDAvis.sklearn
  import spacy
  from nltk.corpus import stopwords
  from sklearn.decomposition import LatentDirichletAllocation
  from sklearn.feature_extraction.text import CountVectorizer
  from sklearn.feature_extraction.text import TfidfVectorizer
  from sklearn.metrics.pairwise import linear_kernel

  %matplotlib inline
  # Show up to 50 columns when a DataFrame is displayed.
  pd.set_option('display.max_columns', 50)

  # Load the hotel table and key every row by the hotel's name.
  df = pd.read_csv('data/Seattle_Hotels_dirty.csv', encoding="latin-1")
  df.set_index('name', inplace=True)

  # TF-IDF features over word uni-, bi- and tri-grams of the description
  # column, with English stop words removed.
  # min_df=1 (the library default) replaces the original min_df=0: recent
  # scikit-learn releases reject an integer min_df below 1, and the resulting
  # vocabulary is the same because every fitted term occurs in >= 1 document.
  tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
  tfidf_matrix = tf.fit_transform(df['desc'])

  # TfidfVectorizer L2-normalises each row by default, so the plain dot
  # product computed by linear_kernel is the cosine similarity.
  cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

  # Row position -> hotel name, used to translate a name into a matrix row.
  indices = pd.Series(df.index)
  def recommendations(name, cosine_similarities=cosine_similarities, top_n=5):
      """Return the names of the hotels most similar to ``name``.

      Args:
          name: Hotel name exactly as it appears in the index of ``df``.
          cosine_similarities: Square matrix of pairwise similarity scores whose
              rows/columns follow the row order of ``df``. The default is the
              module-level matrix as bound when this function was defined.
          top_n: How many recommendations to return. Defaults to 5, which is
              what the original hard-coded slice produced.

      Returns:
          A list of at most ``top_n`` hotel names, most similar first. The
          queried hotel itself is never included.

      Raises:
          IndexError: If ``name`` does not match any hotel.
      """
      # Translate the hotel name into its row position in the matrix.
      matches = indices[indices == name].index
      if len(matches) == 0:
          # Same exception type the bare ``.index[0]`` lookup used to raise,
          # but with a message that names the actual problem.
          raise IndexError("hotel name not found: {!r}".format(name))
      idx = matches[0]

      scores = pd.Series(cosine_similarities[idx])

      # Remove the queried hotel by position instead of assuming it sorts
      # first: with tied scores (e.g. duplicate descriptions) or an all-zero
      # row it is not guaranteed to land at the top of the ranking.
      scores = scores.drop(idx)
      top_positions = scores.sort_values(ascending=False).iloc[:top_n].index

      # Look the names up on the index directly rather than rebuilding
      # ``list(df.index)`` once per result.
      hotel_names = df.index
      return [hotel_names[i] for i in top_positions]
  39.    
  # Example query. As the last expression of a notebook cell, the returned
  # list of hotel names is shown as the cell output.
  recommendations(name='Hilton Garden Inn Seattle Downtown')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top