# Only the solution is shown here; the initial part of the code isn't included.
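# The omitted part is assumed to define generate_batch, graph, the training placeholders
# (train_dataset, train_labels), optimizer, loss, similarity, normalized_embeddings,
# reverse_dictionary and the valid_* constants, and to provide imports along the lines of:
#   import tensorflow as tf
#   from sklearn.manifold import TSNE
#   from matplotlib import pylab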

num_steps = 100001

with tf.Session(graph=graph) as session:
    # initialize_all_variables is deprecated; global_variables_initializer is the TF 1.x equivalent.
    tf.global_variables_initializer().run()
    print('Initialized')
    average_loss = 0
    for step in range(num_steps):
        # Algorithmically, these models are similar, except that CBOW predicts center words
        # from context words, while the skip-gram does the inverse and predicts source
        # context-words from the center words.
        # Thus we just need to swap batch_labels and batch_data in the line below to get
        # the other model (skip-gram -> CBOW).
        batch_labels, batch_data = generate_batch(batch_size, num_skips, skip_window)
        feed_dict = {train_dataset: batch_data.reshape(batch_size),
                     train_labels: batch_labels.reshape(batch_size, 1)}
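        # For comparison, the unswapped skip-gram feed (a sketch, assuming generate_batch
        # returns data of shape (batch_size,) and labels of shape (batch_size, 1) as in
        # the usual skip-gram starter code) would be:
        #   batch_data, batch_labels = generate_batch(batch_size, num_skips, skip_window)
        #   feed_dict = {train_dataset: batch_data, train_labels: batch_labels}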
        _, l = session.run([optimizer, loss], feed_dict=feed_dict)
        average_loss += l
        if step % 2000 == 0:
            if step > 0:
                # The average loss is an estimate over the last 2000 batches.
                average_loss = average_loss / 2000
            print('Average loss at step %d: %f' % (step, average_loss))
            average_loss = 0
        if step % 10000 == 0:
            # Periodically show the nearest neighbours of the validation words.
            sim = similarity.eval()
            for i in range(valid_size):  # range instead of Python 2's xrange
                valid_word = reverse_dictionary[valid_examples[i]]
                top_k = 8  # number of nearest neighbours to report
                nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                log = 'Nearest to %s:' % valid_word
                for k in range(top_k):
                    close_word = reverse_dictionary[nearest[k]]
                    log = '%s %s,' % (log, close_word)
                print(log)
    final_embeddings = normalized_embeddings.eval()

num_points = 400

tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
two_d_embeddings = tsne.fit_transform(final_embeddings[1:num_points + 1, :])

def plot(embeddings, labels):
    assert embeddings.shape[0] >= len(labels), 'More labels than embeddings'
    pylab.figure(figsize=(15, 15))  # in inches
    for i, label in enumerate(labels):
        x, y = embeddings[i, :]
        pylab.scatter(x, y)
        pylab.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points',
                       ha='right', va='bottom')
    pylab.show()

words = [reverse_dictionary[i] for i in range(1, num_points + 1)]
plot(two_d_embeddings, words)