import numpy as np


class Embedding(object):
    def __init__(self, vocab_file, vectors_file):
        # Read the vocabulary: one entry per line, first token is the word.
        with open(vocab_file, 'r') as f:
            words = [x.rstrip().split(' ')[0] for x in f.readlines()]

        # Read the vectors: each line is a word followed by its components.
        with open(vectors_file, 'r') as f:
            vectors = {}
            for line in f:
                vals = line.rstrip().split(' ')
                vectors[vals[0]] = [float(x) for x in vals[1:]]

        vocab_size = len(words)
        vocab = {w: idx for idx, w in enumerate(words)}
        ivocab = {idx: w for idx, w in enumerate(words)}

        # Stack the vectors into a (vocab_size, vector_dim) matrix,
        # one row per vocabulary word; '<unk>' is left as a zero row.
        vector_dim = len(vectors[ivocab[0]])
        W = np.zeros((vocab_size, vector_dim))
        for word, v in vectors.items():
            if word == '<unk>':
                continue
            W[vocab[word], :] = v

        # Normalize each word vector to unit length (L2 norm).
        d = np.sum(W ** 2, 1) ** 0.5
        W_norm = (W.T / d).T

        self.W = W_norm
        self.vocab = vocab
        self.ivocab = ivocab
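
A minimal usage sketch follows; the file names 'vocab.txt' and 'vectors.txt' and the query word 'king' are placeholders, not part of the original paste. Because the rows of emb.W are unit length, a plain dot product gives cosine similarity, so nearest-neighbour lookup reduces to a single matrix-vector product.

# Placeholder paths to GloVe-format files: vocab.txt has one word (plus
# count) per line; vectors.txt has a word followed by its float components.
emb = Embedding('vocab.txt', 'vectors.txt')

# Rows of emb.W are unit-length, so W . q is the cosine similarity of
# every word against the query. The query word itself tops the list.
query = emb.W[emb.vocab['king']]
sims = emb.W.dot(query)
nearest = np.argsort(-sims)[:5]
print([emb.ivocab[int(i)] for i in nearest])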