Advertisement
Guest User

Untitled

a guest
Aug 17th, 2019
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.48 KB | None | 0 0
"""Iterative Normalization: alternately length-normalize and mean-center word embeddings."""
  2.  
  3. from argparse import ArgumentParser
  4. import numpy as np
  5.  
  6. def load_embed(filename, max_vocab=-1):
  7. words, embeds = [], []
  8. with open(filename, 'r') as f:
  9. next(f)
  10. for line in f:
  11. word, vector = line.rstrip().split(' ', 1)
  12. vector = np.fromstring(vector, sep=' ')
  13. words.append(word)
  14. embeds.append(vector)
  15. if len(embeds) == max_vocab:
  16. break
  17. return words, np.array(embeds)
  18.  
  19.  
  20. def main():
  21. parser = ArgumentParser()
  22. parser.add_argument('input_file')
  23. parser.add_argument('output_file')
  24. parser.add_argument('--normalize', default='renorm,center,renorm,center,renorm,center,renorm,center,renorm,center,renorm', type=str)
  25. parser.add_argument('--max_vocab', default=-1, type=int)
  26. args = parser.parse_args()
  27.  
  28. words, embeds = load_embed(args.input_file, max_vocab=args.max_vocab)
  29.  
  30. for t in args.normalize.split(','):
  31. if t == 'center':
  32. embeds -= embeds.mean(axis=0)[np.newaxis, :]
  33. elif t == 'renorm':
  34. embeds /= np.linalg.norm(embeds, axis=1)[:, np.newaxis] + 1e-8
  35. elif t != '':
  36. raise Exception('Unknown normalization type: "%s"' % t)
  37.  
  38. with open(args.output_file, 'w') as f:
  39. print >> f, embeds.shape[0], embeds.shape[1]
  40. for word, embed in zip(words, embeds):
  41. vector_str = ' '.join(`x` for x in embed)
  42. print >> f, word, vector_str
  43.  
  44.  
  45. if __name__ == '__main__':
  46. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement