Guest User

Untitled

a guest
Nov 21st, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.64 KB | None | 0 0
  1. from revscoring.features import wikitext
  2. from revscoring.features.modifiers import max, sub
  3. from revscoring.languages import english
  4. from revscoring import Feature
  5. from revscoring.datasources import Datasource, revision_oriented
  6. from revscoring.dependencies import solve
  7. from gensim.models.keyedvectors import KeyedVectors
  8. import numpy as np
  9. import pdb
  10.  
  11. word2vec = None
  12. VECTORS_DIM = 300
  13.  
  14. def load_word2vec(filepath):
  15. global word2vec
  16. if word2vec is not None:
  17. return word2vec
  18. #word2vec = KeyedVectors.load_word2vec_format(filepath,
  19. # binary=True, limit=20000)
  20. print("called here")
  21. word2vec = np.random.uniform(0, 1, size=300)
  22. return word2vec
  23.  
  24.  
  25. def get_word_vectors(non_stop_tokens):
  26. word2vec = load_word2vec('')
  27. vector = np.zeros((1, VECTORS_DIM))
  28. words_added = 0
  29. for tok in non_stop_tokens:
  30. try:
  31. vec = word2vec[tok]
  32. vector += vec
  33. words_added += 1
  34. except:
  35. continue
  36. return vector/words_added
  37.  
# Datasource named "word_vectors": its value is produced by
# get_word_vectors(), which receives the revision's English non-stopword
# tokens as its single dependency.
word_vectors = Datasource("word_vectors",
                          get_word_vectors,
                          depends_on=[english.stopwords.revision.datasources.non_stopwords])
  41.  
  42. def get_feature_at(idx):
  43. return Feature(
  44. "word_vector_{}".format(idx),
  45. lambda vec: vec[idx],
  46. depends_on=[word_vectors],
  47. returns=float
  48. )
  49.  
  50. features = []
  51. for i in range(0,300):
  52. features.append(get_feature_at(i))
  53.  
  54. vectors = solve(features, cache={revision_oriented.revision.text: 'Hey ther, how are you?'})
Add Comment
Please, Sign In to add comment