Advertisement
Guest User

Untitled

a guest
Feb 20th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.32 KB | None | 0 0
  1. import gensim
  2. from sklearn.manifold import TSNE
  3. import matplotlib.pyplot as plt
  4.  
  5. model = gensim.models.KeyedVectors.load_word2vec_format('MUSE/dumped/debug/8gywilp7r1/vectors-rw1.txt')
  6. model2 = gensim.models.KeyedVectors.load_word2vec_format('MUSE/dumped/debug/8gywilp7r1/vectors-lg1.txt')
  7.  
  8. lg_dict = {
  9. "katonda": "god",
  10. "emu": "one",
  11. "kabaka": "king",
  12. "mutabani" : "son",
  13. "mukazi":"son-in-law",
  14. "bantu":"people",
  15. "lukalu":"land",
  16. "ttaka":"land",
  17. "ettaka":"land",
  18. "nsi":"land",
  19. "olukalu":"land",
  20. "nnyumba":"house",
  21. "ennyumba":"house",
  22. "omulenzi":"son",
  23. "taata":"father",
  24. "ekintu":"thing",
  25. "engoye":"thing",
  26. "kintu":"thing",
  27. "mukono":"hand",
  28. "omukono":"hand",
  29. "kifo":"place",
  30. "ekifo":"place",
  31. "ekibuga":"city",
  32. "kibuga":"city",
  33. "ensi":"earth",
  34. "erinnya":"name",
  35. "lunaku":"day",
  36. "misana":"day",
  37. "emisana":"day",
  38. "olunaku":"day",
  39. "mutima":"heart",
  40. "ekigambo":"word",
  41. "kigambo":"word",
  42. "byombi":"two",
  43. "mannyo":"two",
  44. "ekikoola":"two",
  45. "erinnyo":"two",
  46. "liiso":"eye",
  47. "eriiso":"eye",
  48. "nnyindo":"nose",
  49. "ennyindo":"nose",
  50. "omukwano":"love",
  51. "mukwano":"love",
  52. "mwanyina":"brother",
  53. "kwagalana":"to-love-each-other",
  54. "omuntu":"man",
  55. "omusajja":"man",
  56. "musajja":"man",
  57. }
  58.  
  59. rw_dict = {
  60. "imana": "god",
  61. "rimwe": "one",
  62. "umwami": "king",
  63. "mwene" : "son",
  64. "umukwe":"son-in-law",
  65. "rubanda":"people",
  66. "isambu":"land",
  67. "igihugu":"land",
  68. "ubutaka":"land",
  69. "inzu":"house",
  70. "akazu":"house",
  71. "baba":"father",
  72. "ikintu":"thing",
  73. "akantu":"small-thing",
  74. "ikiganza":"hand",
  75. "umukono":"hand",
  76. "ahantu":"place",
  77. "igitaka":"earth",
  78. "isi":"earth",
  79. "izina":"name",
  80. "umunsi":"day",
  81. "umutima":"heart",
  82. "ijambo":"word",
  83. "ebyiri":"two",
  84. "kabiri":"two",
  85. "ijisho":"eye",
  86. "izuru":"nose",
  87. "ishyanga":"foreign-country",
  88. "urukundo":"love",
  89. "musaza":"brother-of-a-female",
  90. "umugambi":"plan",
  91. "umugambanyi":"traitor",
  92. "umusaza":"old-man",
  93. "umusore":"male-teenager",
  94. "data":"paternal-uncle",
  95. "sebukwe":"father-in-law-of-someone-else",
  96. "sobukwe":"father-in-law-of-person-being-spoken-to",
  97. "kurandata":"to-lead-by-the-hand",
  98. "gukunda":"to-like-love",
  99. "gukundana":"to-like-or-love-each-other",
  100. "gukundwa":"to-be-loved"
  101. }
  102.  
  103. def tsne_plot(model1, model2):
  104. "Creates and TSNE model and plots it"
  105. labels1 = []
  106. tokens1 = []
  107.  
  108. labels2 = []
  109. tokens2 = []
  110.  
  111. for word in model1.wv.vocab:
  112. tokens1.append(model1[word])
  113. labels1.append(word)
  114.  
  115. for word in model2.wv.vocab:
  116. tokens2.append(model2[word])
  117. labels2.append(word)
  118.  
  119. tsne_model1 = TSNE(perplexity=50, n_components=2, init='pca', n_iter=2500, random_state=23)
  120. new_values1 = tsne_model1.fit_transform(tokens1)
  121.  
  122. tsne_model2 = TSNE(perplexity=50, n_components=2, init='pca', n_iter=2500, random_state=23)
  123. new_values2 = tsne_model2.fit_transform(tokens2)
  124.  
  125. x1 = []
  126. y1 = []
  127. for value in new_values1:
  128. x1.append(value[0])
  129. y1.append(value[1])
  130.  
  131. x2 = []
  132. y2 = []
  133. for value in new_values2:
  134. x2.append(value[0])
  135. y2.append(value[1])
  136.  
  137. plt.figure(figsize=(16, 16))
  138. for i in range(len(x1)):
  139. translation = ''
  140. if (labels1[i]) in (rw_dict.keys()):
  141. translation = rw_dict[labels1[i]]
  142. else:
  143. translation = "null"
  144. plt.scatter(x1[i],y1[i],c='b')
  145. plt.annotate(labels1[i] + " - " + translation,
  146. xy=(x1[i], y1[i]),
  147. xytext=(5, 2),
  148. textcoords='offset points',
  149. ha='right',
  150. va='bottom')
  151.  
  152. for i in range(len(x2)):
  153. translation = ''
  154. if (labels2[i]) in (lg_dict.keys()):
  155. translation = lg_dict[labels2[i]]
  156. else:
  157. translation = "null"
  158. plt.scatter(x2[i],y2[i],c='g')
  159. plt.annotate(labels2[i] + " - " + translation,
  160. xy=(x2[i], y2[i]),
  161. xytext=(5, 2),
  162. textcoords='offset points',
  163. ha='right',
  164. va='bottom')
  165.  
  166. plt.savefig('shared-filtered.png')
  167. plt.show()
  168.  
  169. tsne_plot(model, model2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement