Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import gensim
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
# Load the two embedding files (word2vec text format) that tsne_plot() is
# later called with: `model` reads the "rw" vectors, `model2` the "lg" vectors.
model = gensim.models.KeyedVectors.load_word2vec_format(
    'MUSE/dumped/debug/8gywilp7r1/vectors-rw1.txt')
model2 = gensim.models.KeyedVectors.load_word2vec_format(
    'MUSE/dumped/debug/8gywilp7r1/vectors-lg1.txt')
# English glosses for words of the "lg" embedding (presumably Luganda, going
# by the vectors-lg1.txt file name — confirm). tsne_plot() uses this mapping
# to label the points of its second model; words missing here are labelled
# "null".
# NOTE(review): a few glosses look doubtful (e.g. "engoye", "mannyo",
# "ekikoola", "erinnyo", "mukazi") — verify with a speaker before relying on
# them; the data is kept exactly as it was.
lg_dict = {
    "katonda": "god",
    "emu": "one",
    "kabaka": "king",
    "mutabani": "son",
    "mukazi": "son-in-law",
    "bantu": "people",
    "lukalu": "land",
    "ttaka": "land",
    "ettaka": "land",
    "nsi": "land",
    "olukalu": "land",
    "nnyumba": "house",
    "ennyumba": "house",
    "omulenzi": "son",
    "taata": "father",
    "ekintu": "thing",
    "engoye": "thing",
    "kintu": "thing",
    "mukono": "hand",
    "omukono": "hand",
    "kifo": "place",
    "ekifo": "place",
    "ekibuga": "city",
    "kibuga": "city",
    "ensi": "earth",
    "erinnya": "name",
    "lunaku": "day",
    "misana": "day",
    "emisana": "day",
    "olunaku": "day",
    "mutima": "heart",
    "ekigambo": "word",
    "kigambo": "word",
    "byombi": "two",
    "mannyo": "two",
    "ekikoola": "two",
    "erinnyo": "two",
    "liiso": "eye",
    "eriiso": "eye",
    "nnyindo": "nose",
    "ennyindo": "nose",
    "omukwano": "love",
    "mukwano": "love",
    "mwanyina": "brother",
    "kwagalana": "to-love-each-other",
    "omuntu": "man",
    "omusajja": "man",
    "musajja": "man",
}
# English glosses for words of the "rw" embedding (presumably Kinyarwanda,
# going by the vectors-rw1.txt file name — confirm). tsne_plot() uses this
# mapping to label the points of its first model; words missing here are
# labelled "null".
rw_dict = {
    "imana": "god",
    "rimwe": "one",
    "umwami": "king",
    "mwene": "son",
    "umukwe": "son-in-law",
    "rubanda": "people",
    "isambu": "land",
    "igihugu": "land",
    "ubutaka": "land",
    "inzu": "house",
    "akazu": "house",
    "baba": "father",
    "ikintu": "thing",
    "akantu": "small-thing",
    "ikiganza": "hand",
    "umukono": "hand",
    "ahantu": "place",
    "igitaka": "earth",
    "isi": "earth",
    "izina": "name",
    "umunsi": "day",
    "umutima": "heart",
    "ijambo": "word",
    "ebyiri": "two",
    "kabiri": "two",
    "ijisho": "eye",
    "izuru": "nose",
    "ishyanga": "foreign-country",
    "urukundo": "love",
    "musaza": "brother-of-a-female",
    "umugambi": "plan",
    "umugambanyi": "traitor",
    "umusaza": "old-man",
    "umusore": "male-teenager",
    "data": "paternal-uncle",
    "sebukwe": "father-in-law-of-someone-else",
    "sobukwe": "father-in-law-of-person-being-spoken-to",
    "kurandata": "to-lead-by-the-hand",
    "gukunda": "to-like-love",
    "gukundana": "to-like-or-love-each-other",
    "gukundwa": "to-be-loved",
}
def _embed_vocab(model):
    """Return ``(words, coords)``: the model's vocabulary and its 2-D t-SNE projection."""
    # A full training model keeps its vectors under ``.wv``; a bare
    # KeyedVectors object is used directly.
    vectors = getattr(model, 'wv', model)
    # gensim 4 removed ``.vocab`` in favour of ``index_to_key``; fall back to
    # ``.vocab`` so older gensim releases keep working.
    if hasattr(vectors, 'index_to_key'):
        words = list(vectors.index_to_key)
    else:
        words = list(vectors.vocab)
    tokens = [vectors[word] for word in words]
    # NOTE(review): recent scikit-learn releases rename ``n_iter`` to
    # ``max_iter`` — confirm against the installed version.
    tsne = TSNE(perplexity=50, n_components=2, init='pca', n_iter=2500,
                random_state=23)
    return words, tsne.fit_transform(tokens)


def _draw_points(words, coords, translations, color):
    """Scatter *coords* in *color* and annotate each point as ``word - gloss``."""
    # One scatter call for the whole set instead of one call per point.
    plt.scatter(coords[:, 0], coords[:, 1], c=color)
    for word, (x, y) in zip(words, coords):
        # Words without a gloss are labelled with the literal string "null".
        plt.annotate(word + " - " + translations.get(word, "null"),
                     xy=(x, y),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')


def tsne_plot(model1, model2):
    """Create a t-SNE projection for each model and plot both on one figure.

    Each model's full vocabulary is projected to two dimensions by its own,
    independently fitted t-SNE. Points of ``model1`` are drawn in blue and
    labelled with glosses from the module-level ``rw_dict``; points of
    ``model2`` are drawn in green and labelled from ``lg_dict``. The figure
    is written to 'shared-filtered.png' and then shown.

    Args:
        model1: gensim word-vector object whose words are looked up in
            ``rw_dict``.
        model2: gensim word-vector object whose words are looked up in
            ``lg_dict``.
    """
    # Run both projections before opening the figure, as the slow step.
    words1, coords1 = _embed_vocab(model1)
    words2, coords2 = _embed_vocab(model2)

    plt.figure(figsize=(16, 16))
    _draw_points(words1, coords1, rw_dict, 'b')
    _draw_points(words2, coords2, lg_dict, 'g')
    plt.savefig('shared-filtered.png')
    plt.show()
# Plot the "rw" vectors (model) against the "lg" vectors (model2).
tsne_plot(model, model2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement