• API
• FAQ
• Tools
• Archive
SHARE
TWEET

# Untitled

a guest Dec 10th, 2019 70 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. import gensim
2.
3. from gensim.models import KeyedVectors
7. # calculate: (king - man) + woman = ?
8. result = model.most_similar(positive=['woman', 'king'], negative=['man'], topn=1)
9. print(result)
10.
11. import numpy as np
12.
# `pickle` is needed right here; the file-level `import pickle` only appears
# further down, so running the script top to bottom would otherwise raise
# NameError at the dump call.
import pickle

# Copy every vocabulary entry of the loaded model into a plain dict that maps
# word -> NumPy vector, so later steps do not need the gensim model object.
word2vecdict = {word: np.array(model[word]) for word in model.vocab}
print(word2vecdict["king"])

# Persist the lookup table for reuse by later steps.
with open("word2vecdict.pkl", "wb") as out_file:
    pickle.dump(word2vecdict, out_file, protocol=pickle.HIGHEST_PROTOCOL)
20.
21. import pickle
22.
23. word2phonesvec = {}
24. with open("C:\\Users\\Миша\\Desktop\\word2phones.pkl" , "rb") as file:
26. for word in data.keys():
27.     if word in word2vecdict.keys():
28.         word2phonesvec[word] = (data[word], word2vecdict[word])
29. print(word2phonesvec["is"])
30.
32. temp_word = []
33. temp_vec = []
36.     for word in temp_word:
37.         try:
38.             temp_vec.append(word2vecdict[word])
39.         except Exception:
40.             pass
43.
44. opposite_dict = {}
66. opposite_dict
67.
68. with open("sounds_in_words_dict.pkl" , "rb") as file:
70. data
71.
def GetCos(v1, v2):
    """Return the dot product of ``v1`` and ``v2``.

    The result equals the cosine of the angle between the vectors only when
    both inputs already have unit length; no normalisation is done here.
    The projection loop in this file normalises both arguments before
    calling.

    Args:
        v1: First vector (anything ``np.dot`` accepts).
        v2: Second vector, same length as ``v1``.

    Returns:
        ``np.dot(v1, v2)``.
    """
    return np.dot(v1, v2)
75.
# Unit-length axis for every opposition, computed once instead of once per
# phoneme (the normalised axis does not depend on the phoneme).
_unit_axes = {
    opp: axis / np.linalg.norm(axis) for opp, axis in opposite_dict.items()
}

# projection_dict[ph][opp] is a list with one entry per word group in
# data[ph]; each entry lists the projections (dot products of unit vectors)
# of that group's words onto the opposition axis.
projection_dict = {}
for ph in data:
    projection_dict[ph] = {}
    for opp, axes in _unit_axes.items():
        axes_distributions = []
        for arr in data[ph]:
            distribution_list = []
            for word in arr:
                try:
                    word_vector = np.asarray(word2vecdict[word])
                except KeyError:
                    # Word has no embedding: skip it (best effort).  Only the
                    # missing-key case is silenced so real errors surface.
                    continue
                word_vector = word_vector / np.linalg.norm(word_vector)
                distribution_list.append(GetCos(word_vector, axes))
            axes_distributions.append(distribution_list)
        projection_dict[ph][opp] = axes_distributions
projection_dict  # notebook-style echo; no effect when run as a script
96.
97. import scipy.stats as stats
98. with open("projection_dict.pkl" , "rb") as file:
100.
101.
102. dist_dict ={}
103.
def _mannwhitney_p(sample_a, sample_b, min_samples):
    """Return the Mann-Whitney U p-value, or None if either sample is too small."""
    if len(sample_a) > min_samples and len(sample_b) > min_samples:
        return stats.mannwhitneyu(sample_a, sample_b)[1]
    return None


def GetDist(elem, min_samples=20):
    """Compare the three samples in ``elem`` pairwise with Mann-Whitney U.

    Args:
        elem: Indexable holding three sized samples (``elem[0]``..``elem[2]``).
        min_samples: A pair is tested only when both samples contain
            strictly more than this many values (default 20).

    Returns:
        ``(("f_w", p), ("w_wo", p), ("f_wo", p))`` where each ``p`` is the
        p-value of the corresponding test, or ``None`` when the pair was
        skipped for being too small.
    """
    f_w = _mannwhitney_p(elem[0], elem[1], min_samples)
    # NOTE(review): "f_wo" is computed from elem[1]/elem[2] and "w_wo" from
    # elem[0]/elem[2].  If elem is ordered (first, with, without), as the
    # labels suggest, these two pairings look swapped -- confirm before
    # relying on the labels.  The pairings are kept exactly as they were.
    f_wo = _mannwhitney_p(elem[1], elem[2], min_samples)
    w_wo = _mannwhitney_p(elem[0], elem[2], min_samples)
    return (("f_w", f_w), ("w_wo", w_wo), ("f_wo", f_wo))
# For every phoneme and every opposition, reduce the three projection
# samples to the pairwise test results returned by GetDist.
dist_dict = {
    ph: {opp: GetDist(samples) for opp, samples in per_opp.items()}
    for ph, per_opp in projection_dict.items()
}
dist_dict  # notebook-style echo; no effect when run as a script
123.
124.
def GetNewTuple(elem, threshold=0.001):
    """Keep the entries of ``elem`` that are below the significance cutoff.

    Args:
        elem: Indexable with three entries, each either ``None`` or a
            number (read as p-values).
        threshold: Exclusive upper bound an entry must stay under to be
            kept (default 0.001).

    Returns:
        A list of ``[label, value]`` pairs, in positional order, for every
        entry that is not ``None`` and is strictly below ``threshold``.
        Labels by position: "first_with", "with_without", "first_without".
    """
    # NOTE(review): GetDist in this file returns ("label", p) pairs rather
    # than bare numbers; this function expects bare numbers -- confirm the
    # shape of the data actually passed in.
    labels = ("first_with", "with_without", "first_without")
    return [
        [label, elem[idx]]
        for idx, label in enumerate(labels)
        if elem[idx] is not None and elem[idx] < threshold
    ]
134.
135. with open("dist_dict.pkl", "rb") as file:
# result_thousand[ph][opp] holds the significant comparisons for that pair;
# pairs with nothing significant are left out (the phoneme key stays, with a
# possibly empty dict).
result_thousand = {}
for ph in data:
    result_thousand[ph] = {}
    for opp in data[ph]:
        # Evaluate once and reuse, instead of calling GetNewTuple twice.
        significant = GetNewTuple(data[ph][opp])
        if significant:
            result_thousand[ph][opp] = significant
result_thousand  # notebook-style echo; no effect when run as a script
def GetString(elem):
    """Return the ``str()`` of every item in ``elem`` joined by single spaces.

    Args:
        elem: Any iterable.

    Returns:
        One string; empty when ``elem`` has no items.
    """
    return " ".join(map(str, elem))
150.
# Dump one "<phoneme> <opposition> <results>" record per line.
# The file is opened once, outside the loops: reopening it with mode "w" for
# every record truncated it each time, so only the last record survived.
with open("thousands.txt", "w") as out_file:
    for ph, per_opp in result_thousand.items():
        for opp, significant in per_opp.items():
            # A real newline terminates the record (the literal "/n" that
            # used to be written was a typo).
            record = "{} {} {}\n".format(ph, opp, GetString(significant))
            try:
                out_file.write(record)
            except UnicodeEncodeError:
                # Best effort: skip records the platform's default text
                # encoding cannot represent.
                continue
# No explicit close(): the `with` block has already closed the file.
161.
162. import shlex
163. with open('thousands.txt', 'r') as f:
165.     f_str = f.read().replace("," , " ")
166.     f_str = "".join(c for c in f_str if c not in "\'")
167.     f_str = "".join(c for c in f_str if c not in "\(")
168.     f_str = "".join(c for c in f_str if c not in "\)")
169.     f_str = "".join(c for c in f_str if c not in "\[")
170.     f_str = "".join(c for c in f_str if c not in "\]")
171.     f_str = "".join(c for c in f_str if c not in "\{")
172.     f_str = "".join(c for c in f_str if c not in "\}")
173.     file = open("thousands1.txt" , "w")
174.     file.write(f_str)
175.     file.close()
176. f_str
177.
def GetNewTuple(elem, threshold=0.01):
    """Keep the entries of ``elem`` that are below the significance cutoff.

    This definition uses a 0.01 cutoff; a same-named definition earlier in
    the file uses 0.001 and is replaced once this one executes.

    Args:
        elem: Indexable with three entries, each either ``None`` or a
            number (read as p-values).
        threshold: Exclusive upper bound an entry must stay under to be
            kept (default 0.01).

    Returns:
        A list of ``[label, value]`` pairs, in positional order, for every
        entry that is not ``None`` and is strictly below ``threshold``.
        Labels by position: "first_with", "with_without", "first_without".
    """
    labels = ("first_with", "with_without", "first_without")
    return [
        [label, elem[idx]]
        for idx, label in enumerate(labels)
        if elem[idx] is not None and elem[idx] < threshold
    ]
187.
188. with open("dist_dict.pkl", "rb") as file: