Advertisement
Guest User

Untitled

a guest
Jul 21st, 2017
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.62 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. #'Sultan Alzahrani'
  3.  
  4. # This program tests the similarity between pairs of words using WordNet.
  5. from sklearn.feature_extraction.text import TfidfVectorizer
  6.  
  7. from Preprocessing import UnpickleIt,Reprocess_and_create_LargeDictionary,updating_final_further_cleaning
  8. from ReadPandaPickle import readPandas,readDictionary,getRowsDictionary
  9. from nltk.corpus import wordnet
  10. from VictorizeTextA import readLinesFormFile
  11. import pandas as pd
  12. import numpy as np
  13. import pandas as pd
  14.  
  15. from nltk.corpus import wordnet
  16.  
  17. def similarity_check():
  18.  
  19.  
  20.  
  21. # lst_a = ['choose', 'copy', 'define', 'duplicate', 'find', 'how', 'identify', 'label', 'list', 'listen', 'locate',
  22. # 'match', 'memorise', 'name', 'observe', 'omit', 'quote', 'read', 'recall', 'recite', 'recognise', 'record',
  23. # 'relate', 'remember', 'repeat', 'reproduce', 'retell', 'select', 'show', 'spell', 'state', 'tell', 'trace',
  24. # 'write','college']
  25. lst_a =['College','School']
  26.  
  27. lst_b = list(lst_a)
  28.  
  29. lst = []
  30. set_keywords = set([])
  31.  
  32. for i in range(len(lst_a)):
  33. for j in range(i):
  34. word1 = lst_a[i]
  35. word2 = lst_b[j]
  36. wordFromList1 = wordnet.synsets(word1)
  37. wordFromList2 = wordnet.synsets(word2)
  38. if wordFromList1 and wordFromList2: # Thanks to @alexis' note
  39. s = wordFromList1[0].wup_similarity(wordFromList2[0])
  40. if s is not None:
  41. set_keywords.add(word1)
  42. set_keywords.add(word2)
  43. lst.append((s,word1,word2))
  44. lst = sorted(lst, reverse=True, key=lambda tup: tup[0])
  45.  
  46. lst_keywords = list(set_keywords)
  47. lst_keywords = sorted(lst_keywords)
  48. l = len(lst_keywords)
  49. k_dict = dict((v,i) for i,v in enumerate(lst_keywords))
  50. m = np.zeros((l,l), dtype=np.float)
  51. ### SOME PRINTABLE CASES
  52. print k_dict
  53. # print 'test case: ',k_dict['relate'], k_dict['remember'], k_dict['repeat']
  54. for i in range(len(lst)):
  55. t = lst[i]
  56. k1 = t[1]
  57. k2 = t[2]
  58. v = t[0]
  59. m[k_dict[k1],k_dict[k2]] = v
  60. m[k_dict[k2],k_dict[k1]] = v
  61. m[k_dict[k1], k_dict[k1]] = 1.0
  62. m[k_dict[k2], k_dict[k2]] = 1.0
  63.  
  64. I = pd.Index(lst_keywords, name="rows")
  65. C = pd.Index(lst_keywords, name="columns")
  66. df = pd.DataFrame(data=m, index=I, columns=C)
  67.  
  68. print df
  69.  
  70.  
  71.  
  72.  
  73.  
  74.  
  75.  
  76. print(lst)
  77.  
  78. print 'Pandas table...'
  79.  
  80. print 'inserting to pandsa'
  81.  
  82.  
  83.  
  84. def do_print_ex():
  85. for i in range(11):
  86. lst = []
  87. for j in range(i):
  88. lst.append(str(j))
  89. print ','.join(lst)
  90.  
  91.  
  92.  
  93.  
  94.  
  95. if __name__ == '__main__':
  96. similarity_check()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement