Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- #'Sultan Alzahrani'
- # This program tests the similarity between two words.
- from sklearn.feature_extraction.text import TfidfVectorizer
- from Preprocessing import UnpickleIt,Reprocess_and_create_LargeDictionary,updating_final_further_cleaning
- from ReadPandaPickle import readPandas,readDictionary,getRowsDictionary
- from nltk.corpus import wordnet
- from VictorizeTextA import readLinesFormFile
- import pandas as pd
- import numpy as np
- import pandas as pd
- from nltk.corpus import wordnet
def similarity_check(words=None):
    """Print a pairwise WordNet similarity table for a list of words.

    For every unordered pair of words, the first WordNet synset of each
    word is compared with ``wup_similarity``.  Pairs for which either word
    has no synset, or for which the similarity is ``None``, are skipped.
    The scores are placed in a symmetric matrix (diagonal set to 1.0) that
    is wrapped in a pandas DataFrame labelled by the sorted keywords.

    The function prints, in order: the keyword -> matrix-index mapping,
    the DataFrame, the list of ``(score, word1, word2)`` tuples sorted by
    descending score, and two fixed status lines.  It returns ``None``.

    Args:
        words: Optional iterable of words to compare.  Defaults to
            ``['College', 'School']`` (the list the script originally
            hard-coded).
    """
    # NOTE(review): the pasted source had lost all indentation; the nesting
    # below (pairs recorded only when a similarity exists, printing done once
    # after the matrix is filled) is reconstructed -- confirm against the
    # author's original.
    if words is None:
        words = ['College', 'School']
    words = list(words)

    # Cache the first synset per word so WordNet is queried once per word
    # instead of once per pair.
    synset_cache = {}

    def first_synset(word):
        if word not in synset_cache:
            found = wordnet.synsets(word)
            synset_cache[word] = found[0] if found else None
        return synset_cache[word]

    scored_pairs = []
    keyword_set = set()
    # Visit each unordered pair once: word1 is always the later list entry.
    for position, word1 in enumerate(words):
        for word2 in words[:position]:
            synset1 = first_synset(word1)
            synset2 = first_synset(word2)
            if synset1 is None or synset2 is None:
                continue
            score = synset1.wup_similarity(synset2)
            if score is None:
                continue
            keyword_set.add(word1)
            keyword_set.add(word2)
            scored_pairs.append((score, word1, word2))

    # Highest similarity first; the sort is stable, so ties keep pair order.
    scored_pairs.sort(key=lambda pair: pair[0], reverse=True)

    keywords = sorted(keyword_set)
    index_of = {word: pos for pos, word in enumerate(keywords)}
    size = len(keywords)
    # Builtin float instead of the np.float alias, which newer NumPy removed.
    matrix = np.zeros((size, size), dtype=float)

    ### SOME PRINTABLE CASES
    print(index_of)

    for score, word1, word2 in scored_pairs:
        row = index_of[word1]
        col = index_of[word2]
        matrix[row, col] = score
        matrix[col, row] = score
    # Every keyword took part in at least one pair, so the whole diagonal is
    # 1.0 -- the same result as setting it pair by pair.
    np.fill_diagonal(matrix, 1.0)

    row_index = pd.Index(keywords, name="rows")
    column_index = pd.Index(keywords, name="columns")
    df = pd.DataFrame(data=matrix, index=row_index, columns=column_index)

    print(df)
    print(scored_pairs)
    print('Pandas table...')
    print('inserting to pandsa')
def do_print_ex(count=11):
    """Print a growing comma-separated run of integers, one line per step.

    Line ``i`` (for ``i`` from 0 to ``count - 1``) contains the numbers
    ``0 .. i - 1`` joined by commas, so the first line is empty and the
    last one, with the default ``count`` of 11, is ``0,1,2,...,9``.

    NOTE(review): indentation was lost in the pasted source; this assumes
    the print belongs to the outer loop (one line per ``i``) -- confirm.

    Args:
        count: Number of lines to print.  Defaults to 11, the value the
            original hard-coded.
    """
    for i in range(count):
        # Single-argument print() behaves the same on Python 2 and 3.
        print(','.join(str(j) for j in range(i)))
# Run the similarity demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    similarity_check()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement