# Untitled

# -*- coding: utf-8 -*-
#'Sultan Alzahrani'

# This program tests the similarity between pairs of words.
from sklearn.feature_extraction.text import TfidfVectorizer

from Preprocessing import UnpickleIt,Reprocess_and_create_LargeDictionary,updating_final_further_cleaning
from ReadPandaPickle import readPandas,readDictionary,getRowsDictionary
from nltk.corpus import wordnet
from VictorizeTextA import readLinesFormFile
import pandas as pd
import numpy as np
import pandas as pd

from nltk.corpus import wordnet

def similarity_check():
    """Print a pairwise WordNet similarity table for a fixed list of words.

    Every pair of distinct positions in the hard-coded word list is scored
    with ``wup_similarity`` between the first WordNet synset of each word.
    Pairs where either word has no synset, or where the score is ``None``,
    are skipped.  The scores are stored in a symmetric matrix with 1.0 on
    the diagonal, wrapped in a pandas DataFrame whose rows and columns are
    labelled with the sorted words, and printed.

    Printed, in order: the word -> matrix-index mapping, the DataFrame, the
    list of ``(score, word1, word2)`` tuples sorted by descending score, and
    two status lines.

    Returns:
        None.  All results are written to standard output.
    """
    # Alternative, larger word list kept for reference:
    # lst_a = ['choose', 'copy', 'define', 'duplicate', 'find', 'how', 'identify', 'label', 'list', 'listen', 'locate',
    #          'match', 'memorise', 'name', 'observe', 'omit', 'quote', 'read', 'recall', 'recite', 'recognise', 'record',
    #          'relate', 'remember', 'repeat', 'reproduce', 'retell', 'select', 'show', 'spell', 'state', 'tell', 'trace',
    #          'write','college']
    lst_a = ['College', 'School']

    # Look up each word's synsets once instead of once per pair.
    synsets_by_word = {word: wordnet.synsets(word) for word in lst_a}

    lst = []
    set_keywords = set()
    for i, word1 in enumerate(lst_a):
        synsets1 = synsets_by_word[word1]
        if not synsets1:
            continue
        # Pair word1 with every word that precedes it in the list.
        for word2 in lst_a[:i]:
            synsets2 = synsets_by_word[word2]
            if not synsets2:
                continue
            s = synsets1[0].wup_similarity(synsets2[0])
            if s is None:
                continue
            set_keywords.update((word1, word2))
            lst.append((s, word1, word2))

    # One stable sort after collection yields the same order as re-sorting
    # after every append: ties keep their insertion order either way.
    lst.sort(key=lambda tup: tup[0], reverse=True)

    # Built after the loop so the names exist even when no pair was scored.
    lst_keywords = sorted(set_keywords)
    size = len(lst_keywords)
    k_dict = {word: idx for idx, word in enumerate(lst_keywords)}
    # Builtin ``float`` is used because the ``np.float`` alias has been
    # removed from NumPy.
    m = np.zeros((size, size), dtype=float)
    ### SOME PRINTABLE CASES
    print(k_dict)
    # print('test case: ', k_dict['relate'], k_dict['remember'], k_dict['repeat'])
    for score, word1, word2 in lst:
        row = k_dict[word1]
        col = k_dict[word2]
        # Similarity is stored symmetrically; each word matches itself fully.
        m[row, col] = score
        m[col, row] = score
        m[row, row] = 1.0
        m[col, col] = 1.0

    I = pd.Index(lst_keywords, name="rows")
    C = pd.Index(lst_keywords, name="columns")
    df = pd.DataFrame(data=m, index=I, columns=C)

    print(df)

    print(lst)

    print('Pandas table...')

    print('inserting to pandsa')

def do_print_ex():
    """Print eleven lines of comma-separated integers.

    For each ``i`` from 0 to 10, prints the integers ``0 .. i-1`` joined by
    commas.  The first line (``i == 0``) is therefore empty and the last is
    ``0,1,2,3,4,5,6,7,8,9``.

    Returns:
        None.  Output goes to standard output.
    """
    for i in range(11):
        # Call form of print: valid on both Python 2 and Python 3.
        print(','.join(str(j) for j in range(i)))


# Run the similarity check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    similarity_check()