Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- # In[60]:
- import pymysql
- pymysql.install_as_MySQLdb()
- import MySQLdb
- import sys
- import math
def similarityRelation(c, n):
    """Return log_n(n * sum(c_i + 1) / prod(c_i + 1)) for the counts in *c*.

    Args:
        c: Iterable of non-negative counts.  Each item is converted with
            ``int()``, so numeric strings and NumPy integers are accepted.
        n: Base of the logarithm and scale factor inside it.

    Returns:
        The similarity value as a float.

    Raises:
        ValueError: If *c* is empty or ``n <= 0`` (logarithm of a
            non-positive number).
        ZeroDivisionError: If ``n == 1`` (``log(1)`` is zero).
    """
    # Plain Python ints instead of an int8 array: counts above 126 used to
    # overflow once 1 was added, and the running product wrapped as well.
    values = [int(count) + 1 for count in c]

    # Work in log space so that a large product cannot overflow or push the
    # ratio down to 0.0:
    #   log(n * total / product) = log(n) + log(total) - sum(log(v))
    log_n = math.log(n)
    log_ratio = (
        log_n
        + math.log(sum(values))
        - sum(math.log(value) for value in values)
    )
    return log_ratio / log_n
- # In[61]:
# --- Connect and load the similarityrelation rows for the search string ----
# NOTE(review): the credentials are hard-coded in the source; consider reading
# them from the environment or from a config file kept out of version control.
host = "localhost"
user = "0187cs151057"
password = "noor@896247"
db = "0187cs151057"
# Keyword arguments: PyMySQL 1.0+ no longer accepts positional connect() args.
conn = MySQLdb.connect(host=host, user=user, password=password, database=db)
cursor = conn.cursor()

# The search string is everything given on the command line joined by spaces,
# e.g. the arguments  HACKVEDA ONE2ONE  become "HACKVEDA ONE2ONE".
searchString = " ".join(sys.argv[1:])

# Let the driver quote the value instead of interpolating it into the SQL
# text: a quote character in the argument would otherwise break (or inject
# into) the statement.  LIKE wildcards in the argument keep their meaning.
sql = "select * from similarityrelation where searchString like %s"
cursor.execute(sql, (searchString,))
result = cursor.fetchall()

# Column names are the first field of every row of the table description.
sql = "desc similarityrelation"
cursor.execute(sql)
name = cursor.fetchall()
head = [column[0] for column in name]

import pandas as pd
import numpy as np

# reshape() leaves a non-empty result untouched and turns an empty one into a
# (0, len(head)) array, so a search without matches yields an empty frame
# instead of a shape-mismatch error.
df = pd.DataFrame(
    np.array(result).reshape(len(result), len(head)),
    columns=head,
)
- # In[62]:
# --- Load the id/link pairs stored for the same search string --------------
# Parameterized for the same reason as any user-supplied value: the driver
# quotes it, so a quote in the search string cannot break the statement.
sql = "select id,link from query where searchString like %s"
cursor.execute(sql, (searchString,))
result = cursor.fetchall()

# Exactly two columns are selected; the explicit reshape keeps an empty
# result usable as a (0, 2) array instead of raising on construction.
df4 = pd.DataFrame(
    np.array(result).reshape(len(result), 2),
    columns=["id", "link"],
)

# No statement after this point touches the database, so release the cursor
# and the connection here rather than leaving them open until exit.
cursor.close()
conn.close()
- # In[63]:
# --- Score every searchId, separately for each search engine value ---------
# df3 ends up with one row per (searchEngineValue, searchId) pair holding the
# sum of the title, link and summary similarities.
similarity_columns = ["searchEngineValue", "searchId", "similarity"]
searchEngineValue = df["searchEngineValue"].unique()
engine_frames = []
for engineValue in searchEngineValue:
    df1 = df[df["searchEngineValue"] == engineValue]
    searchId = df1["searchId"].unique()
    # NOTE(review): similarityRelation divides by log(n), so an engine with a
    # single distinct searchId (n == 1) raises ZeroDivisionError — confirm how
    # that case should be scored.
    n = len(searchId)

    s = []
    for i in searchId:
        rows = df1[df1["searchId"] == i]  # select the id's rows only once
        s.append(
            similarityRelation(rows["titleCount"], n)
            + similarityRelation(rows["linkCount"], n)
            + similarityRelation(rows["summaryCount"], n)
        )
    s = pd.Series(s, dtype=np.float64)

    df2 = pd.DataFrame(
        {"searchId": searchId, "similarity": s, "searchEngineValue": engineValue}
    )
    # Only the score is stored as float32.  Passing dtype=np.float32 to the
    # constructor applies the cast to every column, which pandas 2.0+ rejects
    # when a column holds non-numeric text.
    df2["similarity"] = df2["similarity"].astype(np.float32)
    engine_frames.append(df2)

# DataFrame.append was removed in pandas 2.0; concatenating once at the end
# also avoids re-copying the accumulated frame on every iteration.
if engine_frames:
    df3 = pd.concat(engine_frames, ignore_index=True)[similarity_columns]
else:
    df3 = pd.DataFrame(columns=similarity_columns)
- # In[ ]:
import html

# --- Attach links to the scores, rank them and emit HTML anchors -----------
# join() aligns the two frames on their row index, not on searchId/id.
# NOTE(review): presumably row i of df4 belongs to row i of df3 — confirm, or
# merge on searchId == id instead.
df3 = df3.join(df4)

# Keep the first row seen for every link, then order best match first.
df5 = df3.drop_duplicates("link")
df5 = df5.sort_values("similarity", ascending=False)

# Rows without a partner in df4 carry NaN as link and cannot be concatenated
# into a string, so they are skipped.  The link is escaped because it is
# written both into an attribute value and into the element text.
for link in df5["link"].dropna():
    safe_link = html.escape(link, quote=True)
    print("<a href=\"" + safe_link + "\">" + safe_link + "</a><br >")
Add Comment
Please, Sign In to add comment