Guest User

Untitled

a guest
Jun 5th, 2018
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.63 KB | None | 0 0
  1. # coding: utf-8
  2.  
  3. # In[60]:
  4.  
  5. import pymysql
  6. pymysql.install_as_MySQLdb()
  7. import MySQLdb
  8. import sys
  9. import math
  10. def similarityRelation(c,n):
  11. c = list(c)
  12. c = np.array(c,dtype=np.int8)+1
  13. union = sum(c)
  14. mul =1;
  15. for i in c:
  16. mul *= i
  17. result = math.log((n*union)/(mul))/math.log(n)
  18. return result
  19.  
  20.  
  21. # In[61]:
  22. host = "localhost"
  23. user = "0187cs151057"
  24. password = "noor@896247"
  25. db = "0187cs151057"
  26.  
  27. conn = MySQLdb.connect(host,user,password,db)
  28.  
  29. cursor = conn.cursor()
  30.  
  31. #searchString = "HACKVEDA ONE2ONE"
  32. searchString = " ".join(sys.argv[1:])
  33.  
  34. #sql = "select * from similarityrelation where searchString like '%s'" %(searchString)
  35. sql = "select * from similarityrelation where searchString like '%s'" %(searchString) ###For command line argument
  36.  
  37. cursor.execute(sql)
  38.  
  39. result = cursor.fetchall()
  40.  
  41. sql = "desc similarityrelation"
  42. cursor.execute(sql)
  43. name = cursor.fetchall()
  44.  
  45. head = []
  46. for i in name:
  47. head.append(i[0])
  48. import pandas as pd
  49. import numpy as np
  50.  
  51. df = pd.DataFrame(np.array(result),columns=head);
  52.  
  53.  
  54. # In[62]:
  55.  
  56. #sql = "select id,link from query where searchString like '%s'" %(searchString)
  57. sql = "select id,link from query where searchString like '%s'" %(searchString)
  58.  
  59. cursor.execute(sql)
  60.  
  61. result = cursor.fetchall()
  62. sql = "desc query"
  63. cursor.execute(sql)
  64. name = cursor.fetchall()
  65.  
  66. head = []
  67. for i in name:
  68. head.append(i[0])
  69.  
  70. df4 = pd.DataFrame(np.array(result),columns=["id","link"]);
  71.  
  72.  
  73. # In[63]:
  74.  
  75. searchEngineValue = df["searchEngineValue"].unique()
  76.  
  77. df3 = pd.DataFrame(columns=["searchEngineValue","searchId","similarity"])
  78.  
  79. for engineValue in searchEngineValue:
  80. df1 = df[:][df["searchEngineValue"]==engineValue]
  81.  
  82. searchId = df1["searchId"].unique()
  83. n = len(searchId)
  84. st = []
  85. ss = []
  86. su = []
  87. for i in searchId:
  88. c = df1["titleCount"][df1["searchId"] == i]
  89. st.append(similarityRelation(c,n))
  90. c = df1["summaryCount"][df1["searchId"] == i]
  91. ss.append(similarityRelation(c,n))
  92. c = df1["linkCount"][df1["searchId"] == i]
  93. su.append(similarityRelation(c,n))
  94. st = np.array(st)
  95. ss = np.array(ss)
  96. su = np.array(su)
  97. s = st+su+ss
  98. d = pd.Series(s)
  99. a = len(d.unique())
  100. b = len(searchId)
  101. s = pd.Series(s, dtype=np.float64)
  102. dict1 = {'searchId':searchId, "similarity":s,"searchEngineValue":engineValue}
  103. df2 =pd.DataFrame(dict1, dtype=np.float32)
  104. df3 = df3.append(df2,ignore_index=True)
  105.  
  106.  
  107. # In[ ]:
  108.  
  109. df3 = df3.join(df4)
  110.  
  111.  
  112. # In[85]:
  113.  
  114.  
  115.  
  116. # In[86]:
  117.  
  118. df5 = df3.drop_duplicates("link")
  119. df5 = df5.sort_values("similarity", ascending = False)
  120. #print(df5["link"])
  121. for link in df5["link"]:
  122. print("<a href=\"" + link + "\">" + link + "</a><br >")
Add Comment
Please, Sign In to add comment