Advertisement
Guest User

Untitled

a guest
Jun 1st, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.30 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import psycopg2
  4. import sys
  5.  
  6. if __name__ =="__main__":
  7. #conn = psycopg2.connect('dbname=grf2 user=postgres password=asdwsx host=127.0.0.1')
  8. conn = psycopg2.connect('dbname=giraff user=giraff password=g1r@ff host=cobalt.da')
  9.  
  10. cur = conn.cursor()
  11. # cur2 = conn.cursor()
  12. #
  13. # cur.execute("""SELECT id, site_id, url FROM teasers""")
  14. # #rows = cur.fetchall()
  15. # counter = 0
  16. # for row in cur:
  17. # cur2.execute('''INSERT INTO urls (site_id, url) VALUES (%s, %s) RETURNING id''', (row[1],row[2]))
  18. # url_id = cur2.fetchone()
  19. # print(url_id)
  20. # url_id = url_id[0]
  21. # cur2.execute('''UPDATE teasers SET url_id = %s WHERE id = %s''', (url_id, row[0]))
  22. # counter += 1
  23. # if (counter % 1000 == 0):
  24. # print(counter)
  25. #
  26. # conn.commit()
  27.  
  28. cur.execute("""SELECT sites.id, users.email, sites.da_idx FROM sites JOIN users ON users.id = sites.user_id""")
  29. sites_emails = {}
  30. for row in cur:
  31. idx = str(row[2]) if row[2] > 0 else ''
  32. sites_emails['@grfAE' + idx + '@' + row[1]] = row[0]
  33.  
  34.  
  35. cur.execute("""SELECT site_id, url, da_id FROM teasers WHERE da_id > 0""")
  36. grf_teasers = {}
  37. for line in cur:
  38. grf_teasers[(line[0], line[1])] = int(line[2])
  39.  
  40. f = open('/home/rmihno/Downloads/ae-teasers.txt')
  41. da_ids = {}
  42. for line in f:
  43. line = line.strip().split('|')
  44. line[2] = int(sites_emails[line[2]])
  45. line[1] = line[1].replace('http://', '').replace('https://', '')
  46. if (line[2], line[1]) in da_ids:
  47. if int(line[0]) > da_ids[(line[2], line[1])]:
  48. da_ids[(line[2], line[1])] = int(line[0])
  49. else:
  50. da_ids[(line[2], line[1])] = int(line[0])
  51.  
  52.  
  53. # all_cnt = len(da_ids)
  54. # counter = 0
  55. # for key in da_ids:
  56. # cur.execute("""SELECT id FROM teasers WHERE da_id != %s AND site_id = %s AND url = %s""", (da_ids[key], key[0], key[1]))
  57. # all_cnt -=1
  58. # print(all_cnt)
  59. # row = cur.fetchone()
  60. # if (row):
  61. # cur.execute("""UPDATE teasers SET da_id = %s WHERE id =%s""", (da_ids[key], row[0]))
  62. # print(("""UPDATE teasers SET da_id = %s WHERE id =%s""", (da_ids[key], row[0])))
  63.  
  64. #print(counter, len(da_ids), (da_ids[key], key[0], key[1]))
  65. # if (teaser_id):
  66. # counter +=1
  67. # print(counter)
  68. #print(teaser_id, (da_ids[key], key[0], key[1]))
  69.  
  70. # conn.commit()
  71. # sys.exit(0)
  72.  
  73. # if line[2] not in sites_emails:
  74. # print(line[2])
  75. #print(line)
  76. # if (line[1], line[2]) in da_ids:
  77. # da_ids[(line[1], line[2])] += 1
  78. # else:
  79. # da_ids[(line[1], line[2])] = 1
  80.  
  81. counter = 0
  82. for key in da_ids:
  83. if not key in grf_teasers:
  84. counter += 1
  85. print(da_ids[key], key)
  86.  
  87. print(counter)
  88. #
  89. #
  90. #print(da_ids)
  91. sys.exit(0)
  92. print(len(grf_teasers))
  93. counter = 0;
  94. for id in grf_teasers:
  95. if id not in da_ids:
  96. counter += 1
  97. #print(id, grf_teasers[id])
  98.  
  99. print(counter)
  100.  
  101. print(len(da_ids))
  102. counter = 0;
  103. for id in da_ids:
  104. if id not in grf_teasers:
  105. counter += 1
  106. #print(id, grf_teasers[id])
  107.  
  108. print(counter)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement