Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import psycopg2
- import sys
# NOTE(review): database credentials are hard-coded in source. They are kept
# here only so the script behaves as before; move them to ~/.pgpass or the
# PG* environment variables and rotate this password.
DSN = 'dbname=giraff user=giraff password=g1r@ff host=cobalt.da'

# Default location of the pipe-separated teaser dump; may be overridden by
# passing a path as the first command-line argument.
TEASERS_DUMP_PATH = '/home/rmihno/Downloads/ae-teasers.txt'


def build_site_index(rows):
    """Map dump-side site keys to site ids.

    ``rows`` is an iterable of ``(site_id, email, da_idx)`` tuples. The key
    for each row is ``'@grfAE' + da_idx + '@' + email``, where ``da_idx`` is
    rendered only when it is greater than zero. A ``None`` index (SQL NULL)
    is treated like zero instead of raising ``TypeError``.

    Returns a dict ``{site_key: site_id}``; later rows win on duplicate keys.
    """
    index = {}
    for site_id, email, da_idx in rows:
        suffix = str(da_idx) if da_idx is not None and da_idx > 0 else ''
        index['@grfAE' + suffix + '@' + email] = site_id
    return index


def build_teaser_index(rows):
    """Map ``(site_id, url)`` to ``int(da_id)``.

    ``rows`` is an iterable of ``(site_id, url, da_id)`` tuples; later rows
    win on duplicate keys.
    """
    return {(site_id, url): int(da_id) for site_id, url, da_id in rows}


def parse_teaser_dump(lines, site_index):
    """Parse dump lines into ``{(site_id, url): highest da_id}``.

    Each non-blank line is split on ``'|'``; field 0 is the da id, field 1
    the URL and field 2 the site key looked up in ``site_index``. Any extra
    fields are ignored. ``http://`` and ``https://`` are removed from the
    URL before it is used as part of the key. When several lines share a
    key, the largest da id is kept.

    Blank lines are skipped. Raises ``KeyError`` (with the line number) when
    a site key is not present in ``site_index``.
    """
    da_ids = {}
    for lineno, raw in enumerate(lines, 1):
        stripped = raw.strip()
        if not stripped:
            # Tolerate empty lines (e.g. a trailing newline at end of file).
            continue
        fields = stripped.split('|')
        try:
            site_id = int(site_index[fields[2]])
        except KeyError:
            raise KeyError(
                'line %d: unknown site key %r' % (lineno, fields[2])
            ) from None
        # NOTE(review): replace() strips the scheme text anywhere in the URL,
        # not only at the start; kept as-is so keys stay comparable with the
        # urls stored in the teasers table - confirm that is intended.
        url = fields[1].replace('http://', '').replace('https://', '')
        da_id = int(fields[0])
        key = (site_id, url)
        if key not in da_ids or da_id > da_ids[key]:
            da_ids[key] = da_id
    return da_ids


def find_missing(da_ids, grf_teasers):
    """Return ``[(da_id, key), ...]`` for keys of ``da_ids`` not in ``grf_teasers``.

    The result follows the iteration order of ``da_ids``.
    """
    return [(da_id, key) for key, da_id in da_ids.items()
            if key not in grf_teasers]


def main(argv=None):
    """Report dump teasers that have no da_id-tagged row in the database.

    Loads the site and teaser tables, parses the dump file, prints one
    ``da_id (site_id, url)`` line per dump entry that is absent from the
    database result, then prints how many such entries there were.

    ``argv`` defaults to ``sys.argv``; ``argv[1]``, when given, replaces the
    default dump path. Returns the process exit status (always 0).
    """
    argv = sys.argv if argv is None else argv
    dump_path = argv[1] if len(argv) > 1 else TEASERS_DUMP_PATH

    conn = psycopg2.connect(DSN)
    try:
        cur = conn.cursor()
        cur.execute("""SELECT sites.id, users.email, sites.da_idx FROM sites JOIN users ON users.id = sites.user_id""")
        sites_emails = build_site_index(cur)
        cur.execute("""SELECT site_id, url, da_id FROM teasers WHERE da_id > 0""")
        grf_teasers = build_teaser_index(cur)
    finally:
        # Only reads are performed, so there is nothing to commit.
        conn.close()

    # The context manager closes the dump even if parsing raises.
    with open(dump_path) as dump:
        da_ids = parse_teaser_dump(dump, sites_emails)

    missing = find_missing(da_ids, grf_teasers)
    for da_id, key in missing:
        print(da_id, key)
    print(len(missing))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement