Advertisement
Guest User

Untitled

a guest
Dec 18th, 2015
250
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.38 KB | None | 0 0
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3.  
  4. import unicodecsv
  5. import lxml.html
  6. import urllib
  7. import re
  8. import datetime
  9. import codecs
  10. import time
  11.  
  12. DIGITS_ONLY = re.compile(r"[^0-9]")
  13. TODAY = datetime.datetime.today().strftime("%m/%d/%Y")
  14.  
  15. from selenium import webdriver
  16. from selenium.webdriver.common.keys import Keys
  17.  
  18. QUERIES = []
  19. RESULTS = []
  20.  
  21. f = open("data.csv", "rb")
  22. reader = unicodecsv.reader(f, encoding='utf-8')
  23. for query in reader:
  24.     if query[0].strip() != "":
  25.         QUERIES.append(query[0].strip())
  26.         RESULTS.append(query)
  27.  
  28.  
  29. driver = webdriver.Firefox()
  30.  
  31. for index, query in enumerate(QUERIES):
  32.     if index == 0:
  33.         RESULTS[index].append(TODAY)
  34.         continue
  35.  
  36.     driver.get("https://www.google.com/search?q=%s" % urllib.quote(query))
  37.     source = driver.page_source
  38.     while "CaptchaRedirect" in source:
  39.         print "Enter captcha..."
  40.         source = driver.page_source
  41.         time.sleep(1)
  42.     data = lxml.html.fromstring(source)
  43.     nb_results = data.cssselect("#resultStats")[0].text
  44.     nb_results = DIGITS_ONLY.sub('', nb_results)
  45.     print "%s => %s" % (query, nb_results)
  46.     RESULTS[index].append(nb_results)
  47.  
  48. driver.quit()
  49.  
  50.  
  51. f = codecs.open("data.csv", "wb")
  52. f.write(codecs.BOM_UTF8)
  53. w = unicodecsv.writer(f, encoding='utf-8')
  54. for row in RESULTS:
  55.     w.writerow(row)
  56. f.close()
  57.  
  58. print "data.csv updated."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement