Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""Append today's Google result count to every query row of data.csv.

The first cell of each row in ``data.csv`` is used as a search query.  The
first row that has a non-blank query is treated as the header and receives
today's date (``%m/%d/%Y``) as a new cell; every following query row receives
the digits found in Google's ``#resultStats`` element.  The file is then
rewritten in place as UTF-8 with a BOM.
"""
import codecs
import datetime
import re
import time
import urllib

import lxml.html
import unicodecsv
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Matches every character that is NOT a digit, so .sub('') keeps digits only.
DIGITS_ONLY = re.compile(r"[^0-9]")
TODAY = datetime.datetime.today().strftime("%m/%d/%Y")
CSV_PATH = "data.csv"


def _load_rows(path):
    """Return all rows of the CSV at *path* as lists of unicode cells."""
    with open(path, "rb") as handle:
        # The file is saved with a UTF-8 BOM (see _save_rows).  Skip it here,
        # otherwise it ends up inside the first header cell and one more BOM
        # is stacked on top of it at every run.
        if handle.read(len(codecs.BOM_UTF8)) != codecs.BOM_UTF8:
            handle.seek(0)
        return list(unicodecsv.reader(handle, encoding="utf-8"))


def _save_rows(path, rows):
    """Write *rows* to *path* as UTF-8 CSV preceded by a BOM."""
    with open(path, "wb") as handle:
        handle.write(codecs.BOM_UTF8)
        writer = unicodecsv.writer(handle, encoding="utf-8")
        for row in rows:
            writer.writerow(row)


def _wait_for_captcha(driver):
    """Block while the browser shows a captcha page; return the page source."""
    source = driver.page_source
    if "CaptchaRedirect" in source:
        print("Enter captcha...")
    while "CaptchaRedirect" in source:
        # Sleep first, then re-read, so the freshest source is what gets tested.
        time.sleep(1)
        source = driver.page_source
    return source


def _count_results(driver, query):
    """Search Google for *query* and return the result count as a digit string.

    Returns an empty string when the page has no ``#resultStats`` element or
    that element carries no leading text.
    """
    # urllib.quote() raises KeyError on non-ASCII unicode input under
    # Python 2, so the query is encoded to UTF-8 bytes before quoting.
    quoted = urllib.quote(query.encode("utf-8"))
    driver.get("https://www.google.com/search?q=%s" % quoted)
    source = _wait_for_captcha(driver)
    stats = lxml.html.fromstring(source).cssselect("#resultStats")
    if not stats:
        return ""
    return DIGITS_ONLY.sub("", stats[0].text or "")


def main():
    """Read data.csv, append one new column for today, and rewrite the file."""
    rows = _load_rows(CSV_PATH)
    driver = webdriver.Firefox()
    try:
        header_done = False
        for row in rows:
            # Each row is updated directly, so a row with a blank query can
            # never shift results onto the wrong row.
            query = row[0].strip() if row else ""
            if not query:
                # Nothing to search for.  Pad non-empty rows so the new column
                # stays aligned; leave fully empty rows untouched.
                if row:
                    row.append("")
                continue
            if not header_done:
                row.append(TODAY)
                header_done = True
                continue
            nb_results = _count_results(driver, query)
            print("%s => %s" % (query, nb_results))
            row.append(nb_results)
    finally:
        # Always close the browser, even when a lookup fails.
        driver.quit()
    # Only reached when every lookup succeeded, so a failed run leaves the
    # original file untouched instead of writing a partially filled column.
    _save_rows(CSV_PATH, rows)
    print("data.csv updated.")


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement