Advertisement
mattrix

Untitled

Apr 20th, 2017
813
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.04 KB | None | 0 0
  1. import requests, json, re, time, random, string
  2.  
  3. try:
  4.     from urllib.parse import urlparse #python3
  5. except:
  6.     from urlparse import urlparse #python2
  7.  
# Google search-results template. {0} is the zero-based result offset
# (advanced by 10 per page below); nfpr=1 disables "did you mean" rewrites.
# The query finds .co.nz sites whose assets are served from Shopify's CDN.
search_url = "https://www.google.co.nz/search?q=cdn.shopify:*.co.nz&start={0}&nfpr=1"
#search_url = "https://www.google.co.nz/search?q=cdn.shopify:*.nz&start={0}&nfpr=1"

# Form fields POSTed to a storefront's /contact endpoint (currently only used
# by the commented-out requests.post below).
# NOTE(review): "Spotify" in the message text looks like a typo for "Shopify" — confirm before enabling sends.
post_data = {
    'form_type': 'contact',
    'contact[name]': "Mr Smith",
    'contact[email]': "my_email@example.com",
    'contact[comments]': "Hello Spotify customer....",
}

# Desktop Chrome User-Agent, sent with every request made below.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
# Captures the href of each result's <h3><a ...> heading link in the raw HTML.
pattern = re.compile('<h3 .*?><a href="(.*?)".*?</h3>')
  20.  
  21. try:
  22.     with open("domains.txt", 'r') as file:
  23.         persist = json.loads(file.read())
  24. except:
  25.     persist = {'start':0, 'domains':[]}
  26.  
# Page through Google results until none remain (or Google blocks us).
while True:
    # Fetch one results page; persist['start'] is the result offset (10 per page).
    page = requests.get(search_url.format(persist['start']), headers={'User-Agent':user_agent}).text

    # Extract every organic-result link from the raw HTML.
    results = re.findall(pattern, page)
    if not results:
        # Distinguish a rate-limit block from a genuine end of results.
        if 'unusual traffic' in page:
            print("Blocked by google")
        else:
            print("No more results")
        break

    for result in results:
        url = urlparse(result)
        # Skip domains already recorded, and Shopify's own hosts.
        if url.netloc in persist['domains'] or 'shopify' in url.netloc:
            continue

        # Build the store's contact-page URL (presumably the standard
        # Shopify theme route — confirm against an actual storefront).
        contact_url = "{0}://{1}/contact#contact_form".format(url.scheme, url.netloc)
        print(contact_url)

        ## Un-comment below line to actually send the contact request
        # requests.post(contact_url, data=post_data, headers={'User-Agent':user_agent})

        persist['domains'].append(url.netloc)

    # Advance to the next results page and checkpoint state to disk so an
    # interrupted run resumes where it left off.
    persist['start'] += 10
    with open("domains.txt", 'w') as file:
        file.write(json.dumps(persist))

    # help reduce google blocking
    time.sleep(2)
    # Interleave a throwaway random-string search between result pages.
    random_search = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(8))
    page = requests.get('https://www.google.co.nz/search?q={0}'.format(random_search), headers={'User-Agent':user_agent})
    time.sleep(2)
  60.  
  61. input("Done")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement