Advertisement
Guest User

Scrape

a guest
Jan 28th, 2020
211
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.35 KB | None | 0 0
  1. import requests
  2. import csv
  3. import time
  4. import backoff
  5. from wappalyzer import Wappalyzer, WebPage
  6. wappalyzer = Wappalyzer.latest()
  7.  
  8. platforms = ['WooCommerce','Shopify','Bigcommerce','Magento','Volusion','InkFrog','3DCart','OsCommerce','Yahoo! Ecommerce','Microsoft ASP.NET']
  9.  
  10. @backoff.on_exception(backoff.expo, (requests.exceptions.ReadTimeout, requests.exceptions.Timeout, requests.exceptions.ConnectionError))
  11. def fetch_result(webpage):
  12.     return wappalyzer.analyze(webpage)
  13.  
  14. with open('/Users/aabesh/Documents/Scripts/eriktest.csv') as csv_input, open('/Users/aabesh/Documents/Scripts/erikshopsoutput.csv', "w") as csv_output:
  15.     csv_reader = csv.reader(csv_input, delimiter=',')
  16.     writer = csv.writer(csv_output, lineterminator='\n')
  17.     writer.writerow(['Shop Name', 'URL', 'Reverb ID', 'Integration', 'Close Website Platform', 'Wappalyzer Website'])
  18.     next(csv_reader)
  19.     for row in csv_reader:
  20.         url = row[1]
  21.         if not url: continue
  22.         try:
  23.             webpage = WebPage.new_from_url(row[1])
  24.             result = fetch_result(webpage)
  25.             print(result)
  26.             wapwebsite = ",".join([u for u in result if u in platforms]) or ""
  27.             row.append(wapwebsite)
  28.             print(row)
  29.             print(wapwebsite)
  30.             writer.writerow(row)
  31.         except:
  32.             print(webpage," timed out.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement