Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Standard library
import csv
import time

# Third-party
import backoff
import requests
from wappalyzer import Wappalyzer, WebPage

# One shared detector, created once via Wappalyzer.latest() and reused for
# every page analysed by this script.
wappalyzer = Wappalyzer.latest()

# Technology names of interest. Detected names are matched against these
# strings with a plain, case-sensitive `in` test.
platforms = [
    'WooCommerce',
    'Shopify',
    'Bigcommerce',
    'Magento',
    'Volusion',
    'InkFrog',
    '3DCart',
    'OsCommerce',
    'Yahoo! Ecommerce',
    'Microsoft ASP.NET',
]
@backoff.on_exception(
    backoff.expo,
    (
        requests.exceptions.ReadTimeout,
        requests.exceptions.Timeout,
        requests.exceptions.ConnectionError,
    ),
    # Without a cap, backoff retries indefinitely; give up after a few
    # attempts so one unreachable site cannot stall the whole run.
    max_tries=5,
)
def fetch_result(webpage):
    """Return what ``wappalyzer.analyze`` reports for *webpage*.

    The call is retried with exponential backoff when it raises a requests
    timeout or connection error. After the last attempt the exception is
    re-raised to the caller.

    NOTE(review): the HTTP request is presumably issued when the WebPage is
    built (``WebPage.new_from_url``), not inside ``analyze`` -- confirm
    against the installed python-Wappalyzer version whether these
    exceptions can actually be raised here.
    """
    return wappalyzer.analyze(webpage)
# Retry only the page download, and only a bounded number of times.
# NOTE(review): assumes WebPage.new_from_url is where the HTTP request is
# made; if it is not, this decorator is simply a no-op.
@backoff.on_exception(
    backoff.expo,
    (
        requests.exceptions.ReadTimeout,
        requests.exceptions.Timeout,
        requests.exceptions.ConnectionError,
    ),
    max_tries=3,
)
def _load_page(url):
    """Build a WebPage for *url*, retrying on timeout / connection errors."""
    return WebPage.new_from_url(url)


# Read the shop list, look up each shop's URL, and write every successfully
# analysed row back out with one extra column holding the matched platforms.
# Rows without a URL, and rows whose lookup fails, are not written.
# newline='' is what the csv module asks for on files it reads or writes.
with open('/Users/aabesh/Documents/Scripts/eriktest.csv', newline='') as csv_input, \
        open('/Users/aabesh/Documents/Scripts/erikshopsoutput.csv', "w", newline='') as csv_output:
    csv_reader = csv.reader(csv_input, delimiter=',')
    writer = csv.writer(csv_output, lineterminator='\n')
    writer.writerow(['Shop Name', 'URL', 'Reverb ID', 'Integration', 'Close Website Platform', 'Wappalyzer Website'])

    # Skip the input header; the default keeps an empty file from raising.
    next(csv_reader, None)

    for row in csv_reader:
        # Blank or short rows have no URL column; skip them instead of
        # crashing on row[1].
        if len(row) < 2 or not row[1]:
            continue
        url = row[1]

        try:
            webpage = _load_page(url)
            result = fetch_result(webpage)
        except Exception as exc:
            # Best-effort per row: report the URL and the real error, then
            # move on. (A bare `except:` would also swallow Ctrl-C, and
            # `webpage` may not be bound yet when the download fails.)
            print(url, "failed:", repr(exc))
            continue

        print(result)
        # Keep only the detected technologies that are in `platforms`.
        wapwebsite = ",".join(u for u in result if u in platforms)
        row.append(wapwebsite)
        print(row)
        print(wapwebsite)
        writer.writerow(row)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement