Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # We need a scraper that saves and parses the information on a webpage.
# Once the information is processed and stored, we need to send it to Firebase
- # We need to save: Prd. Title, Prd. Desc, Prd. Image, Prd. Link.
- # Ownership Bogdan
- import time
- from lxml import html
- from urllib.parse import urlsplit
- import requests
- from core.notifycore import notification_display
def extract_domain(link):
    """Return the scheme and network location of *link*, with a trailing slash.

    Example: "http://host/products.html" -> "http://host/"
    """
    parts = urlsplit(link)
    return f"{parts.scheme}://{parts.netloc}/"
def scraper():
    """
    Scrape the hard-coded products page and display one desktop
    notification per product found.

    The page is parsed for product names, descriptions and image srcs;
    relative image paths are rewritten as absolute URLs using the page's
    domain. Notifications are shown 5 seconds apart.

    :return: None
    """
    link = "http://192.168.119.171/products.html"
    page = requests.get(link)
    tree = html.fromstring(page.content)

    prd_names = tree.xpath('//div[@class="prdName"]/text()')
    prd_descriptions = tree.xpath('//div[@class="prdDescription"]/text()')
    prd_images = tree.xpath('//img[@name="prdImage"]/@src')

    base_domain = extract_domain(link)
    # Image srcs are assumed to be relative paths of the form "./...":
    # strip the leading "./" and prepend the site's domain.
    for i in range(len(prd_images)):
        prd_images[i] = base_domain + prd_images[i][2:]

    product_dictionary = {0: prd_names,
                          1: prd_descriptions,
                          2: prd_images}

    # BUG FIX: the original looped over range(len(product_dictionary) + 1),
    # which is always range(4) (number of lists + 1), not the number of
    # products — raising IndexError unless the page had exactly 4 products.
    # Iterate over the actual product count instead.
    for i in range(len(prd_names)):
        notification_display("AkhNotify", product_dictionary[0][i],
                             product_dictionary[1][i], "./img/default.png")
        time.sleep(5)
def activate_scraper():
    """Run the scraper forever, starting a new scrape every 10 seconds."""
    while True:
        scraper()
        time.sleep(10)


# BUG FIX: the original ran `print(scraper())` unconditionally at import
# time, which kicked off a network scrape on every import and printed None
# (scraper() returns nothing). Guard the test invocation so importing this
# module has no side effects.
if __name__ == "__main__":
    # Use the call on the next line to test the scraper
    scraper()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement