Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.74 KB | None | 0 0
  1. # We need a scraper that saves and parses the information on a webpage.
# Once the information is processed and stored, we need to send it to Firebase
  3. # We need to save: Prd. Title, Prd. Desc, Prd. Image, Prd. Link.
  4. # Ownership Bogdan
  5. import time
  6. from lxml import html
  7. from urllib.parse import urlsplit
  8. import requests
  9. from core.notifycore import notification_display
  10.  
  11.  
  12. def extract_domain(link):
  13.     return "{0.scheme}://{0.netloc}/".format(urlsplit(link))
  14.  
  15. global product_dictionary
  16.  
  17. def scraper():
  18.     """
  19.    This function takes a link to a website and scrapes it.
  20.    The data fond on the website is stored in lists and a dictionary is
  21.    returned which contains the lists.
  22.    :param link:
  23.    :return: dictionary that holds 4 lists
  24.    """
  25.     link = "http://192.168.119.171/products.html"
  26.     page = requests.get(link)
  27.     tree = html.fromstring(page.content)
  28.  
  29.     prd_names = tree.xpath('//div[@class="prdName"]/text()')
  30.     prd_descriptions = tree.xpath('//div[@class="prdDescription"]/text()')
  31.     prd_images = tree.xpath('//img[@name="prdImage"]/@src')
  32.  
  33.     base_domain = extract_domain(link)
  34.  
  35.     for i in range(len(prd_images)):
  36.         prd_images[i] = prd_images[i][2:]
  37.         prd_images[i] = base_domain + prd_images[i]
  38.  
  39.     product_dictionary = {0: prd_names,
  40.                           1: prd_descriptions,
  41.                           2: prd_images
  42.                           }
  43.  
  44.     for i in range(len(product_dictionary) +1 ):
  45.         notification_display("AkhNotify", product_dictionary[0][i], product_dictionary[1][i], "./img/default.png")
  46.         time.sleep(5)
  47.  
  48. # Use the code on the next line to test the scraper
  49.  
  50. def activate_scraper():
  51.     while True:
  52.         scraper()
  53.         time.sleep(10)
  54.  
  55. print(scraper())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement