Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape product names, prices and weights from FairPrice search results.
#
# For every term in `searchlist` the script opens the search page in Firefox,
# scrolls to the bottom several times so more results are loaded, parses the
# rendered HTML and appends one DataFrame per term to `dflist`.
import os
from time import sleep

import bs4
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

os.makedirs('grocery', exist_ok=True)  # store images in ./grocery

# starting url; the search term is appended directly to the query string
origurl = 'https://www.fairprice.com.sg/search?query='
# search term list
searchlist = ["chicken", "beef", "fish", "pork"]
# one DataFrame per search term, rows are (item, price, weight)
dflist = []

for term in searchlist:
    url = origurl + term
    # NOTE(review): newer Selenium 4 releases drop `executable_path` in favour
    # of a Service object -- confirm against the installed Selenium version.
    browser = webdriver.Firefox(executable_path='./geckodriver')
    try:
        browser.get(url)
        # get more items due to pagination
        for _ in range(10):
            # wait for page to load, else won't scroll down
            sleep(1.5)
            # find_element(By, ...) works on both Selenium 3 and 4, unlike the
            # removed find_element_by_css_selector helper.
            page_body = browser.find_element(By.CSS_SELECTOR, 'body')
            page_body.send_keys(Keys.CONTROL + Keys.END)
        page_source = browser.page_source
    finally:
        # Always release the Firefox instance; previously one browser per
        # search term was left running.
        browser.quit()

    soup_level1 = BeautifulSoup(page_source, 'lxml')

    # get item names and prices
    prices = soup_level1.find_all(class_='sc-1bsd7ul-0 dQYxgv')
    items = soup_level1.find_all(class_="bo8pbc-1 ewbXiW")

    # clean prices: keep entries that contain a digit or read "free"
    priceclean = [
        price_tag.text
        for price_tag in prices
        if any(ch.isdigit() for ch in price_tag.text)
        or price_tag.text.lower() == "free"
    ]

    # clean item names and weights
    cleanitem = []
    cleanweight = []
    for item in items:
        for name_tag in item.find_all(class_="sc-1bsd7ul-0 dyhHCc"):
            # get rid of out of stock entries
            if name_tag.text != "Out of stock":
                cleanitem.append(name_tag.text)
        for weight_tag in item.find_all(class_="bo8pbc-14 dySmhU"):
            cleanweight.append(weight_tag.text)

    print("The price length of %s is %i" % (term, len(priceclean)))
    print("The item length of %s is %i" % (term, len(cleanitem)))

    # zip() stops at the shortest list, so a length mismatch silently drops
    # and misaligns rows; make that visible instead of hiding it.
    if not len(cleanitem) == len(priceclean) == len(cleanweight):
        print(
            "Warning: item/price/weight counts differ for %s (%i/%i/%i); "
            "rows are truncated to the shortest list"
            % (term, len(cleanitem), len(priceclean), len(cleanweight))
        )

    prit_df = pd.DataFrame(list(zip(cleanitem, priceclean, cleanweight)))
    dflist.append(prit_df)

print('Done.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement