Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2020
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.15 KB | None | 0 0
  # Scrape product names, prices and pack sizes from FairPrice search results.
  #
  # For every term in `searchlist` a Firefox session is opened on the search
  # page, the page is scrolled to the bottom several times so more results are
  # loaded, and the rendered HTML is parsed with BeautifulSoup.  One DataFrame
  # per search term is appended to `dflist`.
  #
  # NOTE(review): this script was recovered from a paste that had lost its
  # indentation; the loop nesting below is the most plausible reading --
  # confirm against the original if it is available.
  import os
  from time import sleep

  import bs4
  import pandas as pd
  import requests
  from bs4 import BeautifulSoup
  from selenium import webdriver
  from selenium.webdriver.common.keys import Keys

  os.makedirs('grocery', exist_ok=True)  # store images in ./grocery

  # starting url
  origurl = 'https://www.fairprice.com.sg/search?query='
  # search term list
  searchlist = ["chicken", "beef", "fish", "pork"]
  # one DataFrame per search term, in the same order as searchlist
  dflist = []

  for term in searchlist:
      url = origurl + term
      browser = webdriver.Firefox(executable_path='./geckodriver')
      try:
          browser.get(url)
          # get more items due to pagination
          for _ in range(10):
              # wait for page to load, else won't scroll down
              sleep(1.5)
              body = browser.find_element_by_css_selector('body')
              body.send_keys(Keys.CONTROL + Keys.END)
          page_source = browser.page_source
      finally:
          # Close the browser even if loading or scrolling fails; otherwise
          # one Firefox process per search term is left running.
          browser.quit()

      soup_level1 = BeautifulSoup(page_source, 'lxml')

      # get item names and prices
      prices = soup_level1.find_all(class_='sc-1bsd7ul-0 dQYxgv')
      items = soup_level1.find_all(class_="bo8pbc-1 ewbXiW")

      # clean prices: keep entries that contain a digit or read "free"
      priceclean = [
          tag.text
          for tag in prices
          if any(ch.isdigit() for ch in tag.text) or tag.text.lower() == "free"
      ]

      # clean item names and weights, one product card at a time
      cleanitem = []
      cleanweight = []
      for item in items:
          # get rid of out of stock entries
          cleanitem.extend(
              name.text
              for name in item.find_all(class_="sc-1bsd7ul-0 dyhHCc")
              if name.text != "Out of stock"
          )
          cleanweight.extend(
              weight.text
              for weight in item.find_all(class_="bo8pbc-14 dySmhU")
          )

      print("The price length of %s is %i" % (term, len(priceclean)))
      print("The item length of %s is %i" % (term, len(cleanitem)))

      # zip() stops at the shortest list, so a length mismatch silently drops
      # rows (and the columns may no longer line up); make that visible.
      if not (len(cleanitem) == len(priceclean) == len(cleanweight)):
          print("Warning: item (%i), price (%i) and weight (%i) counts differ "
                "for %s; rows are truncated to the shortest list"
                % (len(cleanitem), len(priceclean), len(cleanweight), term))

      # columns are, in order: item name, price, weight
      prit_df = pd.DataFrame(list(zip(cleanitem, priceclean, cleanweight)))
      dflist.append(prit_df)

  print('Done.')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement