Advertisement
Guest User

Untitled

a guest
Nov 17th, 2019
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.76 KB | None | 0 0
  1. from selenium import webdriver
  2. from selenium.webdriver.support.ui import WebDriverWait
  3. from selenium.webdriver.support import expected_conditions as EC
  4. from selenium.webdriver.common.by import By
  5. from selenium.common.exceptions import TimeoutException
  6. from bs4 import BeautifulSoup
  7. import urllib.request
  8.  
  9. class FacebookScraper(object):
  10. def __init__(self, location, search, maxPrice):
  11. self.location = location
  12. self.search = search
  13. self.maxPrice = maxPrice
  14.  
  15. self.url = f"https://en-gb.facebook.com/marketplace/{location}/search?query={search}&maxPrice={maxPrice}"
  16.  
  17. self.driver = webdriver.Firefox()
  18. self.delay = 3
  19.  
  20. def load_facebook_url(self):
  21. self.driver.get(self.url)
  22. try:
  23. wait = WebDriverWait(self.driver, self.delay)
  24. wait.until(EC.presence_of_element_located((By.ID, "u_0_5")))
  25. print("Page is ready")
  26. except TimeoutException:
  27. print("Loading took to much time")
  28.  
  29. def extract_post_titles(self):
  30. all_posts = self.driver.find_elements_by_class_name("_1oem")
  31. post_title_list = []
  32. for post in all_posts:
  33. print(post.text)
  34. post_title_list.append(post.text)
  35. return post_title_list
  36.  
  37. def extract_post_urls(self):
  38. url_list = []
  39. html_page = urllib.request.urlopen(self.url)
  40. soup = BeautifulSoup(html_page, 'lxml')
  41. for link in soup.findAll('a', attrs={"data-testid":"marketplace_feed_item"}):
  42. print(link)
  43.  
  44. location = "sheffield"
  45. search = "unicycle"
  46. maxPrice = "30"
  47.  
  48. scraper = FacebookScraper(location, search, maxPrice)
  49. scraper.load_facebook_url()
  50. #scraper.extract_post_titles()
  51. scraper.extract_post_urls()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement