Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from selenium import webdriver
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.by import By
- from selenium.common.exceptions import TimeoutException
- from bs4 import BeautifulSoup
- import urllib.request
class FacebookScraper(object):
    """Scrape Facebook Marketplace search results for one query.

    A Firefox WebDriver session is started on construction; call
    :meth:`close` (or use the instance as a context manager) to shut the
    browser down when finished.

    Attributes:
        location: Marketplace location slug placed in the URL path.
        search: Free-text search query.
        maxPrice: Upper price bound sent as the ``maxPrice`` query parameter.
        url: Fully built, percent-encoded search URL.
        driver: The Selenium Firefox WebDriver instance.
        delay: Seconds to wait for the page-ready element before timing out.
    """

    def __init__(self, location, search, maxPrice):
        """Store the search parameters, build the URL and launch Firefox.

        Args:
            location: Marketplace location slug (e.g. ``"sheffield"``).
            search: Search text; may contain spaces or reserved characters.
            maxPrice: Maximum price; any value convertible with ``str()``.
        """
        self.location = location
        self.search = search
        self.maxPrice = maxPrice
        self.url = self._build_url(location, search, maxPrice)
        self.driver = webdriver.Firefox()
        self.delay = 3

    @staticmethod
    def _build_url(location, search, maxPrice):
        """Return the Marketplace search URL with all parts percent-encoded."""
        # Imported locally so this helper does not depend on file-level imports.
        from urllib.parse import quote, urlencode

        # Raw interpolation breaks on spaces, '&', '#', etc.; encode the path
        # segment and the query string separately.
        location_part = quote(str(location), safe="")
        query_part = urlencode({"query": search, "maxPrice": maxPrice})
        return (
            "https://en-gb.facebook.com/marketplace/"
            f"{location_part}/search?{query_part}"
        )

    def close(self):
        """Quit the WebDriver session and close the browser window."""
        self.driver.quit()

    def __enter__(self):
        """Return ``self`` so the scraper can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser on exit; exceptions are not suppressed."""
        self.close()
        return False

    def load_facebook_url(self):
        """Open the search URL and wait for the page-ready element.

        Waits up to ``self.delay`` seconds for an element with id ``u_0_5``.
        NOTE(review): that id looks auto-generated by Facebook and may not be
        stable - confirm it is still the right readiness marker.

        Returns:
            True if the element appeared in time, False on timeout.
        """
        self.driver.get(self.url)
        try:
            WebDriverWait(self.driver, self.delay).until(
                EC.presence_of_element_located((By.ID, "u_0_5"))
            )
        except TimeoutException:
            print("Loading took to much time")
            return False
        print("Page is ready")
        return True

    def extract_post_titles(self):
        """Print and return the text of every ``_1oem`` element on the page.

        Presumably ``_1oem`` is the CSS class of a listing title - verify
        against the current Marketplace markup.

        Returns:
            A list with the text of each matching element, in page order.
        """
        post_title_list = []
        # find_elements(By..., ...) replaces find_elements_by_class_name,
        # which was removed in Selenium 4.
        for post in self.driver.find_elements(By.CLASS_NAME, "_1oem"):
            title = post.text
            print(title)
            post_title_list.append(title)
        return post_title_list

    def extract_post_urls(self):
        """Fetch the search URL over HTTP and collect feed-item links.

        Every ``<a data-testid="marketplace_feed_item">`` tag is printed and
        its ``href`` (when present) is collected.

        NOTE(review): this issues a separate urllib request rather than
        reading the page already loaded in ``self.driver``, so it does not
        see content rendered by JavaScript in the browser - confirm that is
        intended.

        Returns:
            A list of ``href`` values of the matching anchors, in page order.
        """
        # Parse inside the ``with`` so the HTTP response is always closed.
        with urllib.request.urlopen(self.url) as html_page:
            soup = BeautifulSoup(html_page, "lxml")

        url_list = []
        feed_item_attrs = {"data-testid": "marketplace_feed_item"}
        for link in soup.find_all("a", attrs=feed_item_attrs):
            print(link)
            href = link.get("href")
            if href:
                url_list.append(href)
        return url_list
# Example search parameters for the script entry point.
location = "sheffield"
search = "unicycle"
maxPrice = "30"

# Guarded so importing this module does not launch a browser.
if __name__ == "__main__":
    scraper = FacebookScraper(location, search, maxPrice)
    try:
        scraper.load_facebook_url()
        # scraper.extract_post_titles()  # enable to also print listing titles
        scraper.extract_post_urls()
    finally:
        # Always shut Firefox down, even if scraping raised, so no orphaned
        # browser/geckodriver processes are left behind.
        scraper.driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement