Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from selenium import webdriver
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.common.by import By
- from selenium.common.exceptions import TimeoutException
- from bs4 import BeautifulSoup
- import urllib.request
- import math
# Log in to Facebook in a Selenium-driven Firefox window.
#
# The credentials are read from the environment instead of being written into
# the source, so the account's secrets are not exposed whenever the script is
# shared. Set FACEBOOK_EMAIL and FACEBOOK_PASSWORD before running; a missing
# variable raises KeyError naming the variable.
import os

usernameStr = os.environ["FACEBOOK_EMAIL"]
passwordStr = os.environ["FACEBOOK_PASSWORD"]

# Browser
# NOTE(review): `executable_path` is kept for compatibility with the Selenium
# version this script was written against; newer Selenium releases expect a
# Service object instead — confirm against the installed version.
browser = webdriver.Firefox(
    executable_path=r'C:\Users\Alex\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Python 3.7\geckodriver.exe'
)
browser.get("https://www.facebook.com")

# Fill in the username. find_element(By.ID, ...) is used rather than
# find_element_by_id because the latter was removed in Selenium 4, while this
# form works in both Selenium 3 and 4.
username = browser.find_element(By.ID, "email")
username.send_keys(usernameStr)

# Fill in the password.
password = browser.find_element(By.ID, "pass")
password.send_keys(passwordStr)

# Sign in to Facebook.
signInButton = browser.find_element(By.ID, "loginbutton")
signInButton.click()
# Scraper that opens a Facebook Marketplace search page for a given
# location, search text and maximum price.
class FacebookScraper:
    """Open a Facebook Marketplace search page in a Selenium-driven browser.

    Attributes set by ``__init__``: ``location``, ``max_price``, ``query``,
    ``url`` (the full search URL), ``driver`` (the Selenium WebDriver used to
    load the page) and ``delay`` (seconds to wait for the page to be ready).
    """

    def __init__(self, location, max_price, query=None, driver=None):
        """Build the search URL and attach a browser.

        Args:
            location: Marketplace location slug placed in the URL path
                (for example ``"nyc"``).
            max_price: Upper price bound, sent as the ``maxPrice`` parameter.
            query: Search text, sent as the ``query`` parameter. When omitted,
                the module-level ``query`` variable is used, which is how
                existing two-argument callers supply it.
            driver: An existing Selenium WebDriver to reuse, e.g. a browser
                that has already logged in. When omitted, a new Firefox
                instance is started.

        Raises:
            NameError: If ``query`` is omitted and no module-level ``query``
                variable exists.
        """
        from urllib.parse import quote, urlencode

        if query is None:
            # Backward compatibility: this class used to read the search text
            # straight from a module-level global named `query`.
            try:
                query = globals()["query"]
            except KeyError:
                raise NameError(
                    "name 'query' is not defined; pass query=... to FacebookScraper"
                ) from None

        self.location = location
        self.max_price = max_price
        self.query = query

        # Escape the path segment and the parameters so that spaces, '&' and
        # similar characters in the inputs cannot corrupt the URL.
        # Example result:
        #   https://www.facebook.com/marketplace/nyc/search?query=books&maxPrice=0
        # TODO: a search radius is not sent yet; the example URLs kept with
        # this script pass it as an extra `radiusKM` parameter.
        params = urlencode({"query": query, "maxPrice": max_price})
        self.url = (
            "https://www.facebook.com/marketplace/"
            f"{quote(str(location), safe='')}/search?{params}"
        )

        # Reuse the caller's browser when one is given; otherwise start a new
        # Firefox. A freshly started browser does not share the session of any
        # other browser window, including one that logged in earlier.
        if driver is not None:
            self.driver = driver
        else:
            self.driver = webdriver.Firefox(
                executable_path=r'C:\Users\Alex\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Python 3.7\geckodriver.exe'
            )
        self.delay = 6

    def load_facebook_url(self):
        """Navigate to ``self.url`` and report whether the page became ready.

        Waits up to ``self.delay`` seconds for an element with id
        ``"searchform"`` to be present, then prints ``"page is ready"``; on
        timeout prints ``"Loading took too much time"`` instead.
        """
        self.driver.get(self.url)
        try:
            wait = WebDriverWait(self.driver, self.delay)
            # NOTE(review): the "searchform" id looks inherited from the
            # Craigslist version of this scraper — confirm that it exists on
            # the Marketplace page.
            wait.until(EC.presence_of_element_located((By.ID, "searchform")))
            print("page is ready")
        except TimeoutException:
            print("Loading took too much time")

    def close(self):
        """Quit the browser held in ``self.driver``."""
        self.driver.quit()

    # TODO: port extract_post_titles() and extract_post_urls() to Facebook.
    # The earlier drafts collected the text of elements with class
    # "result-row" and the <a class="result-title hdrlnk"> links; those class
    # names presumably come from the Craigslist scraper and need replacing.
# Search settings supplied by the user.
query = "books"
max_price = "0"
location = "nyc"
# radius = "2"

# Build the Facebook Marketplace scraper and open the search page.
# The scraper takes its search text from the module-level `query` above.
scraper = FacebookScraper(location, max_price)
scraper.load_facebook_url()
# scraper.extract_post_titles()
# scraper.extract_post_urls()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement